// Copyright 2013 Dolphin Emulator Project // Licensed under GPLv2+ // Refer to the license.txt file included. // Dolby Pro Logic 2 decoder from ffdshow-tryout // * Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au // * Copyright (c) 2004-2006 Milan Cutka // * based on mplayer HRTF plugin by ylai #include #include #include #include #include #include #include "AudioCommon/DPL2Decoder.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" #ifndef M_PI #define M_PI 3.14159265358979323846 #endif #ifndef M_SQRT1_2 #define M_SQRT1_2 0.70710678118654752440 #endif static int olddelay = -1; static unsigned int oldfreq = 0; static unsigned int dlbuflen; static int cyc_pos; static float l_fwr, r_fwr, lpr_fwr, lmr_fwr; static std::vector fwrbuf_l, fwrbuf_r; static float adapt_l_gain, adapt_r_gain, adapt_lpr_gain, adapt_lmr_gain; static std::vector lf, rf, lr, rr, cf, cr; static float LFE_buf[256]; static unsigned int lfe_pos; static float* filter_coefs_lfe; static unsigned int len125; template static _ftype_t DotProduct(int count, const T* buf, const _ftype_t* coefficients) { int i; float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f, sum3 = 0.0f; // Unrolled loop for (i = 0; (i + 3) < count; i += 4) { sum0 += buf[i + 0] * coefficients[i + 0]; sum1 += buf[i + 1] * coefficients[i + 1]; sum2 += buf[i + 2] * coefficients[i + 2]; sum3 += buf[i + 3] * coefficients[i + 3]; } // Epilogue of unrolled loop for (; i < count; i++) sum0 += buf[i] * coefficients[i]; return sum0 + sum1 + sum2 + sum3; } template static T FIRFilter(const T* buf, int pos, int len, int count, const float* coefficients) { int count1, count2; if (pos >= count) { pos -= count; count1 = count; count2 = 0; } else { count2 = pos; count1 = count - pos; pos = len - count1; } // high part of window const T* ptr = &buf[pos]; float r1 = DotProduct(count1, ptr, coefficients); coefficients += count1; float r2 = DotProduct(count2, buf, coefficients); return T(r1 + r2); } /* // Hamming // 2*pi*k // w(k) = 0.54 - 0.46*cos(------), where 0 <= k < N // N-1 // // n window length // w buffer for the window parameters */ static void Hamming(int n, float* w) { float k = float(2 * M_PI / ((float)(n - 1))); // 2*pi/(N-1) // Calculate window coefficients for (int i = 0; i < n; i++) *w++ = float(0.54 - 0.46 * cos(k * (float)i)); } // FIR filter design /* Design FIR filter using the Window method n filter length must be odd for HP and BS filters w buffer for the filter taps (must be n long) fc cutoff frequencies (1 for LP and HP, 2 for BP and BS) 0 < fc < 1 where 1 <=> Fs/2 flags window and filter type as defined in filter.h variables are ored together: i.e. LP|HAMMING will give a low pass filter designed using a hamming window opt beta constant used only when designing using kaiser windows returns 0 if OK, -1 if fail */ static float* DesignFIR(unsigned int n, float fc, float opt) { unsigned int o = n & 1; // Indicator for odd filter length unsigned int end = ((n + 1) >> 1) - o; // Loop end float k1 = 2 * float(M_PI); // 2*pi*fc1 float k2 = 0.5f * (float)(1 - o); // Constant used if the filter has even length float g = 0.0f; // Gain float t1; // Temporary variables float fc1; // Cutoff frequencies // Sanity check if (n == 0) return nullptr; fc = MathUtil::Clamp(fc, 0.001f, 1.0f); float* w = (float*)calloc(sizeof(float), n); // Get window coefficients Hamming(n, w); fc1 = fc; // Cutoff frequency must be < 0.5 where 0.5 <=> Fs/2 fc1 = ((fc1 <= 1.0) && (fc1 > 0.0)) ? fc1 / 2 : 0.25f; k1 *= fc1; // Low pass filter // If the filter length is odd, there is one point which is exactly // in the middle. The value at this point is 2*fCutoff*sin(x)/x, // where x is zero. To make sure nothing strange happens, we set this // value separately. if (o) { w[end] = fc1 * w[end] * 2.0f; g = w[end]; } // Create filter for (u32 i = 0; i < end; i++) { t1 = (float)(i + 1) - k2; w[end - i - 1] = w[n - end + i] = float(w[end - i - 1] * sin(k1 * t1) / (M_PI * t1)); // Sinc g += 2 * w[end - i - 1]; // Total gain in filter } // Normalize gain g = 1 / g; for (u32 i = 0; i < n; i++) w[i] *= g; return w; } static void OnSeek() { l_fwr = r_fwr = lpr_fwr = lmr_fwr = 0; std::fill(fwrbuf_l.begin(), fwrbuf_l.end(), 0.0f); std::fill(fwrbuf_r.begin(), fwrbuf_r.end(), 0.0f); adapt_l_gain = adapt_r_gain = adapt_lpr_gain = adapt_lmr_gain = 0; std::fill(lf.begin(), lf.end(), 0.0f); std::fill(rf.begin(), rf.end(), 0.0f); std::fill(lr.begin(), lr.end(), 0.0f); std::fill(rr.begin(), rr.end(), 0.0f); std::fill(cf.begin(), cf.end(), 0.0f); std::fill(cr.begin(), cr.end(), 0.0f); lfe_pos = 0; memset(LFE_buf, 0, sizeof(LFE_buf)); } static void Done() { OnSeek(); if (filter_coefs_lfe) { free(filter_coefs_lfe); } filter_coefs_lfe = nullptr; } static float* CalculateCoefficients125HzLowpass(int rate) { len125 = 256; float f = 125.0f / (rate / 2); float* coeffs = DesignFIR(len125, f, 0); static const float M3_01DB = 0.7071067812f; for (unsigned int i = 0; i < len125; i++) { coeffs[i] *= M3_01DB; } return coeffs; } static float PassiveLock(float x) { static const float MATAGCLOCK = 0.2f; /* AGC range (around 1) where the matrix behaves passively */ const float x1 = x - 1; const float ax1s = fabs(x - 1) * (1.0f / MATAGCLOCK); return x1 - x1 / (1 + ax1s * ax1s) + 1; } static void MatrixDecode(const float* in, const int k, const int il, const int ir, bool decode_rear, const int _dlbuflen, float _l_fwr, float _r_fwr, float _lpr_fwr, float _lmr_fwr, float* _adapt_l_gain, float* _adapt_r_gain, float* _adapt_lpr_gain, float* _adapt_lmr_gain, float* _lf, float* _rf, float* _lr, float* _rr, float* _cf) { static const float M9_03DB = 0.3535533906f; static const float MATAGCTRIG = 8.0f; /* (Fuzzy) AGC trigger */ static const float MATAGCDECAY = 1.0f; /* AGC baseline decay rate (1/samp.) */ static const float MATCOMPGAIN = 0.37f; /* Cross talk compensation gain, 0.50 - 0.55 is full cancellation. */ const int kr = (k + olddelay) % _dlbuflen; float l_gain = (_l_fwr + _r_fwr) / (1 + _l_fwr + _l_fwr); float r_gain = (_l_fwr + _r_fwr) / (1 + _r_fwr + _r_fwr); // The 2nd axis has strong gain fluctuations, and therefore require // limits. The factor corresponds to the 1 / amplification of (Lt // - Rt) when (Lt, Rt) is strongly correlated. (e.g. during // dialogues). It should be bigger than -12 dB to prevent // distortion. float lmr_lim_fwr = _lmr_fwr > M9_03DB * _lpr_fwr ? _lmr_fwr : M9_03DB * _lpr_fwr; float lpr_gain = (_lpr_fwr + lmr_lim_fwr) / (1 + _lpr_fwr + _lpr_fwr); float lmr_gain = (_lpr_fwr + lmr_lim_fwr) / (1 + lmr_lim_fwr + lmr_lim_fwr); float lmr_unlim_gain = (_lpr_fwr + _lmr_fwr) / (1 + _lmr_fwr + _lmr_fwr); float lpr, lmr; float l_agc, r_agc, lpr_agc, lmr_agc; float f, d_gain, c_gain, c_agc_cfk; /*** AXIS NO. 1: (Lt, Rt) -> (C, Ls, Rs) ***/ /* AGC adaption */ d_gain = (fabs(l_gain - *_adapt_l_gain) + fabs(r_gain - *_adapt_r_gain)) * 0.5f; f = d_gain * (1.0f / MATAGCTRIG); f = MATAGCDECAY - MATAGCDECAY / (1 + f * f); *_adapt_l_gain = (1 - f) * *_adapt_l_gain + f * l_gain; *_adapt_r_gain = (1 - f) * *_adapt_r_gain + f * r_gain; /* Matrix */ l_agc = in[il] * PassiveLock(*_adapt_l_gain); r_agc = in[ir] * PassiveLock(*_adapt_r_gain); _cf[k] = (l_agc + r_agc) * (float)M_SQRT1_2; if (decode_rear) { _lr[kr] = _rr[kr] = (l_agc - r_agc) * (float)M_SQRT1_2; // Stereo rear channel is steered with the same AGC steering as // the decoding matrix. Note this requires a fast updating AGC // at the order of 20 ms (which is the case here). _lr[kr] *= (_l_fwr + _l_fwr) / (1 + _l_fwr + _r_fwr); _rr[kr] *= (_r_fwr + _r_fwr) / (1 + _l_fwr + _r_fwr); } /*** AXIS NO. 2: (Lt + Rt, Lt - Rt) -> (L, R) ***/ lpr = (in[il] + in[ir]) * (float)M_SQRT1_2; lmr = (in[il] - in[ir]) * (float)M_SQRT1_2; /* AGC adaption */ d_gain = fabs(lmr_unlim_gain - *_adapt_lmr_gain); f = d_gain * (1.0f / MATAGCTRIG); f = MATAGCDECAY - MATAGCDECAY / (1 + f * f); *_adapt_lpr_gain = (1 - f) * *_adapt_lpr_gain + f * lpr_gain; *_adapt_lmr_gain = (1 - f) * *_adapt_lmr_gain + f * lmr_gain; /* Matrix */ lpr_agc = lpr * PassiveLock(*_adapt_lpr_gain); lmr_agc = lmr * PassiveLock(*_adapt_lmr_gain); _lf[k] = (lpr_agc + lmr_agc) * (float)M_SQRT1_2; _rf[k] = (lpr_agc - lmr_agc) * (float)M_SQRT1_2; /*** CENTER FRONT CANCELLATION ***/ // A heuristic approach exploits that Lt + Rt gain contains the // information about Lt, Rt correlation. This effectively reshapes // the front and rear "cones" to concentrate Lt + Rt to C and // introduce Lt - Rt in L, R. /* 0.67677 is the empirical lower bound for lpr_gain. */ c_gain = 8 * (*_adapt_lpr_gain - 0.67677f); c_gain = c_gain > 0 ? c_gain : 0; // c_gain should not be too high, not even reaching full // cancellation (~ 0.50 - 0.55 at current AGC implementation), or // the center will sound too narrow. */ c_gain = MATCOMPGAIN / (1 + c_gain * c_gain); c_agc_cfk = c_gain * _cf[k]; _lf[k] -= c_agc_cfk; _rf[k] -= c_agc_cfk; _cf[k] += c_agc_cfk + c_agc_cfk; } void DPL2Decode(float* samples, int numsamples, float* out) { static const unsigned int FWRDURATION = 240; // FWR average duration (samples) static const int cfg_delay = 0; static const unsigned int fmt_freq = 48000; static const unsigned int fmt_nchannels = 2; // input channels int cur = 0; if (olddelay != cfg_delay || oldfreq != fmt_freq) { Done(); olddelay = cfg_delay; oldfreq = fmt_freq; dlbuflen = std::max(FWRDURATION, (fmt_freq * cfg_delay / 1000)); //+(len7000-1); cyc_pos = dlbuflen - 1; fwrbuf_l.resize(dlbuflen); fwrbuf_r.resize(dlbuflen); lf.resize(dlbuflen); rf.resize(dlbuflen); lr.resize(dlbuflen); rr.resize(dlbuflen); cf.resize(dlbuflen); cr.resize(dlbuflen); filter_coefs_lfe = CalculateCoefficients125HzLowpass(fmt_freq); lfe_pos = 0; memset(LFE_buf, 0, sizeof(LFE_buf)); } float* in = samples; // Input audio data float* end = in + numsamples * fmt_nchannels; // Loop end while (in < end) { const int k = cyc_pos; const int fwr_pos = (k + FWRDURATION) % dlbuflen; /* Update the full wave rectified total amplitude */ /* Input matrix decoder */ l_fwr += fabs(in[0]) - fabs(fwrbuf_l[fwr_pos]); r_fwr += fabs(in[1]) - fabs(fwrbuf_r[fwr_pos]); lpr_fwr += fabs(in[0] + in[1]) - fabs(fwrbuf_l[fwr_pos] + fwrbuf_r[fwr_pos]); lmr_fwr += fabs(in[0] - in[1]) - fabs(fwrbuf_l[fwr_pos] - fwrbuf_r[fwr_pos]); /* Matrix encoded 2 channel sources */ fwrbuf_l[k] = in[0]; fwrbuf_r[k] = in[1]; MatrixDecode(in, k, 0, 1, true, dlbuflen, l_fwr, r_fwr, lpr_fwr, lmr_fwr, &adapt_l_gain, &adapt_r_gain, &adapt_lpr_gain, &adapt_lmr_gain, &lf[0], &rf[0], &lr[0], &rr[0], &cf[0]); out[cur + 0] = lf[k]; out[cur + 1] = rf[k]; out[cur + 2] = cf[k]; LFE_buf[lfe_pos] = (lf[k] + rf[k] + 2.0f * cf[k] + lr[k] + rr[k]) / 2.0f; out[cur + 3] = FIRFilter(LFE_buf, lfe_pos, len125, len125, filter_coefs_lfe); lfe_pos++; if (lfe_pos == len125) { lfe_pos = 0; } out[cur + 4] = lr[k]; out[cur + 5] = rr[k]; // Next sample... in += 2; cur += 6; cyc_pos--; if (cyc_pos < 0) { cyc_pos += dlbuflen; } } } void DPL2Reset() { olddelay = -1; oldfreq = 0; filter_coefs_lfe = nullptr; }