WebRTC AEC non-linear processing (NLP) suppression algorithm.

The code is posted as-is below; detailed annotations to be added later.

// Non-linear processing (NLP) stage of the WebRTC AEC.
//
// Pipeline: (1) locate the dominant adaptive-filter partition to pick a
// delayed far-end spectrum, (2) compute windowed FFTs of the near-end and
// error signals, (3) update smoothed PSD/cross-PSD estimates and derive
// subband coherences, (4) build a per-bin suppression gain |hNl| with
// overdrive tracking, (5) apply suppression and comfort noise to the error
// spectrum, and (6) synthesize the time-domain output with an inverse FFT
// and 50%-overlap-add. Also produces a scalar-gain high band when running
// at 32 kHz.
//
// Parameters:
//   aec     - AEC core state (filters, PSD estimates, history buffers);
//             read and updated in place.
//   output  - [out] PART_LEN low-band output samples (saturated to int16).
//   outputH - [out] PART_LEN high-band output samples; written only when
//             aec->sampFreq == 32000.
static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
  float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
  complex_t comfortNoiseHband[PART_LEN1];
  float fft[PART_LEN2];
  float scale, dtmp;
  float nlpGainHband;
  int i, j, pos;

  // Coherence and non-linear filter
  float cohde[PART_LEN1], cohxd[PART_LEN1];
  float hNlDeAvg, hNlXdAvg;
  float hNl[PART_LEN1];
  float hNlPref[kPrefBandSize];
  float hNlFb = 0, hNlFbLow = 0;
  const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
  // The "preferred band" (the bins used for the averaged decisions below)
  // is scaled by aec->mult so it covers the same frequencies regardless of
  // sample rate.
  const int prefBandSize = kPrefBandSize / aec->mult;
  const int minPrefBand = 4 / aec->mult;

  // Near and error power sums
  float sdSum = 0, seSum = 0;

  // Power estimate smoothing coefficients.
  const float* ptrGCoh = aec->extended_filter_enabled
                             ? kExtendedSmoothingCoefficients[aec->mult - 1]
                             : kNormalSmoothingCoefficients[aec->mult - 1];
  const float* min_overdrive = aec->extended_filter_enabled
                                   ? kExtendedMinOverDrive
                                   : kNormalMinOverDrive;

  // Filter energy
  float wfEnMax = 0, wfEn = 0;
  const int delayEstInterval = 10 * aec->mult;

  float* xfw_ptr = NULL;

  // Re-estimate the filter delay only every |delayEstInterval| calls.
  aec->delayEstCtr++;
  if (aec->delayEstCtr == delayEstInterval) {
    aec->delayEstCtr = 0;
  }

  // initialize comfort noise for H band
  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
  nlpGainHband = (float)0.0;
  dtmp = (float)0.0;

  // Measure energy in each filter partition to determine delay.
  // The partition with the highest |wfBuf| energy is taken as the bulk
  // echo-path delay (in blocks), used below to pick a delayed far-end.
  // TODO: Spread by computing one partition per block?
  if (aec->delayEstCtr == 0) {
    wfEnMax = 0;
    aec->delayIdx = 0;
    for (i = 0; i < aec->num_partitions; i++) {
      pos = i * PART_LEN1;
      wfEn = 0;
      for (j = 0; j < PART_LEN1; j++) {
        // Re^2 + Im^2 of each filter coefficient.
        wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
                aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
      }

      if (wfEn > wfEnMax) {
        wfEnMax = wfEn;
        aec->delayIdx = i;
      }
    }
  }

  // We should always have at least one element stored in |far_buf|.
  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
  // NLP
  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);

  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
  // |xfwBuf|.
  // Buffer far.
  memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);

  // Use delayed far: select the far-end spectrum |delayIdx| blocks back so
  // the coherence estimates are time-aligned with the echo.
  memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));

  // Windowed near fft
  for (i = 0; i < PART_LEN; i++) {
    fft[i] = aec->dBuf[i] * sqrtHanning[i];
    fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }
  aec_rdft_forward_128(fft);

  // Unpack the Ooura rdft output (fft[0] = DC real, fft[1] = Nyquist real,
  // then interleaved Re/Im) into split real/imaginary arrays. DC and
  // Nyquist bins are purely real for a real input signal.
  dfw[1][0] = 0;
  dfw[1][PART_LEN] = 0;
  dfw[0][0] = fft[0];
  dfw[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    dfw[0][i] = fft[2 * i];
    dfw[1][i] = fft[2 * i + 1];
  }

  // Windowed error fft
  for (i = 0; i < PART_LEN; i++) {
    fft[i] = aec->eBuf[i] * sqrtHanning[i];
    fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }
  aec_rdft_forward_128(fft);
  efw[1][0] = 0;
  efw[1][PART_LEN] = 0;
  efw[0][0] = fft[0];
  efw[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    efw[0][i] = fft[2 * i];
    efw[1][i] = fft[2 * i + 1];
  }

  // Smoothed PSD: first-order recursive averages of the auto spectra
  // (sd = near, se = error, sx = far) and cross spectra (sde = near/error,
  // sxd = far/near), used below for the coherence estimates.
  for (i = 0; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] +
                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO: investigate further why this is so sensitive.
    aec->sx[i] =
        ptrGCoh[0] * aec->sx[i] +
        ptrGCoh[1] *
            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);

    // Complex cross-PSDs: [0] holds the real part, [1] the imaginary part
    // (conjugate products).
    aec->sde[i][0] =
        ptrGCoh[0] * aec->sde[i][0] +
        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] =
        ptrGCoh[0] * aec->sde[i][1] +
        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    aec->sxd[i][0] =
        ptrGCoh[0] * aec->sxd[i][0] +
        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] =
        ptrGCoh[0] * aec->sxd[i][1] +
        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard: if the error energy exceeds the near-end
  // energy the adaptive filter is adding energy rather than removing it.
  // Enter the diverged state, and leave it only with 5% hysteresis.
  if (aec->divergeState == 0) {
    if (seSum > sdSum) {
      aec->divergeState = 1;
    }
  } else {
    if (seSum * 1.05f < sdSum) {
      aec->divergeState = 0;
    }
  }

  // While diverged, pass the near-end spectrum through instead of the
  // (worse) filter error.
  if (aec->divergeState == 1) {
    memcpy(efw, dfw, sizeof(efw));
  }

  if (!aec->extended_filter_enabled) {
    // Reset if error is significantly larger than nearend (13 dB).
    // 19.95 ~= 10^(13/10), i.e. a 13 dB power ratio.
    if (seSum > (19.95f * sdSum)) {
      memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
    }
  }

  // Subband coherence: magnitude-squared coherence |Sab|^2 / (Saa * Sbb)
  // in [0, 1]. cohde is high where the error still resembles the near-end
  // (i.e. little echo removed / near-end speech); cohxd is high where the
  // near-end resembles the far-end (i.e. echo present).
  for (i = 0; i < PART_LEN1; i++) {
    cohde[i] =
        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
        (aec->sd[i] * aec->se[i] + 1e-10f);
    cohxd[i] =
        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
        (aec->sx[i] * aec->sd[i] + 1e-10f);
  }

  // Average (1 - cohxd) over the preferred band: ~1 means no echo.
  hNlXdAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlXdAvg += cohxd[i];
  }
  hNlXdAvg /= prefBandSize;
  hNlXdAvg = 1 - hNlXdAvg;

  // Average cohde over the preferred band: ~1 means near-end dominates.
  hNlDeAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlDeAvg += cohde[i];
  }
  hNlDeAvg /= prefBandSize;

  // Track the minimum echo-free measure (only meaningful below 0.75).
  if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
    aec->hNlXdAvgMin = hNlXdAvg;
  }

  // Near-end (single-talk) state detection with hysteresis.
  if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
    aec->stNearState = 1;
  } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
    aec->stNearState = 0;
  }

  // hNlXdAvgMin == 1 means no echo has been observed (the minimum tracker
  // has fully decayed back to 1): use the milder gain selection.
  if (aec->hNlXdAvgMin == 1) {
    aec->echoState = 0;
    aec->overDrive = min_overdrive[aec->nlp_mode];

    if (aec->stNearState == 1) {
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    } else {
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = 1 - cohxd[i];
      }
      hNlFb = hNlXdAvg;
      hNlFbLow = hNlXdAvg;
    }
  } else {

    if (aec->stNearState == 1) {
      aec->echoState = 0;
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    } else {
      // Echo present: take the more suppressive of the two measures per bin.
      aec->echoState = 1;
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
      }

      // Select an order statistic from the preferred bands.
      // (75th percentile for the feedback gain, median for the low one.)
      // TODO: Using quicksort now, but a selection algorithm may be preferred.
      memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
      qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
      hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
      hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
    }
  }

  // Track the local filter minimum to determine suppression overdrive.
  if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
    aec->hNlFbLocalMin = hNlFbLow;
    aec->hNlFbMin = hNlFbLow;
    aec->hNlNewMin = 1;
    aec->hNlMinCtr = 0;
  }
  // Slowly decay both minimum trackers back toward 1 so stale minima expire.
  aec->hNlFbLocalMin =
      WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
  aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);

  if (aec->hNlNewMin == 1) {
    aec->hNlMinCtr++;
  }
  // Two blocks after a new minimum was found, recompute the overdrive so
  // that hNlFbMin^overDrive reaches the per-mode target suppression.
  if (aec->hNlMinCtr == 2) {
    aec->hNlNewMin = 0;
    aec->hNlMinCtr = 0;
    aec->overDrive =
        WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
                           ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
                       min_overdrive[aec->nlp_mode]);
  }

  // Smooth the overdrive: release slowly (0.99/0.01) when decreasing,
  // attack faster (0.9/0.1) when increasing.
  if (aec->overDrive < aec->overDriveSm) {
    aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
  } else {
    aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
  }

  // Apply the (overdriven) suppression gains to the error spectrum in place.
  WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);

  // Add comfort noise.
  ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);

  // TODO(bjornv): Investigate how to take the windowing below into account if
  // needed.
  if (aec->metricsMode == 1) {
    // Note that we have a scaling by two in the time domain |eBuf|.
    // In addition the time domain signal is windowed before transformation,
    // losing half the energy on the average. We take care of the first
    // scaling only in UpdateMetrics().
    UpdateLevel(&aec->nlpoutlevel, efw);
  }
  // Inverse error fft: re-pack split Re/Im into the Ooura interleaved format.
  fft[0] = efw[0][0];
  fft[1] = efw[0][PART_LEN];
  for (i = 1; i < PART_LEN; i++) {
    fft[2 * i] = efw[0][i];
    // Sign change required by Ooura fft.
    fft[2 * i + 1] = -efw[1][i];
  }
  aec_rdft_inverse_128(fft);

  // Overlap and add to obtain output.
  scale = 2.0f / PART_LEN2;
  for (i = 0; i < PART_LEN; i++) {
    fft[i] *= scale;  // fft scaling
    // First half: window and add the overlap saved from the previous block.
    fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i];

    // Saturation protection
    output[i] = (short)WEBRTC_SPL_SAT(
        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);

    fft[PART_LEN + i] *= scale;  // fft scaling
    // Second half: window and stash as the overlap for the next block.
    aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }

  // For H band
  if (aec->sampFreq == 32000) {

    // H band gain
    // average nlp over low band: average over second half of freq spectrum
    // (4->8khz)
    GetHighbandGain(hNl, &nlpGainHband);

    // Inverse comfort_noise
    if (flagHbandCn == 1) {
      fft[0] = comfortNoiseHband[0][0];
      fft[1] = comfortNoiseHband[PART_LEN][0];
      for (i = 1; i < PART_LEN; i++) {
        fft[2 * i] = comfortNoiseHband[i][0];
        fft[2 * i + 1] = comfortNoiseHband[i][1];
      }
      aec_rdft_inverse_128(fft);
      scale = 2.0f / PART_LEN2;
    }

    // compute gain factor
    for (i = 0; i < PART_LEN; i++) {
      dtmp = (float)aec->dBufH[i];
      dtmp = (float)dtmp * nlpGainHband;  // for variable gain

      // add some comfort noise where Hband is attenuated
      if (flagHbandCn == 1) {
        fft[i] *= scale;  // fft scaling
        dtmp += cnScaleHband * fft[i];
      }

      // Saturation protection
      outputH[i] = (short)WEBRTC_SPL_SAT(
          WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
    }
  }

  // Copy the current block to the old position (shift for 50% overlap).
  memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
  memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);

  // Copy the current block to the old position for H band
  if (aec->sampFreq == 32000) {
    memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
  }

  // Shift the far-end spectrum history one block; overlapping regions, so
  // memmove (not memcpy) is required.
  memmove(aec->xfwBuf + PART_LEN1,
          aec->xfwBuf,
          sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
}

// Derives a single high-band suppression gain as the mean of the per-bin
// NLP gains |lambda| over the upper low-band bins [freqAvgIc, PART_LEN1 - 1).
//
// Parameters:
//   lambda       - per-bin suppression gains for the low band.
//   nlpGainHband - [out] receives the averaged gain.
static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
  float sum = 0.0f;
  int k;

  for (k = freqAvgIc; k < PART_LEN1 - 1; k++) {
    sum += lambda[k];
  }
  *nlpGainHband = sum / (float)(PART_LEN1 - 1 - freqAvgIc);
}

Source: reposted from blog.csdn.net/ljl86400/article/details/81081561