1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 #include <math.h> 13 #include <string.h> 14 #include <stdlib.h> 15 16 #include "webrtc/common_audio/fft4g.h" 17 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 18 #include "webrtc/modules/audio_processing/ns/noise_suppression.h" 19 #include "webrtc/modules/audio_processing/ns/ns_core.h" 20 #include "webrtc/modules/audio_processing/ns/windows_private.h" 21 22 // Set Feature Extraction Parameters. 23 static void set_feature_extraction_parameters(NoiseSuppressionC* self) { 24 // Bin size of histogram. 25 self->featureExtractionParams.binSizeLrt = 0.1f; 26 self->featureExtractionParams.binSizeSpecFlat = 0.05f; 27 self->featureExtractionParams.binSizeSpecDiff = 0.1f; 28 29 // Range of histogram over which LRT threshold is computed. 30 self->featureExtractionParams.rangeAvgHistLrt = 1.f; 31 32 // Scale parameters: multiply dominant peaks of the histograms by scale factor 33 // to obtain thresholds for prior model. 34 // For LRT and spectral difference. 35 self->featureExtractionParams.factor1ModelPars = 1.2f; 36 // For spectral_flatness: used when noise is flatter than speech. 37 self->featureExtractionParams.factor2ModelPars = 0.9f; 38 39 // Peak limit for spectral flatness (varies between 0 and 1). 40 self->featureExtractionParams.thresPosSpecFlat = 0.6f; 41 42 // Limit on spacing of two highest peaks in histogram: spacing determined by 43 // bin size. 44 self->featureExtractionParams.limitPeakSpacingSpecFlat = 45 2 * self->featureExtractionParams.binSizeSpecFlat; 46 self->featureExtractionParams.limitPeakSpacingSpecDiff = 47 2 * self->featureExtractionParams.binSizeSpecDiff; 48 49 // Limit on relevance of second peak. 50 self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f; 51 self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f; 52 53 // Fluctuation limit of LRT feature. 54 self->featureExtractionParams.thresFluctLrt = 0.05f; 55 56 // Limit on the max and min values for the feature thresholds. 57 self->featureExtractionParams.maxLrt = 1.f; 58 self->featureExtractionParams.minLrt = 0.2f; 59 60 self->featureExtractionParams.maxSpecFlat = 0.95f; 61 self->featureExtractionParams.minSpecFlat = 0.1f; 62 63 self->featureExtractionParams.maxSpecDiff = 1.f; 64 self->featureExtractionParams.minSpecDiff = 0.16f; 65 66 // Criteria of weight of histogram peak to accept/reject feature. 67 self->featureExtractionParams.thresWeightSpecFlat = 68 (int)(0.3 * (self->modelUpdatePars[1])); // For spectral flatness. 69 self->featureExtractionParams.thresWeightSpecDiff = 70 (int)(0.3 * (self->modelUpdatePars[1])); // For spectral difference. 71 } 72 73 // Initialize state. 74 int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) { 75 int i; 76 // Check for valid pointer. 77 if (self == NULL) { 78 return -1; 79 } 80 81 // Initialization of struct. 82 if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { 83 self->fs = fs; 84 } else { 85 return -1; 86 } 87 self->windShift = 0; 88 // We only support 10ms frames. 89 if (fs == 8000) { 90 self->blockLen = 80; 91 self->anaLen = 128; 92 self->window = kBlocks80w128; 93 } else { 94 self->blockLen = 160; 95 self->anaLen = 256; 96 self->window = kBlocks160w256; 97 } 98 self->magnLen = self->anaLen / 2 + 1; // Number of frequency bins. 99 100 // Initialize FFT work arrays. 101 self->ip[0] = 0; // Setting this triggers initialization. 102 memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 103 WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft); 104 105 memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 106 memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 107 memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 108 109 // For HB processing. 110 memset(self->dataBufHB, 111 0, 112 sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); 113 114 // For quantile noise estimation. 115 memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); 116 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { 117 self->lquantile[i] = 8.f; 118 self->density[i] = 0.3f; 119 } 120 121 for (i = 0; i < SIMULT; i++) { 122 self->counter[i] = 123 (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); 124 } 125 126 self->updates = 0; 127 128 // Wiener filter initialization. 129 for (i = 0; i < HALF_ANAL_BLOCKL; i++) { 130 self->smooth[i] = 1.f; 131 } 132 133 // Set the aggressiveness: default. 134 self->aggrMode = 0; 135 136 // Initialize variables for new method. 137 self->priorSpeechProb = 0.5f; // Prior prob for speech/noise. 138 // Previous analyze mag spectrum. 139 memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL); 140 // Previous process mag spectrum. 141 memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL); 142 // Current noise-spectrum. 143 memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL); 144 // Previous noise-spectrum. 145 memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL); 146 // Conservative noise spectrum estimate. 147 memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL); 148 // For estimation of HB in second pass. 149 memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL); 150 // Initial average magnitude spectrum. 151 memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL); 152 for (i = 0; i < HALF_ANAL_BLOCKL; i++) { 153 // Smooth LR (same as threshold). 154 self->logLrtTimeAvg[i] = LRT_FEATURE_THR; 155 } 156 157 // Feature quantities. 158 // Spectral flatness (start on threshold). 159 self->featureData[0] = SF_FEATURE_THR; 160 self->featureData[1] = 0.f; // Spectral entropy: not used in this version. 161 self->featureData[2] = 0.f; // Spectral variance: not used in this version. 162 // Average LRT factor (start on threshold). 163 self->featureData[3] = LRT_FEATURE_THR; 164 // Spectral template diff (start on threshold). 165 self->featureData[4] = SF_FEATURE_THR; 166 self->featureData[5] = 0.f; // Normalization for spectral difference. 167 // Window time-average of input magnitude spectrum. 168 self->featureData[6] = 0.f; 169 170 // Histogram quantities: used to estimate/update thresholds for features. 171 memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST); 172 memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST); 173 memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST); 174 175 176 self->blockInd = -1; // Frame counter. 177 // Default threshold for LRT feature. 178 self->priorModelPars[0] = LRT_FEATURE_THR; 179 // Threshold for spectral flatness: determined on-line. 180 self->priorModelPars[1] = 0.5f; 181 // sgn_map par for spectral measure: 1 for flatness measure. 182 self->priorModelPars[2] = 1.f; 183 // Threshold for template-difference feature: determined on-line. 184 self->priorModelPars[3] = 0.5f; 185 // Default weighting parameter for LRT feature. 186 self->priorModelPars[4] = 1.f; 187 // Default weighting parameter for spectral flatness feature. 188 self->priorModelPars[5] = 0.f; 189 // Default weighting parameter for spectral difference feature. 190 self->priorModelPars[6] = 0.f; 191 192 // Update flag for parameters: 193 // 0 no update, 1 = update once, 2 = update every window. 194 self->modelUpdatePars[0] = 2; 195 self->modelUpdatePars[1] = 500; // Window for update. 196 // Counter for update of conservative noise spectrum. 197 self->modelUpdatePars[2] = 0; 198 // Counter if the feature thresholds are updated during the sequence. 199 self->modelUpdatePars[3] = self->modelUpdatePars[1]; 200 201 self->signalEnergy = 0.0; 202 self->sumMagn = 0.0; 203 self->whiteNoiseLevel = 0.0; 204 self->pinkNoiseNumerator = 0.0; 205 self->pinkNoiseExp = 0.0; 206 207 set_feature_extraction_parameters(self); 208 209 // Default mode. 210 WebRtcNs_set_policy_core(self, 0); 211 212 self->initFlag = 1; 213 return 0; 214 } 215 216 // Estimate noise. 217 static void NoiseEstimation(NoiseSuppressionC* self, 218 float* magn, 219 float* noise) { 220 size_t i, s, offset; 221 float lmagn[HALF_ANAL_BLOCKL], delta; 222 223 if (self->updates < END_STARTUP_LONG) { 224 self->updates++; 225 } 226 227 for (i = 0; i < self->magnLen; i++) { 228 lmagn[i] = (float)log(magn[i]); 229 } 230 231 // Loop over simultaneous estimates. 232 for (s = 0; s < SIMULT; s++) { 233 offset = s * self->magnLen; 234 235 // newquantest(...) 236 for (i = 0; i < self->magnLen; i++) { 237 // Compute delta. 238 if (self->density[offset + i] > 1.0) { 239 delta = FACTOR * 1.f / self->density[offset + i]; 240 } else { 241 delta = FACTOR; 242 } 243 244 // Update log quantile estimate. 245 if (lmagn[i] > self->lquantile[offset + i]) { 246 self->lquantile[offset + i] += 247 QUANTILE * delta / (float)(self->counter[s] + 1); 248 } else { 249 self->lquantile[offset + i] -= 250 (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1); 251 } 252 253 // Update density estimate. 254 if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) { 255 self->density[offset + i] = 256 ((float)self->counter[s] * self->density[offset + i] + 257 1.f / (2.f * WIDTH)) / 258 (float)(self->counter[s] + 1); 259 } 260 } // End loop over magnitude spectrum. 261 262 if (self->counter[s] >= END_STARTUP_LONG) { 263 self->counter[s] = 0; 264 if (self->updates >= END_STARTUP_LONG) { 265 for (i = 0; i < self->magnLen; i++) { 266 self->quantile[i] = (float)exp(self->lquantile[offset + i]); 267 } 268 } 269 } 270 271 self->counter[s]++; 272 } // End loop over simultaneous estimates. 273 274 // Sequentially update the noise during startup. 275 if (self->updates < END_STARTUP_LONG) { 276 // Use the last "s" to get noise during startup that differ from zero. 277 for (i = 0; i < self->magnLen; i++) { 278 self->quantile[i] = (float)exp(self->lquantile[offset + i]); 279 } 280 } 281 282 for (i = 0; i < self->magnLen; i++) { 283 noise[i] = self->quantile[i]; 284 } 285 } 286 287 // Extract thresholds for feature parameters. 288 // Histograms are computed over some window size (given by 289 // self->modelUpdatePars[1]). 290 // Thresholds and weights are extracted every window. 291 // |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights. 292 // Threshold and weights are returned in: self->priorModelPars. 293 static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) { 294 int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; 295 int maxPeak1, maxPeak2; 296 int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, 297 weightPeak2SpecDiff; 298 299 float binMid, featureSum; 300 float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; 301 float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; 302 303 // 3 features: LRT, flatness, difference. 304 // lrt_feature = self->featureData[3]; 305 // flat_feature = self->featureData[0]; 306 // diff_feature = self->featureData[4]; 307 308 // Update histograms. 309 if (flag == 0) { 310 // LRT 311 if ((self->featureData[3] < 312 HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) && 313 (self->featureData[3] >= 0.0)) { 314 i = (int)(self->featureData[3] / 315 self->featureExtractionParams.binSizeLrt); 316 self->histLrt[i]++; 317 } 318 // Spectral flatness. 319 if ((self->featureData[0] < 320 HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) && 321 (self->featureData[0] >= 0.0)) { 322 i = (int)(self->featureData[0] / 323 self->featureExtractionParams.binSizeSpecFlat); 324 self->histSpecFlat[i]++; 325 } 326 // Spectral difference. 327 if ((self->featureData[4] < 328 HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) && 329 (self->featureData[4] >= 0.0)) { 330 i = (int)(self->featureData[4] / 331 self->featureExtractionParams.binSizeSpecDiff); 332 self->histSpecDiff[i]++; 333 } 334 } 335 336 // Extract parameters for speech/noise probability. 337 if (flag == 1) { 338 // LRT feature: compute the average over 339 // self->featureExtractionParams.rangeAvgHistLrt. 340 avgHistLrt = 0.0; 341 avgHistLrtCompl = 0.0; 342 avgSquareHistLrt = 0.0; 343 numHistLrt = 0; 344 for (i = 0; i < HIST_PAR_EST; i++) { 345 binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt; 346 if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) { 347 avgHistLrt += self->histLrt[i] * binMid; 348 numHistLrt += self->histLrt[i]; 349 } 350 avgSquareHistLrt += self->histLrt[i] * binMid * binMid; 351 avgHistLrtCompl += self->histLrt[i] * binMid; 352 } 353 if (numHistLrt > 0) { 354 avgHistLrt = avgHistLrt / ((float)numHistLrt); 355 } 356 avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]); 357 avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]); 358 fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; 359 // Get threshold for LRT feature. 360 if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { 361 // Very low fluctuation, so likely noise. 362 self->priorModelPars[0] = self->featureExtractionParams.maxLrt; 363 } else { 364 self->priorModelPars[0] = 365 self->featureExtractionParams.factor1ModelPars * avgHistLrt; 366 // Check if value is within min/max range. 367 if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) { 368 self->priorModelPars[0] = self->featureExtractionParams.minLrt; 369 } 370 if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) { 371 self->priorModelPars[0] = self->featureExtractionParams.maxLrt; 372 } 373 } 374 // Done with LRT feature. 375 376 // For spectral flatness and spectral difference: compute the main peaks of 377 // histogram. 378 maxPeak1 = 0; 379 maxPeak2 = 0; 380 posPeak1SpecFlat = 0.0; 381 posPeak2SpecFlat = 0.0; 382 weightPeak1SpecFlat = 0; 383 weightPeak2SpecFlat = 0; 384 385 // Peaks for flatness. 386 for (i = 0; i < HIST_PAR_EST; i++) { 387 binMid = 388 (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat; 389 if (self->histSpecFlat[i] > maxPeak1) { 390 // Found new "first" peak. 391 maxPeak2 = maxPeak1; 392 weightPeak2SpecFlat = weightPeak1SpecFlat; 393 posPeak2SpecFlat = posPeak1SpecFlat; 394 395 maxPeak1 = self->histSpecFlat[i]; 396 weightPeak1SpecFlat = self->histSpecFlat[i]; 397 posPeak1SpecFlat = binMid; 398 } else if (self->histSpecFlat[i] > maxPeak2) { 399 // Found new "second" peak. 400 maxPeak2 = self->histSpecFlat[i]; 401 weightPeak2SpecFlat = self->histSpecFlat[i]; 402 posPeak2SpecFlat = binMid; 403 } 404 } 405 406 // Compute two peaks for spectral difference. 407 maxPeak1 = 0; 408 maxPeak2 = 0; 409 posPeak1SpecDiff = 0.0; 410 posPeak2SpecDiff = 0.0; 411 weightPeak1SpecDiff = 0; 412 weightPeak2SpecDiff = 0; 413 // Peaks for spectral difference. 414 for (i = 0; i < HIST_PAR_EST; i++) { 415 binMid = 416 ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff; 417 if (self->histSpecDiff[i] > maxPeak1) { 418 // Found new "first" peak. 419 maxPeak2 = maxPeak1; 420 weightPeak2SpecDiff = weightPeak1SpecDiff; 421 posPeak2SpecDiff = posPeak1SpecDiff; 422 423 maxPeak1 = self->histSpecDiff[i]; 424 weightPeak1SpecDiff = self->histSpecDiff[i]; 425 posPeak1SpecDiff = binMid; 426 } else if (self->histSpecDiff[i] > maxPeak2) { 427 // Found new "second" peak. 428 maxPeak2 = self->histSpecDiff[i]; 429 weightPeak2SpecDiff = self->histSpecDiff[i]; 430 posPeak2SpecDiff = binMid; 431 } 432 } 433 434 // For spectrum flatness feature. 435 useFeatureSpecFlat = 1; 436 // Merge the two peaks if they are close. 437 if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) < 438 self->featureExtractionParams.limitPeakSpacingSpecFlat) && 439 (weightPeak2SpecFlat > 440 self->featureExtractionParams.limitPeakWeightsSpecFlat * 441 weightPeak1SpecFlat)) { 442 weightPeak1SpecFlat += weightPeak2SpecFlat; 443 posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat); 444 } 445 // Reject if weight of peaks is not large enough, or peak value too small. 446 if (weightPeak1SpecFlat < 447 self->featureExtractionParams.thresWeightSpecFlat || 448 posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) { 449 useFeatureSpecFlat = 0; 450 } 451 // If selected, get the threshold. 452 if (useFeatureSpecFlat == 1) { 453 // Compute the threshold. 454 self->priorModelPars[1] = 455 self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat; 456 // Check if value is within min/max range. 457 if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) { 458 self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat; 459 } 460 if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) { 461 self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat; 462 } 463 } 464 // Done with flatness feature. 465 466 // For template feature. 467 useFeatureSpecDiff = 1; 468 // Merge the two peaks if they are close. 469 if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) < 470 self->featureExtractionParams.limitPeakSpacingSpecDiff) && 471 (weightPeak2SpecDiff > 472 self->featureExtractionParams.limitPeakWeightsSpecDiff * 473 weightPeak1SpecDiff)) { 474 weightPeak1SpecDiff += weightPeak2SpecDiff; 475 posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff); 476 } 477 // Get the threshold value. 478 self->priorModelPars[3] = 479 self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff; 480 // Reject if weight of peaks is not large enough. 481 if (weightPeak1SpecDiff < 482 self->featureExtractionParams.thresWeightSpecDiff) { 483 useFeatureSpecDiff = 0; 484 } 485 // Check if value is within min/max range. 486 if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) { 487 self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff; 488 } 489 if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) { 490 self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff; 491 } 492 // Done with spectral difference feature. 493 494 // Don't use template feature if fluctuation of LRT feature is very low: 495 // most likely just noise state. 496 if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { 497 useFeatureSpecDiff = 0; 498 } 499 500 // Select the weights between the features. 501 // self->priorModelPars[4] is weight for LRT: always selected. 502 // self->priorModelPars[5] is weight for spectral flatness. 503 // self->priorModelPars[6] is weight for spectral difference. 504 featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); 505 self->priorModelPars[4] = 1.f / featureSum; 506 self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; 507 self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; 508 509 // Set hists to zero for next update. 510 if (self->modelUpdatePars[0] >= 1) { 511 for (i = 0; i < HIST_PAR_EST; i++) { 512 self->histLrt[i] = 0; 513 self->histSpecFlat[i] = 0; 514 self->histSpecDiff[i] = 0; 515 } 516 } 517 } // End of flag == 1. 518 } 519 520 // Compute spectral flatness on input spectrum. 521 // |magnIn| is the magnitude spectrum. 522 // Spectral flatness is returned in self->featureData[0]. 523 static void ComputeSpectralFlatness(NoiseSuppressionC* self, 524 const float* magnIn) { 525 size_t i; 526 size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures. 527 float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; 528 529 // Compute spectral measures. 530 // For flatness. 531 avgSpectralFlatnessNum = 0.0; 532 avgSpectralFlatnessDen = self->sumMagn; 533 for (i = 0; i < shiftLP; i++) { 534 avgSpectralFlatnessDen -= magnIn[i]; 535 } 536 // Compute log of ratio of the geometric to arithmetic mean: check for log(0) 537 // case. 538 for (i = shiftLP; i < self->magnLen; i++) { 539 if (magnIn[i] > 0.0) { 540 avgSpectralFlatnessNum += (float)log(magnIn[i]); 541 } else { 542 self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0]; 543 return; 544 } 545 } 546 // Normalize. 547 avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen; 548 avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen; 549 550 // Ratio and inverse log: check for case of log(0). 551 spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; 552 553 // Time-avg update of spectral flatness feature. 554 self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]); 555 // Done with flatness feature. 556 } 557 558 // Compute prior and post SNR based on quantile noise estimation. 559 // Compute DD estimate of prior SNR. 560 // Inputs: 561 // * |magn| is the signal magnitude spectrum estimate. 562 // * |noise| is the magnitude noise spectrum estimate. 563 // Outputs: 564 // * |snrLocPrior| is the computed prior SNR. 565 // * |snrLocPost| is the computed post SNR. 566 static void ComputeSnr(const NoiseSuppressionC* self, 567 const float* magn, 568 const float* noise, 569 float* snrLocPrior, 570 float* snrLocPost) { 571 size_t i; 572 573 for (i = 0; i < self->magnLen; i++) { 574 // Previous post SNR. 575 // Previous estimate: based on previous frame with gain filter. 576 float previousEstimateStsa = self->magnPrevAnalyze[i] / 577 (self->noisePrev[i] + 0.0001f) * self->smooth[i]; 578 // Post SNR. 579 snrLocPost[i] = 0.f; 580 if (magn[i] > noise[i]) { 581 snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f; 582 } 583 // DD estimate is sum of two terms: current estimate and previous estimate. 584 // Directed decision update of snrPrior. 585 snrLocPrior[i] = 586 DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i]; 587 } // End of loop over frequencies. 588 } 589 590 // Compute the difference measure between input spectrum and a template/learned 591 // noise spectrum. 592 // |magnIn| is the input spectrum. 593 // The reference/template spectrum is self->magnAvgPause[i]. 594 // Returns (normalized) spectral difference in self->featureData[4]. 595 static void ComputeSpectralDifference(NoiseSuppressionC* self, 596 const float* magnIn) { 597 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / 598 // var(magnAvgPause) 599 size_t i; 600 float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; 601 602 avgPause = 0.0; 603 avgMagn = self->sumMagn; 604 // Compute average quantities. 605 for (i = 0; i < self->magnLen; i++) { 606 // Conservative smooth noise spectrum from pause frames. 607 avgPause += self->magnAvgPause[i]; 608 } 609 avgPause /= self->magnLen; 610 avgMagn /= self->magnLen; 611 612 covMagnPause = 0.0; 613 varPause = 0.0; 614 varMagn = 0.0; 615 // Compute variance and covariance quantities. 616 for (i = 0; i < self->magnLen; i++) { 617 covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause); 618 varPause += 619 (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause); 620 varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); 621 } 622 covMagnPause /= self->magnLen; 623 varPause /= self->magnLen; 624 varMagn /= self->magnLen; 625 // Update of average magnitude spectrum. 626 self->featureData[6] += self->signalEnergy; 627 628 avgDiffNormMagn = 629 varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f); 630 // Normalize and compute time-avg update of difference feature. 631 avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f)); 632 self->featureData[4] += 633 SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]); 634 } 635 636 // Compute speech/noise probability. 637 // Speech/noise probability is returned in |probSpeechFinal|. 638 // |magn| is the input magnitude spectrum. 639 // |noise| is the noise spectrum. 640 // |snrLocPrior| is the prior SNR for each frequency. 641 // |snrLocPost| is the post SNR for each frequency. 642 static void SpeechNoiseProb(NoiseSuppressionC* self, 643 float* probSpeechFinal, 644 const float* snrLocPrior, 645 const float* snrLocPost) { 646 size_t i; 647 int sgnMap; 648 float invLrt, gainPrior, indPrior; 649 float logLrtTimeAvgKsum, besselTmp; 650 float indicator0, indicator1, indicator2; 651 float tmpFloat1, tmpFloat2; 652 float weightIndPrior0, weightIndPrior1, weightIndPrior2; 653 float threshPrior0, threshPrior1, threshPrior2; 654 float widthPrior, widthPrior0, widthPrior1, widthPrior2; 655 656 widthPrior0 = WIDTH_PR_MAP; 657 // Width for pause region: lower range, so increase width in tanh map. 658 widthPrior1 = 2.f * WIDTH_PR_MAP; 659 widthPrior2 = 2.f * WIDTH_PR_MAP; // For spectral-difference measure. 660 661 // Threshold parameters for features. 662 threshPrior0 = self->priorModelPars[0]; 663 threshPrior1 = self->priorModelPars[1]; 664 threshPrior2 = self->priorModelPars[3]; 665 666 // Sign for flatness feature. 667 sgnMap = (int)(self->priorModelPars[2]); 668 669 // Weight parameters for features. 670 weightIndPrior0 = self->priorModelPars[4]; 671 weightIndPrior1 = self->priorModelPars[5]; 672 weightIndPrior2 = self->priorModelPars[6]; 673 674 // Compute feature based on average LR factor. 675 // This is the average over all frequencies of the smooth log LRT. 676 logLrtTimeAvgKsum = 0.0; 677 for (i = 0; i < self->magnLen; i++) { 678 tmpFloat1 = 1.f + 2.f * snrLocPrior[i]; 679 tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f); 680 besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2; 681 self->logLrtTimeAvg[i] += 682 LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]); 683 logLrtTimeAvgKsum += self->logLrtTimeAvg[i]; 684 } 685 logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen); 686 self->featureData[3] = logLrtTimeAvgKsum; 687 // Done with computation of LR factor. 688 689 // Compute the indicator functions. 690 // Average LRT feature. 691 widthPrior = widthPrior0; 692 // Use larger width in tanh map for pause regions. 693 if (logLrtTimeAvgKsum < threshPrior0) { 694 widthPrior = widthPrior1; 695 } 696 // Compute indicator function: sigmoid map. 697 indicator0 = 698 0.5f * 699 ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f); 700 701 // Spectral flatness feature. 702 tmpFloat1 = self->featureData[0]; 703 widthPrior = widthPrior0; 704 // Use larger width in tanh map for pause regions. 705 if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { 706 widthPrior = widthPrior1; 707 } 708 if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { 709 widthPrior = widthPrior1; 710 } 711 // Compute indicator function: sigmoid map. 712 indicator1 = 713 0.5f * 714 ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 715 1.f); 716 717 // For template spectrum-difference. 718 tmpFloat1 = self->featureData[4]; 719 widthPrior = widthPrior0; 720 // Use larger width in tanh map for pause regions. 721 if (tmpFloat1 < threshPrior2) { 722 widthPrior = widthPrior2; 723 } 724 // Compute indicator function: sigmoid map. 725 indicator2 = 726 0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f); 727 728 // Combine the indicator function with the feature weights. 729 indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + 730 weightIndPrior2 * indicator2; 731 // Done with computing indicator function. 732 733 // Compute the prior probability. 734 self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb); 735 // Make sure probabilities are within range: keep floor to 0.01. 736 if (self->priorSpeechProb > 1.f) { 737 self->priorSpeechProb = 1.f; 738 } 739 if (self->priorSpeechProb < 0.01f) { 740 self->priorSpeechProb = 0.01f; 741 } 742 743 // Final speech probability: combine prior model with LR factor:. 744 gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f); 745 for (i = 0; i < self->magnLen; i++) { 746 invLrt = (float)exp(-self->logLrtTimeAvg[i]); 747 invLrt = (float)gainPrior * invLrt; 748 probSpeechFinal[i] = 1.f / (1.f + invLrt); 749 } 750 } 751 752 // Update the noise features. 753 // Inputs: 754 // * |magn| is the signal magnitude spectrum estimate. 755 // * |updateParsFlag| is an update flag for parameters. 756 static void FeatureUpdate(NoiseSuppressionC* self, 757 const float* magn, 758 int updateParsFlag) { 759 // Compute spectral flatness on input spectrum. 760 ComputeSpectralFlatness(self, magn); 761 // Compute difference of input spectrum with learned/estimated noise spectrum. 762 ComputeSpectralDifference(self, magn); 763 // Compute histograms for parameter decisions (thresholds and weights for 764 // features). 765 // Parameters are extracted once every window time. 766 // (=self->modelUpdatePars[1]) 767 if (updateParsFlag >= 1) { 768 // Counter update. 769 self->modelUpdatePars[3]--; 770 // Update histogram. 771 if (self->modelUpdatePars[3] > 0) { 772 FeatureParameterExtraction(self, 0); 773 } 774 // Compute model parameters. 775 if (self->modelUpdatePars[3] == 0) { 776 FeatureParameterExtraction(self, 1); 777 self->modelUpdatePars[3] = self->modelUpdatePars[1]; 778 // If wish to update only once, set flag to zero. 779 if (updateParsFlag == 1) { 780 self->modelUpdatePars[0] = 0; 781 } else { 782 // Update every window: 783 // Get normalization for spectral difference for next window estimate. 784 self->featureData[6] = 785 self->featureData[6] / ((float)self->modelUpdatePars[1]); 786 self->featureData[5] = 787 0.5f * (self->featureData[6] + self->featureData[5]); 788 self->featureData[6] = 0.f; 789 } 790 } 791 } 792 } 793 794 // Update the noise estimate. 795 // Inputs: 796 // * |magn| is the signal magnitude spectrum estimate. 797 // * |snrLocPrior| is the prior SNR. 798 // * |snrLocPost| is the post SNR. 799 // Output: 800 // * |noise| is the updated noise magnitude spectrum estimate. 801 static void UpdateNoiseEstimate(NoiseSuppressionC* self, 802 const float* magn, 803 const float* snrLocPrior, 804 const float* snrLocPost, 805 float* noise) { 806 size_t i; 807 float probSpeech, probNonSpeech; 808 // Time-avg parameter for noise update. 809 float gammaNoiseTmp = NOISE_UPDATE; 810 float gammaNoiseOld; 811 float noiseUpdateTmp; 812 813 for (i = 0; i < self->magnLen; i++) { 814 probSpeech = self->speechProb[i]; 815 probNonSpeech = 1.f - probSpeech; 816 // Temporary noise update: 817 // Use it for speech frames if update value is less than previous. 818 noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] + 819 (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + 820 probSpeech * self->noisePrev[i]); 821 // Time-constant based on speech/noise state. 822 gammaNoiseOld = gammaNoiseTmp; 823 gammaNoiseTmp = NOISE_UPDATE; 824 // Increase gamma (i.e., less noise update) for frame likely to be speech. 825 if (probSpeech > PROB_RANGE) { 826 gammaNoiseTmp = SPEECH_UPDATE; 827 } 828 // Conservative noise update. 829 if (probSpeech < PROB_RANGE) { 830 self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]); 831 } 832 // Noise update. 833 if (gammaNoiseTmp == gammaNoiseOld) { 834 noise[i] = noiseUpdateTmp; 835 } else { 836 noise[i] = gammaNoiseTmp * self->noisePrev[i] + 837 (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + 838 probSpeech * self->noisePrev[i]); 839 // Allow for noise update downwards: 840 // If noise update decreases the noise, it is safe, so allow it to 841 // happen. 842 if (noiseUpdateTmp < noise[i]) { 843 noise[i] = noiseUpdateTmp; 844 } 845 } 846 } // End of freq loop. 847 } 848 849 // Updates |buffer| with a new |frame|. 850 // Inputs: 851 // * |frame| is a new speech frame or NULL for setting to zero. 852 // * |frame_length| is the length of the new frame. 853 // * |buffer_length| is the length of the buffer. 854 // Output: 855 // * |buffer| is the updated buffer. 856 static void UpdateBuffer(const float* frame, 857 size_t frame_length, 858 size_t buffer_length, 859 float* buffer) { 860 assert(buffer_length < 2 * frame_length); 861 862 memcpy(buffer, 863 buffer + frame_length, 864 sizeof(*buffer) * (buffer_length - frame_length)); 865 if (frame) { 866 memcpy(buffer + buffer_length - frame_length, 867 frame, 868 sizeof(*buffer) * frame_length); 869 } else { 870 memset(buffer + buffer_length - frame_length, 871 0, 872 sizeof(*buffer) * frame_length); 873 } 874 } 875 876 // Transforms the signal from time to frequency domain. 877 // Inputs: 878 // * |time_data| is the signal in the time domain. 879 // * |time_data_length| is the length of the analysis buffer. 880 // * |magnitude_length| is the length of the spectrum magnitude, which equals 881 // the length of both |real| and |imag| (time_data_length / 2 + 1). 882 // Outputs: 883 // * |time_data| is the signal in the frequency domain. 884 // * |real| is the real part of the frequency domain. 885 // * |imag| is the imaginary part of the frequency domain. 886 // * |magn| is the calculated signal magnitude in the frequency domain. 887 static void FFT(NoiseSuppressionC* self, 888 float* time_data, 889 size_t time_data_length, 890 size_t magnitude_length, 891 float* real, 892 float* imag, 893 float* magn) { 894 size_t i; 895 896 assert(magnitude_length == time_data_length / 2 + 1); 897 898 WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft); 899 900 imag[0] = 0; 901 real[0] = time_data[0]; 902 magn[0] = fabsf(real[0]) + 1.f; 903 imag[magnitude_length - 1] = 0; 904 real[magnitude_length - 1] = time_data[1]; 905 magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f; 906 for (i = 1; i < magnitude_length - 1; ++i) { 907 real[i] = time_data[2 * i]; 908 imag[i] = time_data[2 * i + 1]; 909 // Magnitude spectrum. 910 magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f; 911 } 912 } 913 914 // Transforms the signal from frequency to time domain. 915 // Inputs: 916 // * |real| is the real part of the frequency domain. 917 // * |imag| is the imaginary part of the frequency domain. 918 // * |magnitude_length| is the length of the spectrum magnitude, which equals 919 // the length of both |real| and |imag|. 920 // * |time_data_length| is the length of the analysis buffer 921 // (2 * (magnitude_length - 1)). 922 // Output: 923 // * |time_data| is the signal in the time domain. 924 static void IFFT(NoiseSuppressionC* self, 925 const float* real, 926 const float* imag, 927 size_t magnitude_length, 928 size_t time_data_length, 929 float* time_data) { 930 size_t i; 931 932 assert(time_data_length == 2 * (magnitude_length - 1)); 933 934 time_data[0] = real[0]; 935 time_data[1] = real[magnitude_length - 1]; 936 for (i = 1; i < magnitude_length - 1; ++i) { 937 time_data[2 * i] = real[i]; 938 time_data[2 * i + 1] = imag[i]; 939 } 940 WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft); 941 942 for (i = 0; i < time_data_length; ++i) { 943 time_data[i] *= 2.f / time_data_length; // FFT scaling. 944 } 945 } 946 947 // Calculates the energy of a buffer. 948 // Inputs: 949 // * |buffer| is the buffer over which the energy is calculated. 950 // * |length| is the length of the buffer. 951 // Returns the calculated energy. 952 static float Energy(const float* buffer, size_t length) { 953 size_t i; 954 float energy = 0.f; 955 956 for (i = 0; i < length; ++i) { 957 energy += buffer[i] * buffer[i]; 958 } 959 960 return energy; 961 } 962 963 // Windows a buffer. 964 // Inputs: 965 // * |window| is the window by which to multiply. 966 // * |data| is the data without windowing. 967 // * |length| is the length of the window and data. 968 // Output: 969 // * |data_windowed| is the windowed data. 970 static void Windowing(const float* window, 971 const float* data, 972 size_t length, 973 float* data_windowed) { 974 size_t i; 975 976 for (i = 0; i < length; ++i) { 977 data_windowed[i] = window[i] * data[i]; 978 } 979 } 980 981 // Estimate prior SNR decision-directed and compute DD based Wiener Filter. 982 // Input: 983 // * |magn| is the signal magnitude spectrum estimate. 984 // Output: 985 // * |theFilter| is the frequency response of the computed Wiener filter. 986 static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self, 987 const float* magn, 988 float* theFilter) { 989 size_t i; 990 float snrPrior, previousEstimateStsa, currentEstimateStsa; 991 992 for (i = 0; i < self->magnLen; i++) { 993 // Previous estimate: based on previous frame with gain filter. 994 previousEstimateStsa = self->magnPrevProcess[i] / 995 (self->noisePrev[i] + 0.0001f) * self->smooth[i]; 996 // Post and prior SNR. 997 currentEstimateStsa = 0.f; 998 if (magn[i] > self->noise[i]) { 999 currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f; 1000 } 1001 // DD estimate is sum of two terms: current estimate and previous estimate. 1002 // Directed decision update of |snrPrior|. 1003 snrPrior = DD_PR_SNR * previousEstimateStsa + 1004 (1.f - DD_PR_SNR) * currentEstimateStsa; 1005 // Gain filter. 1006 theFilter[i] = snrPrior / (self->overdrive + snrPrior); 1007 } // End of loop over frequencies. 1008 } 1009 1010 // Changes the aggressiveness of the noise suppression method. 1011 // |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is 1012 // aggressive (15dB). 1013 // Returns 0 on success and -1 otherwise. 1014 int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) { 1015 // Allow for modes: 0, 1, 2, 3. 1016 if (mode < 0 || mode > 3) { 1017 return (-1); 1018 } 1019 1020 self->aggrMode = mode; 1021 if (mode == 0) { 1022 self->overdrive = 1.f; 1023 self->denoiseBound = 0.5f; 1024 self->gainmap = 0; 1025 } else if (mode == 1) { 1026 // self->overdrive = 1.25f; 1027 self->overdrive = 1.f; 1028 self->denoiseBound = 0.25f; 1029 self->gainmap = 1; 1030 } else if (mode == 2) { 1031 // self->overdrive = 1.25f; 1032 self->overdrive = 1.1f; 1033 self->denoiseBound = 0.125f; 1034 self->gainmap = 1; 1035 } else if (mode == 3) { 1036 // self->overdrive = 1.3f; 1037 self->overdrive = 1.25f; 1038 self->denoiseBound = 0.09f; 1039 self->gainmap = 1; 1040 } 1041 return 0; 1042 } 1043 1044 void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) { 1045 size_t i; 1046 const size_t kStartBand = 5; // Skip first frequency bins during estimation. 1047 int updateParsFlag; 1048 float energy; 1049 float signalEnergy = 0.f; 1050 float sumMagn = 0.f; 1051 float tmpFloat1, tmpFloat2, tmpFloat3; 1052 float winData[ANAL_BLOCKL_MAX]; 1053 float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; 1054 float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; 1055 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; 1056 // Variables during startup. 1057 float sum_log_i = 0.0; 1058 float sum_log_i_square = 0.0; 1059 float sum_log_magn = 0.0; 1060 float sum_log_i_log_magn = 0.0; 1061 float parametric_exp = 0.0; 1062 float parametric_num = 0.0; 1063 1064 // Check that initiation has been done. 1065 assert(self->initFlag == 1); 1066 updateParsFlag = self->modelUpdatePars[0]; 1067 1068 // Update analysis buffer for L band. 1069 UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf); 1070 1071 Windowing(self->window, self->analyzeBuf, self->anaLen, winData); 1072 energy = Energy(winData, self->anaLen); 1073 if (energy == 0.0) { 1074 // We want to avoid updating statistics in this case: 1075 // Updating feature statistics when we have zeros only will cause 1076 // thresholds to move towards zero signal situations. This in turn has the 1077 // effect that once the signal is "turned on" (non-zero values) everything 1078 // will be treated as speech and there is no noise suppression effect. 1079 // Depending on the duration of the inactive signal it takes a 1080 // considerable amount of time for the system to learn what is noise and 1081 // what is speech. 1082 return; 1083 } 1084 1085 self->blockInd++; // Update the block index only when we process a block. 1086 1087 FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); 1088 1089 for (i = 0; i < self->magnLen; i++) { 1090 signalEnergy += real[i] * real[i] + imag[i] * imag[i]; 1091 sumMagn += magn[i]; 1092 if (self->blockInd < END_STARTUP_SHORT) { 1093 if (i >= kStartBand) { 1094 tmpFloat2 = logf((float)i); 1095 sum_log_i += tmpFloat2; 1096 sum_log_i_square += tmpFloat2 * tmpFloat2; 1097 tmpFloat1 = logf(magn[i]); 1098 sum_log_magn += tmpFloat1; 1099 sum_log_i_log_magn += tmpFloat2 * tmpFloat1; 1100 } 1101 } 1102 } 1103 signalEnergy /= self->magnLen; 1104 self->signalEnergy = signalEnergy; 1105 self->sumMagn = sumMagn; 1106 1107 // Quantile noise estimate. 1108 NoiseEstimation(self, magn, noise); 1109 // Compute simplified noise model during startup. 1110 if (self->blockInd < END_STARTUP_SHORT) { 1111 // Estimate White noise. 1112 self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive; 1113 // Estimate Pink noise parameters. 1114 tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand); 1115 tmpFloat1 -= (sum_log_i * sum_log_i); 1116 tmpFloat2 = 1117 (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); 1118 tmpFloat3 = tmpFloat2 / tmpFloat1; 1119 // Constrain the estimated spectrum to be positive. 1120 if (tmpFloat3 < 0.f) { 1121 tmpFloat3 = 0.f; 1122 } 1123 self->pinkNoiseNumerator += tmpFloat3; 1124 tmpFloat2 = (sum_log_i * sum_log_magn); 1125 tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn; 1126 tmpFloat3 = tmpFloat2 / tmpFloat1; 1127 // Constrain the pink noise power to be in the interval [0, 1]. 1128 if (tmpFloat3 < 0.f) { 1129 tmpFloat3 = 0.f; 1130 } 1131 if (tmpFloat3 > 1.f) { 1132 tmpFloat3 = 1.f; 1133 } 1134 self->pinkNoiseExp += tmpFloat3; 1135 1136 // Calculate frequency independent parts of parametric noise estimate. 1137 if (self->pinkNoiseExp > 0.f) { 1138 // Use pink noise estimate. 1139 parametric_num = 1140 expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1)); 1141 parametric_num *= (float)(self->blockInd + 1); 1142 parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1); 1143 } 1144 for (i = 0; i < self->magnLen; i++) { 1145 // Estimate the background noise using the white and pink noise 1146 // parameters. 1147 if (self->pinkNoiseExp == 0.f) { 1148 // Use white noise estimate. 1149 self->parametricNoise[i] = self->whiteNoiseLevel; 1150 } else { 1151 // Use pink noise estimate. 1152 float use_band = (float)(i < kStartBand ? kStartBand : i); 1153 self->parametricNoise[i] = 1154 parametric_num / powf(use_band, parametric_exp); 1155 } 1156 // Weight quantile noise with modeled noise. 1157 noise[i] *= (self->blockInd); 1158 tmpFloat2 = 1159 self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd); 1160 noise[i] += (tmpFloat2 / (float)(self->blockInd + 1)); 1161 noise[i] /= END_STARTUP_SHORT; 1162 } 1163 } 1164 // Compute average signal during END_STARTUP_LONG time: 1165 // used to normalize spectral difference measure. 1166 if (self->blockInd < END_STARTUP_LONG) { 1167 self->featureData[5] *= self->blockInd; 1168 self->featureData[5] += signalEnergy; 1169 self->featureData[5] /= (self->blockInd + 1); 1170 } 1171 1172 // Post and prior SNR needed for SpeechNoiseProb. 1173 ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost); 1174 1175 FeatureUpdate(self, magn, updateParsFlag); 1176 SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost); 1177 UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise); 1178 1179 // Keep track of noise spectrum for next frame. 1180 memcpy(self->noise, noise, sizeof(*noise) * self->magnLen); 1181 memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen); 1182 } 1183 1184 void WebRtcNs_ProcessCore(NoiseSuppressionC* self, 1185 const float* const* speechFrame, 1186 size_t num_bands, 1187 float* const* outFrame) { 1188 // Main routine for noise reduction. 1189 int flagHB = 0; 1190 size_t i, j; 1191 1192 float energy1, energy2, gain, factor, factor1, factor2; 1193 float fout[BLOCKL_MAX]; 1194 float winData[ANAL_BLOCKL_MAX]; 1195 float magn[HALF_ANAL_BLOCKL]; 1196 float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; 1197 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; 1198 1199 // SWB variables. 1200 int deltaBweHB = 1; 1201 int deltaGainHB = 1; 1202 float decayBweHB = 1.0; 1203 float gainMapParHB = 1.0; 1204 float gainTimeDomainHB = 1.0; 1205 float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; 1206 float sumMagnAnalyze, sumMagnProcess; 1207 1208 // Check that initiation has been done. 1209 assert(self->initFlag == 1); 1210 assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX); 1211 1212 const float* const* speechFrameHB = NULL; 1213 float* const* outFrameHB = NULL; 1214 size_t num_high_bands = 0; 1215 if (num_bands > 1) { 1216 speechFrameHB = &speechFrame[1]; 1217 outFrameHB = &outFrame[1]; 1218 num_high_bands = num_bands - 1; 1219 flagHB = 1; 1220 // Range for averaging low band quantities for H band gain. 1221 deltaBweHB = (int)self->magnLen / 4; 1222 deltaGainHB = deltaBweHB; 1223 } 1224 1225 // Update analysis buffer for L band. 1226 UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf); 1227 1228 if (flagHB == 1) { 1229 // Update analysis buffer for H bands. 1230 for (i = 0; i < num_high_bands; ++i) { 1231 UpdateBuffer(speechFrameHB[i], 1232 self->blockLen, 1233 self->anaLen, 1234 self->dataBufHB[i]); 1235 } 1236 } 1237 1238 Windowing(self->window, self->dataBuf, self->anaLen, winData); 1239 energy1 = Energy(winData, self->anaLen); 1240 if (energy1 == 0.0) { 1241 // Synthesize the special case of zero input. 1242 // Read out fully processed segment. 1243 for (i = self->windShift; i < self->blockLen + self->windShift; i++) { 1244 fout[i - self->windShift] = self->syntBuf[i]; 1245 } 1246 // Update synthesis buffer. 1247 UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); 1248 1249 for (i = 0; i < self->blockLen; ++i) 1250 outFrame[0][i] = 1251 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); 1252 1253 // For time-domain gain of HB. 1254 if (flagHB == 1) { 1255 for (i = 0; i < num_high_bands; ++i) { 1256 for (j = 0; j < self->blockLen; ++j) { 1257 outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, 1258 self->dataBufHB[i][j], 1259 WEBRTC_SPL_WORD16_MIN); 1260 } 1261 } 1262 } 1263 1264 return; 1265 } 1266 1267 FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); 1268 1269 if (self->blockInd < END_STARTUP_SHORT) { 1270 for (i = 0; i < self->magnLen; i++) { 1271 self->initMagnEst[i] += magn[i]; 1272 } 1273 } 1274 1275 ComputeDdBasedWienerFilter(self, magn, theFilter); 1276 1277 for (i = 0; i < self->magnLen; i++) { 1278 // Flooring bottom. 1279 if (theFilter[i] < self->denoiseBound) { 1280 theFilter[i] = self->denoiseBound; 1281 } 1282 // Flooring top. 1283 if (theFilter[i] > 1.f) { 1284 theFilter[i] = 1.f; 1285 } 1286 if (self->blockInd < END_STARTUP_SHORT) { 1287 theFilterTmp[i] = 1288 (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]); 1289 theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f); 1290 // Flooring bottom. 1291 if (theFilterTmp[i] < self->denoiseBound) { 1292 theFilterTmp[i] = self->denoiseBound; 1293 } 1294 // Flooring top. 1295 if (theFilterTmp[i] > 1.f) { 1296 theFilterTmp[i] = 1.f; 1297 } 1298 // Weight the two suppression filters. 1299 theFilter[i] *= (self->blockInd); 1300 theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd); 1301 theFilter[i] += theFilterTmp[i]; 1302 theFilter[i] /= (END_STARTUP_SHORT); 1303 } 1304 1305 self->smooth[i] = theFilter[i]; 1306 real[i] *= self->smooth[i]; 1307 imag[i] *= self->smooth[i]; 1308 } 1309 // Keep track of |magn| spectrum for next frame. 1310 memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen); 1311 memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen); 1312 // Back to time domain. 1313 IFFT(self, real, imag, self->magnLen, self->anaLen, winData); 1314 1315 // Scale factor: only do it after END_STARTUP_LONG time. 1316 factor = 1.f; 1317 if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) { 1318 factor1 = 1.f; 1319 factor2 = 1.f; 1320 1321 energy2 = Energy(winData, self->anaLen); 1322 gain = (float)sqrt(energy2 / (energy1 + 1.f)); 1323 1324 // Scaling for new version. 1325 if (gain > B_LIM) { 1326 factor1 = 1.f + 1.3f * (gain - B_LIM); 1327 if (gain * factor1 > 1.f) { 1328 factor1 = 1.f / gain; 1329 } 1330 } 1331 if (gain < B_LIM) { 1332 // Don't reduce scale too much for pause regions: 1333 // attenuation here should be controlled by flooring. 1334 if (gain <= self->denoiseBound) { 1335 gain = self->denoiseBound; 1336 } 1337 factor2 = 1.f - 0.3f * (B_LIM - gain); 1338 } 1339 // Combine both scales with speech/noise prob: 1340 // note prior (priorSpeechProb) is not frequency dependent. 1341 factor = self->priorSpeechProb * factor1 + 1342 (1.f - self->priorSpeechProb) * factor2; 1343 } // Out of self->gainmap == 1. 1344 1345 Windowing(self->window, winData, self->anaLen, winData); 1346 1347 // Synthesis. 1348 for (i = 0; i < self->anaLen; i++) { 1349 self->syntBuf[i] += factor * winData[i]; 1350 } 1351 // Read out fully processed segment. 1352 for (i = self->windShift; i < self->blockLen + self->windShift; i++) { 1353 fout[i - self->windShift] = self->syntBuf[i]; 1354 } 1355 // Update synthesis buffer. 1356 UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); 1357 1358 for (i = 0; i < self->blockLen; ++i) 1359 outFrame[0][i] = 1360 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); 1361 1362 // For time-domain gain of HB. 1363 if (flagHB == 1) { 1364 // Average speech prob from low band. 1365 // Average over second half (i.e., 4->8kHz) of frequencies spectrum. 1366 avgProbSpeechHB = 0.0; 1367 for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) { 1368 avgProbSpeechHB += self->speechProb[i]; 1369 } 1370 avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); 1371 // If the speech was suppressed by a component between Analyze and 1372 // Process, for example the AEC, then it should not be considered speech 1373 // for high band suppression purposes. 1374 sumMagnAnalyze = 0; 1375 sumMagnProcess = 0; 1376 for (i = 0; i < self->magnLen; ++i) { 1377 sumMagnAnalyze += self->magnPrevAnalyze[i]; 1378 sumMagnProcess += self->magnPrevProcess[i]; 1379 } 1380 avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze; 1381 // Average filter gain from low band. 1382 // Average over second half (i.e., 4->8kHz) of frequencies spectrum. 1383 avgFilterGainHB = 0.0; 1384 for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) { 1385 avgFilterGainHB += self->smooth[i]; 1386 } 1387 avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); 1388 avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f; 1389 // Gain based on speech probability. 1390 gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); 1391 // Combine gain with low band gain. 1392 gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB; 1393 if (avgProbSpeechHB >= 0.5f) { 1394 gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB; 1395 } 1396 gainTimeDomainHB = gainTimeDomainHB * decayBweHB; 1397 // Make sure gain is within flooring range. 1398 // Flooring bottom. 1399 if (gainTimeDomainHB < self->denoiseBound) { 1400 gainTimeDomainHB = self->denoiseBound; 1401 } 1402 // Flooring top. 1403 if (gainTimeDomainHB > 1.f) { 1404 gainTimeDomainHB = 1.f; 1405 } 1406 // Apply gain. 1407 for (i = 0; i < num_high_bands; ++i) { 1408 for (j = 0; j < self->blockLen; j++) { 1409 outFrameHB[i][j] = 1410 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, 1411 gainTimeDomainHB * self->dataBufHB[i][j], 1412 WEBRTC_SPL_WORD16_MIN); 1413 } 1414 } 1415 } // End of H band gain computation. 1416 } 1417