/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <string.h>
#include <math.h>
//#include <stdio.h>
#include <stdlib.h>
#include "noise_suppression.h"
#include "ns_core.h"
#include "windows_private.h"
#include "fft4g.h"
#include "signal_processing_library.h"

// Set Feature Extraction Parameters
void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
  // bin size of histogram
  inst->featureExtractionParams.binSizeLrt = (float)0.1;
  inst->featureExtractionParams.binSizeSpecFlat = (float)0.05;
  inst->featureExtractionParams.binSizeSpecDiff = (float)0.1;

  // range of histogram over which lrt threshold is computed
  inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;

  // scale parameters: multiply dominant peaks of the histograms by scale factor
  // to obtain thresholds for prior model
  inst->featureExtractionParams.factor1ModelPars = (float)1.20;  // for lrt and spectral diff
  inst->featureExtractionParams.factor2ModelPars = (float)0.9;   // for spectral_flatness:
                                                                 // used when noise is flatter than speech

  // peak limit for spectral flatness (varies between 0 and 1)
  inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;

  // limit on spacing of two highest peaks in histogram: spacing determined by bin size
  inst->featureExtractionParams.limitPeakSpacingSpecFlat =
      2 * inst->featureExtractionParams.binSizeSpecFlat;
  inst->featureExtractionParams.limitPeakSpacingSpecDiff =
      2 * inst->featureExtractionParams.binSizeSpecDiff;

  // limit on relevance of second peak:
  inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5;
  inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5;

  // fluctuation limit of lrt feature
  inst->featureExtractionParams.thresFluctLrt = (float)0.05;

  // limit on the max and min values for the feature thresholds
  inst->featureExtractionParams.maxLrt = (float)1.0;
  inst->featureExtractionParams.minLrt = (float)0.20;

  inst->featureExtractionParams.maxSpecFlat = (float)0.95;
  inst->featureExtractionParams.minSpecFlat = (float)0.10;

  inst->featureExtractionParams.maxSpecDiff = (float)1.0;
  inst->featureExtractionParams.minSpecDiff = (float)0.16;

  // criteria of weight of histogram peak to accept/reject feature
  inst->featureExtractionParams.thresWeightSpecFlat =
      (int)(0.3 * (inst->modelUpdatePars[1]));  // for spectral flatness
  inst->featureExtractionParams.thresWeightSpecDiff =
      (int)(0.3 * (inst->modelUpdatePars[1]));  // for spectral difference
}

// Initialize state
int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs) {
  int i;
  // We only support 10ms frames

  // check for valid pointer
  if (inst == NULL) {
    return -1;
  }

  // Initialization of struct
  if (fs == 8000 || fs == 16000 || fs == 32000) {
    inst->fs = fs;
  } else {
    return -1;
  }
  inst->windShift = 0;
  if (fs == 8000) {
    // We only support 10ms frames
    inst->blockLen = 80;
    inst->blockLen10ms = 80;
    inst->anaLen = 128;
    inst->window = kBlocks80w128;
    inst->outLen = 0;
  } else if (fs == 16000) {
    // We only support 10ms frames
    inst->blockLen = 160;
    inst->blockLen10ms = 160;
    inst->anaLen = 256;
    inst->window = kBlocks160w256;
    inst->outLen = 0;
  } else if (fs == 32000) {
    // We only support 10ms frames
    inst->blockLen = 160;
    inst->blockLen10ms = 160;
    inst->anaLen = 256;
    inst->window = kBlocks160w256;
    inst->outLen = 0;
  }
  inst->magnLen = inst->anaLen / 2 + 1;  // Number of frequency bins

  // Initialize fft work arrays.
  inst->ip[0] = 0;  // Setting this triggers initialization.
  memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
  WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft);

  memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
  memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);

  // for HB processing
  memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);

  // for quantile noise estimation
  memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
  for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
    inst->lquantile[i] = (float)8.0;
    inst->density[i] = (float)0.3;
  }

  for (i = 0; i < SIMULT; i++) {
    inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
  }

  inst->updates = 0;

  // Wiener filter initialization
  for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
    inst->smooth[i] = (float)1.0;
  }

  // Set the aggressiveness: default
  inst->aggrMode = 0;

  // initialize variables for new method
  inst->priorSpeechProb = (float)0.5;  // prior prob for speech/noise
  for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
    inst->magnPrev[i] = (float)0.0;            // previous mag spectrum
    inst->noisePrev[i] = (float)0.0;           // previous noise-spectrum
    inst->logLrtTimeAvg[i] = LRT_FEATURE_THR;  // smooth LR ratio (same as threshold)
    inst->magnAvgPause[i] = (float)0.0;        // conservative noise spectrum estimate
    inst->speechProbHB[i] = (float)0.0;        // for estimation of HB in second pass
    inst->initMagnEst[i] = (float)0.0;         // initial average mag spectrum
  }

  // feature quantities
  inst->featureData[0] = SF_FEATURE_THR;   // spectral flatness (start on threshold)
  inst->featureData[1] = (float)0.0;       // spectral entropy: not used in this version
  inst->featureData[2] = (float)0.0;       // spectral variance: not used in this version
  inst->featureData[3] = LRT_FEATURE_THR;  // average lrt factor (start on threshold)
  inst->featureData[4] = SF_FEATURE_THR;   // spectral template diff (start on threshold)
  inst->featureData[5] = (float)0.0;       // normalization for spectral-diff
  inst->featureData[6] = (float)0.0;       // window time-average of input magnitude spectrum

  // histogram quantities: used to estimate/update thresholds for features
  for (i = 0; i < HIST_PAR_EST; i++) {
    inst->histLrt[i] = 0;
    inst->histSpecFlat[i] = 0;
    inst->histSpecDiff[i] = 0;
  }

  inst->blockInd = -1;  // frame counter
  inst->priorModelPars[0] = LRT_FEATURE_THR;  // default threshold for lrt feature
  inst->priorModelPars[1] = (float)0.5;  // threshold for spectral flatness: determined on-line
  inst->priorModelPars[2] = (float)1.0;  // sgn_map par for spectral measure: 1 for flatness measure
  inst->priorModelPars[3] = (float)0.5;  // threshold for template-difference feature: determined on-line
  inst->priorModelPars[4] = (float)1.0;  // default weighting parameter for lrt feature
  inst->priorModelPars[5] = (float)0.0;  // default weighting parameter for spectral flatness feature
  inst->priorModelPars[6] = (float)0.0;  // default weighting parameter for spectral difference feature

  inst->modelUpdatePars[0] = 2;    // update flag for parameters:
                                   // 0=no update, 1=update once, 2=update every window
  inst->modelUpdatePars[1] = 500;  // window for update
  inst->modelUpdatePars[2] = 0;    // counter for update of conservative noise spectrum
  // counter for whether the feature thresholds are updated during the sequence
  inst->modelUpdatePars[3] = inst->modelUpdatePars[1];

  inst->signalEnergy = 0.0;
  inst->sumMagn = 0.0;
  inst->whiteNoiseLevel = 0.0;
  inst->pinkNoiseNumerator = 0.0;
  inst->pinkNoiseExp = 0.0;

  WebRtcNs_set_feature_extraction_parameters(inst);  // Set feature configuration

  // default mode
  WebRtcNs_set_policy_core(inst, 0);

  memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);

  inst->initFlag = 1;
  return 0;
}

int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) {
  // allow for modes: 0, 1, 2, 3
  if (mode < 0 || mode > 3) {
    return (-1);
  }

  inst->aggrMode = mode;
  if (mode == 0) {
    inst->overdrive = (float)1.0;
    inst->denoiseBound = (float)0.5;
    inst->gainmap = 0;
  } else if (mode == 1) {
    //inst->overdrive = (float)1.25;
    inst->overdrive = (float)1.0;
    inst->denoiseBound = (float)0.25;
    inst->gainmap = 1;
  } else if (mode == 2) {
    //inst->overdrive = (float)1.25;
    inst->overdrive = (float)1.1;
    inst->denoiseBound = (float)0.125;
    inst->gainmap = 1;
  } else if (mode == 3) {
    //inst->overdrive = (float)1.30;
    inst->overdrive = (float)1.25;
    inst->denoiseBound = (float)0.09;
    inst->gainmap = 1;
  }
  return 0;
}

// Estimate noise
void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
  int i, s, offset;
  float lmagn[HALF_ANAL_BLOCKL], delta;

  if (inst->updates < END_STARTUP_LONG) {
    inst->updates++;
  }

  for (i = 0; i < inst->magnLen; i++) {
    lmagn[i] = (float)log(magn[i]);
  }

  // loop over simultaneous estimates
  for (s = 0; s < SIMULT; s++) {
    offset = s * inst->magnLen;

    // newquantest(...)
    for (i = 0; i < inst->magnLen; i++) {
      // compute delta
      if (inst->density[offset + i] > 1.0) {
        delta = FACTOR * (float)1.0 / inst->density[offset + i];
      } else {
        delta = FACTOR;
      }

      // update log quantile estimate
      if (lmagn[i] > inst->lquantile[offset + i]) {
        inst->lquantile[offset + i] += QUANTILE * delta / (float)(inst->counter[s] + 1);
      } else {
        inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta / (float)(inst->counter[s] + 1);
      }

      // update density estimate
      if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
        inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset + i]
            + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
      }
    }  // end loop over magnitude spectrum

    if (inst->counter[s] >= END_STARTUP_LONG) {
      inst->counter[s] = 0;
      if (inst->updates >= END_STARTUP_LONG) {
        for (i = 0; i < inst->magnLen; i++) {
          inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
        }
      }
    }

    inst->counter[s]++;
  }  // end loop over simultaneous estimates

  // Sequentially update the noise during startup
  if (inst->updates < END_STARTUP_LONG) {
    // Use the last "s" to get a noise estimate during startup that differs from zero.
    for (i = 0; i < inst->magnLen; i++) {
      inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
    }
  }

  for (i = 0; i < inst->magnLen; i++) {
    noise[i] = inst->quantile[i];
  }
}

// Extract thresholds for feature parameters
// histograms are computed over some window_size (given by inst->modelUpdatePars[1])
// thresholds and weights are extracted every window
// flag 0 means update histogram only, flag 1 means compute the thresholds/weights
// threshold and weights are returned in: inst->priorModelPars
void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
  int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
  int maxPeak1, maxPeak2;
  int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;

  float binMid, featureSum;
  float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
  float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;

  // 3 features: lrt, flatness, difference
  // lrt_feature = inst->featureData[3];
  // flat_feature = inst->featureData[0];
  // diff_feature = inst->featureData[4];

  // update histograms
  if (flag == 0) {
    // LRT
    if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt)
        && (inst->featureData[3] >= 0.0)) {
      i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt);
      inst->histLrt[i]++;
    }
    // Spectral flatness
    if ((inst->featureData[0] < HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecFlat)
        && (inst->featureData[0] >= 0.0)) {
      i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat);
      inst->histSpecFlat[i]++;
    }
    // Spectral difference
    if ((inst->featureData[4] < HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecDiff)
        && (inst->featureData[4] >= 0.0)) {
      i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff);
      inst->histSpecDiff[i]++;
    }
  }

  // extract parameters for speech/noise probability
  if (flag == 1) {
    // lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt
    avgHistLrt = 0.0;
    avgHistLrtCompl = 0.0;
    avgSquareHistLrt = 0.0;
    numHistLrt = 0;
    for (i = 0; i < HIST_PAR_EST; i++) {
      binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
      if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
        avgHistLrt += inst->histLrt[i] * binMid;
        numHistLrt += inst->histLrt[i];
      }
      avgSquareHistLrt += inst->histLrt[i] * binMid * binMid;
      avgHistLrtCompl += inst->histLrt[i] * binMid;
    }
    if (numHistLrt > 0) {
      avgHistLrt = avgHistLrt / ((float)numHistLrt);
    }
    avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]);
    avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]);
    fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
    // get threshold for lrt feature:
    if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
      // very low fluct, so likely noise
      inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
    } else {
      inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars * avgHistLrt;
      // check if value is within min/max range
      if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
        inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
      }
      if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) {
        inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
      }
    }
    // done with lrt feature

    //
    // for spectral flatness and spectral difference: compute the main peaks of histogram
    maxPeak1 = 0;
    maxPeak2 = 0;
    posPeak1SpecFlat = 0.0;
    posPeak2SpecFlat = 0.0;
    weightPeak1SpecFlat = 0;
    weightPeak2SpecFlat = 0;

    // peaks for flatness
    for (i = 0; i < HIST_PAR_EST; i++) {
      binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat;
      if (inst->histSpecFlat[i] > maxPeak1) {
        // Found new "first" peak
        maxPeak2 = maxPeak1;
        weightPeak2SpecFlat = weightPeak1SpecFlat;
        posPeak2SpecFlat = posPeak1SpecFlat;

        maxPeak1 = inst->histSpecFlat[i];
        weightPeak1SpecFlat = inst->histSpecFlat[i];
        posPeak1SpecFlat = binMid;
      } else if (inst->histSpecFlat[i] > maxPeak2) {
        // Found new "second" peak
        maxPeak2 = inst->histSpecFlat[i];
        weightPeak2SpecFlat = inst->histSpecFlat[i];
        posPeak2SpecFlat = binMid;
      }
    }

    // compute two peaks for spectral difference
    maxPeak1 = 0;
    maxPeak2 = 0;
    posPeak1SpecDiff = 0.0;
    posPeak2SpecDiff = 0.0;
    weightPeak1SpecDiff = 0;
    weightPeak2SpecDiff = 0;
    // peaks for spectral difference
    for (i = 0; i < HIST_PAR_EST; i++) {
      binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff;
      if (inst->histSpecDiff[i] > maxPeak1) {
        // Found new "first" peak
        maxPeak2 = maxPeak1;
        weightPeak2SpecDiff = weightPeak1SpecDiff;
        posPeak2SpecDiff = posPeak1SpecDiff;

        maxPeak1 = inst->histSpecDiff[i];
        weightPeak1SpecDiff = inst->histSpecDiff[i];
        posPeak1SpecDiff = binMid;
      } else if (inst->histSpecDiff[i] > maxPeak2) {
        // Found new "second" peak
        maxPeak2 = inst->histSpecDiff[i];
        weightPeak2SpecDiff = inst->histSpecDiff[i];
        posPeak2SpecDiff = binMid;
      }
    }

    // for spectrum flatness feature
    useFeatureSpecFlat = 1;
    // merge the two peaks if they are close
    if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat)
        < inst->featureExtractionParams.limitPeakSpacingSpecFlat)
        && (weightPeak2SpecFlat
            > inst->featureExtractionParams.limitPeakWeightsSpecFlat * weightPeak1SpecFlat)) {
      weightPeak1SpecFlat += weightPeak2SpecFlat;
      posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
    }
    // reject if weight of peaks is not large enough, or peak value too small
    if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat
        || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
      useFeatureSpecFlat = 0;
    }
    // if selected, get the threshold
    if (useFeatureSpecFlat == 1) {
      // compute the threshold
      inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
      // check if value is within min/max range
      if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
        inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
      }
      if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) {
        inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat;
      }
    }
    // done with flatness feature

    // for template feature
    useFeatureSpecDiff = 1;
    // merge the two peaks if they are close
    if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff)
        < inst->featureExtractionParams.limitPeakSpacingSpecDiff)
        && (weightPeak2SpecDiff
            > inst->featureExtractionParams.limitPeakWeightsSpecDiff * weightPeak1SpecDiff)) {
      weightPeak1SpecDiff += weightPeak2SpecDiff;
      posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
    }
    // get the threshold value
    inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
    // reject if weight of peaks is not large enough
    if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) {
      useFeatureSpecDiff = 0;
    }
    // check if value is within min/max range
    if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) {
      inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff;
    }
    if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) {
      inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff;
    }
    // done with spectral difference feature

    // don't use template feature if fluctuation of lrt feature is very low:
    // most likely just noise state
    if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
      useFeatureSpecDiff = 0;
    }

    // select the weights between the features
    // inst->priorModelPars[4] is weight for lrt: always selected
    // inst->priorModelPars[5] is weight for spectral flatness
    // inst->priorModelPars[6] is weight for spectral difference
    featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff);
    inst->priorModelPars[4] = (float)1.0 / featureSum;
    inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum;
    inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum;

    // set hists to zero for next update
    if (inst->modelUpdatePars[0] >= 1) {
      for (i = 0; i < HIST_PAR_EST; i++) {
        inst->histLrt[i] = 0;
        inst->histSpecFlat[i] = 0;
        inst->histSpecDiff[i] = 0;
      }
    }
  }  // end of flag == 1
}

// Compute spectral flatness on input spectrum
// magnIn is the magnitude spectrum
// spectral flatness is returned in inst->featureData[0]
void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
  int i;
  int shiftLP = 1;  // option to remove first bin(s) from spectral measures
  float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;

  // compute spectral measures
  // for flatness
  avgSpectralFlatnessNum = 0.0;
  avgSpectralFlatnessDen = inst->sumMagn;
  for (i = 0; i < shiftLP; i++) {
    avgSpectralFlatnessDen -= magnIn[i];
  }
  // compute log of ratio of the geometric to arithmetic mean: check for log(0) case
  for (i = shiftLP; i < inst->magnLen; i++) {
    if (magnIn[i] > 0.0) {
      avgSpectralFlatnessNum += (float)log(magnIn[i]);
    } else {
      inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0];
      return;
    }
  }
  // normalize
  avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen;
  avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen;

  // ratio and inverse log: check for case of log(0)
  spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;

  // time-avg update of spectral flatness feature
  inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]);
  // done with flatness feature
}

// Compute the difference measure between input spectrum and a template/learned noise spectrum
// magnIn is the input spectrum
// the reference/template spectrum is inst->magnAvgPause[i]
// returns (normalized) spectral difference in inst->featureData[4]
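// (In effect this is the residual variance of magnIn after a least-squares fit
// onto magnAvgPause, i.e. var(magnIn) * (1 - corr(magnIn, magnAvgPause)^2): it is
// small when the current spectrum has the same shape as the learned noise
// template and grows when the shapes differ, as they typically do for speech.)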
void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
  // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
  int i;
  float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;

  avgPause = 0.0;
  avgMagn = inst->sumMagn;
  // compute average quantities
  for (i = 0; i < inst->magnLen; i++) {
    // conservative smooth noise spectrum from pause frames
    avgPause += inst->magnAvgPause[i];
  }
  avgPause = avgPause / ((float)inst->magnLen);
  avgMagn = avgMagn / ((float)inst->magnLen);

  covMagnPause = 0.0;
  varPause = 0.0;
  varMagn = 0.0;
  // compute variance and covariance quantities
  for (i = 0; i < inst->magnLen; i++) {
    covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
    varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
    varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
  }
  covMagnPause = covMagnPause / ((float)inst->magnLen);
  varPause = varPause / ((float)inst->magnLen);
  varMagn = varMagn / ((float)inst->magnLen);
  // update of average magnitude spectrum
  inst->featureData[6] += inst->signalEnergy;

  avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
  // normalize and compute time-avg update of difference feature
  avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
  inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
}

// Compute speech/noise probability
// speech/noise probability is returned in: probSpeechFinal
// magn is the input magnitude spectrum
// noise is the noise spectrum
// snrLocPrior is the prior snr for each freq.
// snrLocPost is the post snr for each freq.
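// Implementation note: for each bin the loop below forms an instantaneous
// log likelihood ratio of (roughly)
//   llr_k ~= (snrLocPost_k + 1) * 2*snrLocPrior_k / (1 + 2*snrLocPrior_k)
//            - log(1 + 2*snrLocPrior_k)
// and tracks it with a first-order smoother (LRT_TAVG). The average of the
// smoothed values over all bins becomes featureData[3], the LRT feature that
// feeds the prior speech-probability model.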
void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
                              float* snrLocPost) {
  int i, sgnMap;
  float invLrt, gainPrior, indPrior;
  float logLrtTimeAvgKsum, besselTmp;
  float indicator0, indicator1, indicator2;
  float tmpFloat1, tmpFloat2;
  float weightIndPrior0, weightIndPrior1, weightIndPrior2;
  float threshPrior0, threshPrior1, threshPrior2;
  float widthPrior, widthPrior0, widthPrior1, widthPrior2;

  widthPrior0 = WIDTH_PR_MAP;
  widthPrior1 = (float)2.0 * WIDTH_PR_MAP;  // width for pause region:
                                            // lower range, so increase width in tanh map
  widthPrior2 = (float)2.0 * WIDTH_PR_MAP;  // for spectral-difference measure

  // threshold parameters for features
  threshPrior0 = inst->priorModelPars[0];
  threshPrior1 = inst->priorModelPars[1];
  threshPrior2 = inst->priorModelPars[3];

  // sign for flatness feature
  sgnMap = (int)(inst->priorModelPars[2]);

  // weight parameters for features
  weightIndPrior0 = inst->priorModelPars[4];
  weightIndPrior1 = inst->priorModelPars[5];
  weightIndPrior2 = inst->priorModelPars[6];

  // compute feature based on average LR factor
  // this is the average over all frequencies of the smooth log lrt
  logLrtTimeAvgKsum = 0.0;
  for (i = 0; i < inst->magnLen; i++) {
    tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
    tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
    besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
    inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - inst->logLrtTimeAvg[i]);
    logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
  }
  logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
  inst->featureData[3] = logLrtTimeAvgKsum;
  // done with computation of LR factor

  //
  // compute the indicator functions
  //

  // average lrt feature
  widthPrior = widthPrior0;
  // use larger width in tanh map for pause regions
  if (logLrtTimeAvgKsum < threshPrior0) {
    widthPrior = widthPrior1;
  }
  // compute indicator function: sigmoid map
  indicator0 = (float)0.5 * ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + (float)1.0);

  // spectral flatness feature
  tmpFloat1 = inst->featureData[0];
  widthPrior = widthPrior0;
  // use larger width in tanh map for pause regions
  if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
    widthPrior = widthPrior1;
  }
  if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) {
    widthPrior = widthPrior1;
  }
  // compute indicator function: sigmoid map
  indicator1 = (float)0.5 * ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1))
                             + (float)1.0);

  // for template spectrum-difference
  tmpFloat1 = inst->featureData[4];
  widthPrior = widthPrior0;
  // use larger width in tanh map for pause regions
  if (tmpFloat1 < threshPrior2) {
    widthPrior = widthPrior2;
  }
  // compute indicator function: sigmoid map
  indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + (float)1.0);

  // combine the indicator function with the feature weights
  indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1
             + weightIndPrior2 * indicator2;
  // done with computing indicator function

  // compute the prior probability
  inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb);
  // make sure probabilities are within range: keep floor to 0.01
  if (inst->priorSpeechProb > 1.0) {
    inst->priorSpeechProb = (float)1.0;
  }
  if (inst->priorSpeechProb < 0.01) {
    inst->priorSpeechProb = (float)0.01;
  }

  // final speech probability: combine prior model with LR factor:
  gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
  for (i = 0; i < inst->magnLen; i++) {
    invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
    invLrt = (float)gainPrior * invLrt;
    probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt);
  }
}

int WebRtcNs_ProcessCore(NSinst_t* inst,
                         short* speechFrame,
                         short* speechFrameHB,
                         short* outFrame,
                         short* outFrameHB) {
  // main routine for noise reduction

  int flagHB = 0;
  int i;
  const int kStartBand = 5;  // Skip first frequency bins during estimation.
  int updateParsFlag;

  float energy1, energy2, gain, factor, factor1, factor2;
  float signalEnergy, sumMagn;
  float snrPrior, currentEstimateStsa;
  float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech;
  float gammaNoiseTmp, gammaNoiseOld;
  float noiseUpdateTmp, fTmp, dTmp;
  float fin[BLOCKL_MAX], fout[BLOCKL_MAX];
  float winData[ANAL_BLOCKL_MAX];
  float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
  float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
  float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
  float probSpeechFinal[HALF_ANAL_BLOCKL], previousEstimateStsa[HALF_ANAL_BLOCKL];
  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
  // Variables during startup
  float sum_log_i = 0.0;
  float sum_log_i_square = 0.0;
  float sum_log_magn = 0.0;
  float sum_log_i_log_magn = 0.0;
  float parametric_noise = 0.0;
  float parametric_exp = 0.0;
  float parametric_num = 0.0;

  // SWB variables
  int deltaBweHB = 1;
  int deltaGainHB = 1;
  float decayBweHB = 1.0;
  float gainMapParHB = 1.0;
  float gainTimeDomainHB = 1.0;
  float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;

  // Check that initialization has been done
  if (inst->initFlag != 1) {
    return (-1);
  }
  // Check for valid pointers based on sampling rate
  if (inst->fs == 32000) {
    if (speechFrameHB == NULL) {
      return -1;
    }
    flagHB = 1;
    // range for averaging low band quantities for H band gain
    deltaBweHB = (int)inst->magnLen / 4;
    deltaGainHB = deltaBweHB;
  }
  //
  updateParsFlag = inst->modelUpdatePars[0];
  //

  // for LB do all processing
  // convert to float
  for (i = 0; i < inst->blockLen10ms; i++) {
    fin[i] = (float)speechFrame[i];
  }
  // update analysis buffer for L band
  memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms,
         sizeof(float) * (inst->anaLen - inst->blockLen10ms));
  memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin,
         sizeof(float) * inst->blockLen10ms);

  if (flagHB == 1) {
    // convert to float
    for (i = 0; i < inst->blockLen10ms; i++) {
      fin[i] = (float)speechFrameHB[i];
    }
    // update analysis buffer for H band
    memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
           sizeof(float) * (inst->anaLen - inst->blockLen10ms));
    memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin,
           sizeof(float) * inst->blockLen10ms);
  }

  // check if processing needed
  if (inst->outLen == 0) {
    // windowing
    energy1 = 0.0;
    for (i = 0; i < inst->anaLen; i++) {
      winData[i] = inst->window[i] * inst->dataBuf[i];
      energy1 += winData[i] * winData[i];
    }
    if (energy1 == 0.0) {
      // synthesize the special case of zero input
      // we want to avoid updating statistics in this case:
      // Updating feature statistics when we have zeros only will cause thresholds to
      // move towards zero signal situations. This in turn has the effect that once the
      // signal is "turned on" (non-zero values) everything will be treated as speech
      // and there is no noise suppression effect. Depending on the duration of the
      // inactive signal it takes a considerable amount of time for the system to learn
      // what is noise and what is speech.

      // read out fully processed segment
      for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) {
        fout[i - inst->windShift] = inst->syntBuf[i];
      }
      // update synthesis buffer
      memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
             sizeof(float) * (inst->anaLen - inst->blockLen));
      memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
             sizeof(float) * inst->blockLen);

      // out buffer
      inst->outLen = inst->blockLen - inst->blockLen10ms;
      if (inst->blockLen > inst->blockLen10ms) {
        for (i = 0; i < inst->outLen; i++) {
          inst->outBuf[i] = fout[i + inst->blockLen10ms];
        }
      }
      // convert to short
      for (i = 0; i < inst->blockLen10ms; i++) {
        dTmp = fout[i];
        if (dTmp < WEBRTC_SPL_WORD16_MIN) {
          dTmp = WEBRTC_SPL_WORD16_MIN;
        } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
          dTmp = WEBRTC_SPL_WORD16_MAX;
        }
        outFrame[i] = (short)dTmp;
      }

      // for time-domain gain of HB
      if (flagHB == 1) {
        for (i = 0; i < inst->blockLen10ms; i++) {
          dTmp = inst->dataBufHB[i];
          if (dTmp < WEBRTC_SPL_WORD16_MIN) {
            dTmp = WEBRTC_SPL_WORD16_MIN;
          } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
            dTmp = WEBRTC_SPL_WORD16_MAX;
          }
          outFrameHB[i] = (short)dTmp;
        }
      }  // end of H band gain computation
      //
      return 0;
    }

    //
    inst->blockInd++;  // Update the block index only when we process a block.
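    // Layout note: as used here, WebRtc_rdft packs the real FFT in place, so
    // after the forward transform winData[0] holds the DC (real) component,
    // winData[1] holds the Nyquist (real) component, and winData[2*i],
    // winData[2*i + 1] hold the real and imaginary parts of bin i for
    // 0 < i < magnLen - 1. The unpacking below relies on exactly this ordering.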
    // FFT
    WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft);

    imag[0] = 0;
    real[0] = winData[0];
    magn[0] = (float)(fabs(real[0]) + 1.0f);
    imag[inst->magnLen - 1] = 0;
    real[inst->magnLen - 1] = winData[1];
    magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f);
    signalEnergy = (float)(real[0] * real[0]) +
                   (float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]);
    sumMagn = magn[0] + magn[inst->magnLen - 1];
    if (inst->blockInd < END_STARTUP_SHORT) {
      inst->initMagnEst[0] += magn[0];
      inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1];
      tmpFloat2 = log((float)(inst->magnLen - 1));
      sum_log_i = tmpFloat2;
      sum_log_i_square = tmpFloat2 * tmpFloat2;
      tmpFloat1 = log(magn[inst->magnLen - 1]);
      sum_log_magn = tmpFloat1;
      sum_log_i_log_magn = tmpFloat2 * tmpFloat1;
    }
    for (i = 1; i < inst->magnLen - 1; i++) {
      real[i] = winData[2 * i];
      imag[i] = winData[2 * i + 1];
      // magnitude spectrum
      fTmp = real[i] * real[i];
      fTmp += imag[i] * imag[i];
      signalEnergy += fTmp;
      magn[i] = ((float)sqrt(fTmp)) + 1.0f;
      sumMagn += magn[i];
      if (inst->blockInd < END_STARTUP_SHORT) {
        inst->initMagnEst[i] += magn[i];
        if (i >= kStartBand) {
          tmpFloat2 = log((float)i);
          sum_log_i += tmpFloat2;
          sum_log_i_square += tmpFloat2 * tmpFloat2;
          tmpFloat1 = log(magn[i]);
          sum_log_magn += tmpFloat1;
          sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
        }
      }
    }
    signalEnergy = signalEnergy / ((float)inst->magnLen);
    inst->signalEnergy = signalEnergy;
    inst->sumMagn = sumMagn;

    // compute spectral flatness on input spectrum
    WebRtcNs_ComputeSpectralFlatness(inst, magn);
    // quantile noise estimate
    WebRtcNs_NoiseEstimation(inst, magn, noise);
    // compute simplified noise model during startup
    if (inst->blockInd < END_STARTUP_SHORT) {
      // Estimate White noise
      inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive;
      // Estimate Pink noise parameters
      tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
      tmpFloat1 -= (sum_log_i * sum_log_i);
      tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
      tmpFloat3 = tmpFloat2 / tmpFloat1;
      // Constrain the estimated spectrum to be positive
      if (tmpFloat3 < 0.0f) {
        tmpFloat3 = 0.0f;
      }
      inst->pinkNoiseNumerator += tmpFloat3;
      tmpFloat2 = (sum_log_i * sum_log_magn);
      tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn;
      tmpFloat3 = tmpFloat2 / tmpFloat1;
      // Constrain the pink noise power to be in the interval [0, 1].
      if (tmpFloat3 < 0.0f) {
        tmpFloat3 = 0.0f;
      }
      if (tmpFloat3 > 1.0f) {
        tmpFloat3 = 1.0f;
      }
      inst->pinkNoiseExp += tmpFloat3;

      // Calculate frequency independent parts of parametric noise estimate.
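      // Model note: the startup sums above amount to a least-squares fit of
      // log(magn[i]) against log(i) over bins i >= kStartBand, so the modeled
      // noise magnitude follows roughly
      //   noise(i) ~= parametric_num / i^parametric_exp
      // with the fitted exponent clamped to [0, 1] per frame. When the exponent
      // is zero the estimate falls back to the flat (white) noise level
      // accumulated in whiteNoiseLevel, which is the branch taken below.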
      if (inst->pinkNoiseExp == 0.0f) {
        // Use white noise estimate
        parametric_noise = inst->whiteNoiseLevel;
      } else {
        // Use pink noise estimate
        parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
        parametric_num *= (float)(inst->blockInd + 1);
        parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
        parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
      }
      for (i = 0; i < inst->magnLen; i++) {
        // Estimate the background noise using the white and pink noise parameters
        if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
          // Use pink noise estimate
          parametric_noise = parametric_num / pow((float)i, parametric_exp);
        }
        theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
        theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
        // Weight quantile noise with modeled noise
        noise[i] *= (inst->blockInd);
        tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd);
        noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1));
        noise[i] /= END_STARTUP_SHORT;
      }
    }
    // compute average signal during END_STARTUP_LONG time:
    // used to normalize spectral difference measure
    if (inst->blockInd < END_STARTUP_LONG) {
      inst->featureData[5] *= inst->blockInd;
      inst->featureData[5] += signalEnergy;
      inst->featureData[5] /= (inst->blockInd + 1);
    }

#ifdef PROCESS_FLOW_0
    if (inst->blockInd > END_STARTUP_LONG) {
      // option: average the quantile noise: for check with AEC2
      for (i = 0; i < inst->magnLen; i++) {
        noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i];
      }
      for (i = 0; i < inst->magnLen; i++) {
        // Wiener with over-subtraction:
        theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001);
      }
    }
#else
    // start processing at frames == converged+1
    //
    // STEP 1: compute prior and post snr based on quantile noise est
    //

    // compute DD estimate of prior SNR: needed for new method
    for (i = 0; i < inst->magnLen; i++) {
      // post snr
      snrLocPost[i] = (float)0.0;
      if (magn[i] > noise[i]) {
        snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
      }
      // previous post snr
      // previous estimate: based on previous frame with gain filter
      previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001)
                                * (inst->smooth[i]);
      // DD estimate is sum of two terms: current estimate and previous estimate
      // directed decision update of snrPrior
      snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
                       * snrLocPost[i];
      // post and prior snr needed for step 2
    }  // end of loop over freqs
#ifdef PROCESS_FLOW_1
    for (i = 0; i < inst->magnLen; i++) {
      // gain filter
      tmpFloat1 = inst->overdrive + snrLocPrior[i];
      tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1;
      theFilter[i] = (float)tmpFloat2;
    }  // end of loop over freqs
#endif
    // done with step 1: dd computation of prior and post snr

    //
    // STEP 2: compute speech/noise likelihood
    //
#ifdef PROCESS_FLOW_2
    // compute difference of input spectrum with learned/estimated noise spectrum
    WebRtcNs_ComputeSpectralDifference(inst, magn);
    // compute histograms for parameter decisions (thresholds and weights for features)
    // parameters are extracted once every window time (=inst->modelUpdatePars[1])
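    // Scheduling note: modelUpdatePars[3] counts down from the window length
    // (modelUpdatePars[1], 500 frames by default); the histograms are updated
    // while the counter is positive and the thresholds/weights are re-estimated
    // once it reaches zero, after which the counter is reset below.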
    if (updateParsFlag >= 1) {
      // counter update
      inst->modelUpdatePars[3]--;
      // update histogram
      if (inst->modelUpdatePars[3] > 0) {
        WebRtcNs_FeatureParameterExtraction(inst, 0);
      }
      // compute model parameters
      if (inst->modelUpdatePars[3] == 0) {
        WebRtcNs_FeatureParameterExtraction(inst, 1);
        inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
        // if we wish to update only once, set flag to zero
        if (updateParsFlag == 1) {
          inst->modelUpdatePars[0] = 0;
        } else {
          // update every window:
          // get normalization for spectral difference for next window estimate
          inst->featureData[6] = inst->featureData[6] / ((float)inst->modelUpdatePars[1]);
          inst->featureData[5] = (float)0.5 * (inst->featureData[6] + inst->featureData[5]);
          inst->featureData[6] = (float)0.0;
        }
      }
    }
    // compute speech/noise probability
    WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost);
    // time-avg parameter for noise update
    gammaNoiseTmp = NOISE_UPDATE;
    for (i = 0; i < inst->magnLen; i++) {
      probSpeech = probSpeechFinal[i];
      probNonSpeech = (float)1.0 - probSpeech;
      // temporary noise update:
      // use it for speech frames if update value is less than previous
      noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
                       * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
      //
      // time-constant based on speech/noise state
      gammaNoiseOld = gammaNoiseTmp;
      gammaNoiseTmp = NOISE_UPDATE;
      // increase gamma (i.e., less noise update) for frame likely to be speech
      if (probSpeech > PROB_RANGE) {
        gammaNoiseTmp = SPEECH_UPDATE;
      }
      // conservative noise update
      if (probSpeech < PROB_RANGE) {
        inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
      }
      // noise update
      if (gammaNoiseTmp == gammaNoiseOld) {
        noise[i] = noiseUpdateTmp;
      } else {
        noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
                   * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
        // allow for noise update downwards:
        // if noise update decreases the noise, it is safe, so allow it to happen
        if (noiseUpdateTmp < noise[i]) {
          noise[i] = noiseUpdateTmp;
        }
      }
    }  // end of freq loop
    // done with step 2: noise update

    //
    // STEP 3: compute dd update of prior snr and post snr based on new noise estimate
    //
    for (i = 0; i < inst->magnLen; i++) {
      // post and prior snr
      currentEstimateStsa = (float)0.0;
      if (magn[i] > noise[i]) {
        currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
      }
      // DD estimate is sum of two terms: current estimate and previous estimate
      // directed decision update of snrPrior
      snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
                 * currentEstimateStsa;
      // gain filter
      tmpFloat1 = inst->overdrive + snrPrior;
      tmpFloat2 = (float)snrPrior / tmpFloat1;
      theFilter[i] = (float)tmpFloat2;
    }  // end of loop over freqs
    // done with step 3
#endif
#endif

    for (i = 0; i < inst->magnLen; i++) {
      // flooring bottom
      if (theFilter[i] < inst->denoiseBound) {
        theFilter[i] = inst->denoiseBound;
      }
      // flooring top
      if (theFilter[i] > (float)1.0) {
        theFilter[i] = 1.0;
      }
      if (inst->blockInd < END_STARTUP_SHORT) {
        // flooring bottom
        if (theFilterTmp[i] < inst->denoiseBound) {
          theFilterTmp[i] = inst->denoiseBound;
        }
        // flooring top
        if (theFilterTmp[i] > (float)1.0) {
          theFilterTmp[i] = 1.0;
        }
        // Weight the two suppression filters
        theFilter[i] *= (inst->blockInd);
        theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd);
        theFilter[i] += theFilterTmp[i];
        theFilter[i] /= (END_STARTUP_SHORT);
      }
      // smoothing
#ifdef PROCESS_FLOW_0
      inst->smooth[i] *= SMOOTH;  // value set to 0.7 in define.h file
      inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i];
#else
      inst->smooth[i] = theFilter[i];
#endif
      real[i] *= inst->smooth[i];
      imag[i] *= inst->smooth[i];
    }
    // keep track of noise and magn spectrum for next frame
    for (i = 0; i < inst->magnLen; i++) {
      inst->noisePrev[i] = noise[i];
      inst->magnPrev[i] = magn[i];
    }
    // back to time domain
    winData[0] = real[0];
    winData[1] = real[inst->magnLen - 1];
    for (i = 1; i < inst->magnLen - 1; i++) {
      winData[2 * i] = real[i];
      winData[2 * i + 1] = imag[i];
    }
    WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft);

    for (i = 0; i < inst->anaLen; i++) {
      real[i] = 2.0f * winData[i] / inst->anaLen;  // fft scaling
    }

    // scale factor: only do it after END_STARTUP_LONG time
    factor = (float)1.0;
    if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) {
      factor1 = (float)1.0;
      factor2 = (float)1.0;

      energy2 = 0.0;
      for (i = 0; i < inst->anaLen; i++) {
        energy2 += (float)real[i] * (float)real[i];
      }
      gain = (float)sqrt(energy2 / (energy1 + (float)1.0));

#ifdef PROCESS_FLOW_2
      // scaling for new version
      if (gain > B_LIM) {
        factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM);
        if (gain * factor1 > (float)1.0) {
          factor1 = (float)1.0 / gain;
        }
      }
      if (gain < B_LIM) {
        // don't reduce scale too much for pause regions:
        // attenuation here should be controlled by flooring
        if (gain <= inst->denoiseBound) {
          gain = inst->denoiseBound;
        }
        factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain);
      }
      // combine both scales with speech/noise prob:
      // note prior (priorSpeechProb) is not frequency dependent
      factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb) * factor2;
#else
      if (gain > B_LIM) {
        factor = (float)1.0 + (float)1.3 * (gain - B_LIM);
      } else {
        factor = (float)1.0 + (float)2.0 * (gain - B_LIM);
      }
      if (gain * factor > (float)1.0) {
        factor = (float)1.0 / gain;
      }
#endif
    }  // out of inst->gainmap == 1

    // synthesis
    for (i = 0; i < inst->anaLen; i++) {
      inst->syntBuf[i] += factor * inst->window[i] * (float)real[i];
    }
    // read out fully processed segment
    for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) {
      fout[i - inst->windShift] = inst->syntBuf[i];
    }
    // update synthesis buffer
    memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
           sizeof(float) * (inst->anaLen - inst->blockLen));
    memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
           sizeof(float) * inst->blockLen);

    // out buffer
    inst->outLen = inst->blockLen - inst->blockLen10ms;
    if (inst->blockLen > inst->blockLen10ms) {
      for (i = 0; i < inst->outLen; i++) {
        inst->outBuf[i] = fout[i + inst->blockLen10ms];
      }
    }
  }  // end of if out.len==0
  else {
    for (i = 0; i < inst->blockLen10ms; i++) {
      fout[i] = inst->outBuf[i];
    }
    memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms,
           sizeof(float) * (inst->outLen - inst->blockLen10ms));
    memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0,
           sizeof(float) * inst->blockLen10ms);
    inst->outLen -= inst->blockLen10ms;
  }

  // convert to short
  for (i = 0; i < inst->blockLen10ms; i++) {
    dTmp = fout[i];
    if (dTmp < WEBRTC_SPL_WORD16_MIN) {
      dTmp = WEBRTC_SPL_WORD16_MIN;
    } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
      dTmp = WEBRTC_SPL_WORD16_MAX;
    }
    outFrame[i] = (short)dTmp;
  }

  // for time-domain gain of HB
  if (flagHB == 1) {
    for (i = 0; i < inst->magnLen; i++) {
      inst->speechProbHB[i] = probSpeechFinal[i];
    }
    if (inst->blockInd > END_STARTUP_LONG) {
      // average speech prob from low band
      // avg over second half (i.e., 4->8kHz) of freq. spectrum
      avgProbSpeechHB = 0.0;
      for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) {
        avgProbSpeechHB += inst->speechProbHB[i];
      }
      avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
      // average filter gain from low band
      // average over second half (i.e., 4->8kHz) of freq. spectrum
      avgFilterGainHB = 0.0;
      for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) {
        avgFilterGainHB += inst->smooth[i];
      }
      avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
      avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
      // gain based on speech prob:
      gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
      // combine gain with low band gain
      gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
      if (avgProbSpeechHB >= (float)0.5) {
        gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
      }
      gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
    }  // end of converged
    // make sure gain is within flooring range
    // flooring bottom
    if (gainTimeDomainHB < inst->denoiseBound) {
      gainTimeDomainHB = inst->denoiseBound;
    }
    // flooring top
    if (gainTimeDomainHB > (float)1.0) {
      gainTimeDomainHB = 1.0;
    }
    // apply gain
    for (i = 0; i < inst->blockLen10ms; i++) {
      dTmp = gainTimeDomainHB * inst->dataBufHB[i];
      if (dTmp < WEBRTC_SPL_WORD16_MIN) {
        dTmp = WEBRTC_SPL_WORD16_MIN;
      } else if (dTmp > WEBRTC_SPL_WORD16_MAX) {
        dTmp = WEBRTC_SPL_WORD16_MAX;
      }
      outFrameHB[i] = (short)dTmp;
    }
  }  // end of H band gain computation
  //

  return 0;
}
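
// Illustrative usage sketch (not part of the library): the core entry points in
// this file are normally driven through the wrappers in noise_suppression.h,
// but at the core level the calling order looks roughly like the following.
// Frame size and sample rate are example values; error handling is omitted.
#if 0
static void example_run_ns_core(void) {
  NSinst_t inst;
  short in[160] = {0};  // one 10 ms frame at 16 kHz
  short out[160];

  memset(&inst, 0, sizeof(inst));
  WebRtcNs_InitCore(&inst, 16000);     // sets blockLen, window, buffers, etc.
  WebRtcNs_set_policy_core(&inst, 2);  // aggressiveness mode 0..3

  // In a real application this is called once per 10 ms frame; the high-band
  // pointers are only used for 32 kHz input (flagHB path) and may be NULL here.
  WebRtcNs_ProcessCore(&inst, in, NULL, out, NULL);
}
#endif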