1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <string.h> 13 //#include <stdio.h> 14 #include <stdlib.h> 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 #include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" 17 #include "webrtc/modules/audio_processing/ns/ns_core.h" 18 #include "webrtc/modules/audio_processing/ns/windows_private.h" 19 #include "webrtc/modules/audio_processing/utility/fft4g.h" 20 21 // Set Feature Extraction Parameters 22 void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) { 23 //bin size of histogram 24 inst->featureExtractionParams.binSizeLrt = (float)0.1; 25 inst->featureExtractionParams.binSizeSpecFlat = (float)0.05; 26 inst->featureExtractionParams.binSizeSpecDiff = (float)0.1; 27 28 //range of histogram over which lrt threshold is computed 29 inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0; 30 31 //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain 32 // thresholds for prior model 33 inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff 34 inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness: 35 // used when noise is flatter than speech 36 37 //peak limit for spectral flatness (varies between 0 and 1) 38 inst->featureExtractionParams.thresPosSpecFlat = (float)0.6; 39 40 //limit on spacing of two highest peaks in histogram: spacing determined by bin size 41 inst->featureExtractionParams.limitPeakSpacingSpecFlat = 42 2 * inst->featureExtractionParams.binSizeSpecFlat; 43 inst->featureExtractionParams.limitPeakSpacingSpecDiff = 44 2 * inst->featureExtractionParams.binSizeSpecDiff; 45 46 //limit on relevance of second peak: 47 inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5; 48 inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5; 49 50 // fluctuation limit of lrt feature 51 inst->featureExtractionParams.thresFluctLrt = (float)0.05; 52 53 //limit on the max and min values for the feature thresholds 54 inst->featureExtractionParams.maxLrt = (float)1.0; 55 inst->featureExtractionParams.minLrt = (float)0.20; 56 57 inst->featureExtractionParams.maxSpecFlat = (float)0.95; 58 inst->featureExtractionParams.minSpecFlat = (float)0.10; 59 60 inst->featureExtractionParams.maxSpecDiff = (float)1.0; 61 inst->featureExtractionParams.minSpecDiff = (float)0.16; 62 63 //criteria of weight of histogram peak to accept/reject feature 64 inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3 65 * (inst->modelUpdatePars[1])); //for spectral flatness 66 inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3 67 * (inst->modelUpdatePars[1])); //for spectral difference 68 } 69 70 // Initialize state 71 int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) { 72 int i; 73 //We only support 10ms frames 74 75 //check for valid pointer 76 if (inst == NULL) { 77 return -1; 78 } 79 80 // Initialization of struct 81 if (fs == 8000 || fs == 16000 || fs == 32000) { 82 inst->fs = fs; 83 } else { 84 return -1; 85 } 86 inst->windShift = 0; 87 if (fs == 8000) { 88 // We only support 10ms frames 89 inst->blockLen = 80; 90 inst->blockLen10ms = 80; 91 inst->anaLen = 128; 92 inst->window = kBlocks80w128; 93 inst->outLen = 0; 94 } else if (fs == 16000) { 95 // We only support 10ms frames 96 inst->blockLen = 160; 97 inst->blockLen10ms = 160; 98 inst->anaLen = 256; 99 inst->window = kBlocks160w256; 100 inst->outLen = 0; 101 } else if (fs == 32000) { 102 // We only support 10ms frames 103 inst->blockLen = 160; 104 inst->blockLen10ms = 160; 105 inst->anaLen = 256; 106 inst->window = kBlocks160w256; 107 inst->outLen = 0; 108 } 109 inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins 110 111 // Initialize fft work arrays. 112 inst->ip[0] = 0; // Setting this triggers initialization. 113 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 114 WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft); 115 116 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 117 memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 118 119 //for HB processing 120 memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX); 121 122 //for quantile noise estimation 123 memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); 124 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { 125 inst->lquantile[i] = (float)8.0; 126 inst->density[i] = (float)0.3; 127 } 128 129 for (i = 0; i < SIMULT; i++) { 130 inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); 131 } 132 133 inst->updates = 0; 134 135 // Wiener filter initialization 136 for (i = 0; i < HALF_ANAL_BLOCKL; i++) { 137 inst->smooth[i] = (float)1.0; 138 } 139 140 // Set the aggressiveness: default 141 inst->aggrMode = 0; 142 143 //initialize variables for new method 144 inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise 145 for (i = 0; i < HALF_ANAL_BLOCKL; i++) { 146 inst->magnPrev[i] = (float)0.0; //previous mag spectrum 147 inst->noisePrev[i] = (float)0.0; //previous noise-spectrum 148 inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold) 149 inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate 150 inst->speechProbHB[i] = (float)0.0; //for estimation of HB in second pass 151 inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum 152 } 153 154 //feature quantities 155 inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold) 156 inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version 157 inst->featureData[2] = (float)0.0; //spectral variance: not used in this version 158 inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold) 159 inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold) 160 inst->featureData[5] = (float)0.0; //normalization for spectral-diff 161 inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum 162 163 //histogram quantities: used to estimate/update thresholds for features 164 for (i = 0; i < HIST_PAR_EST; i++) { 165 inst->histLrt[i] = 0; 166 inst->histSpecFlat[i] = 0; 167 inst->histSpecDiff[i] = 0; 168 } 169 170 inst->blockInd = -1; //frame counter 171 inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature 172 inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness: 173 // determined on-line 174 inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure: 175 // 1 for flatness measure 176 inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature: 177 // determined on-line 178 inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature 179 inst->priorModelPars[5] = (float)0.0; //default weighting parameter for 180 // spectral flatness feature 181 inst->priorModelPars[6] = (float)0.0; //default weighting parameter for 182 // spectral difference feature 183 184 inst->modelUpdatePars[0] = 2; //update flag for parameters: 185 // 0 no update, 1=update once, 2=update every window 186 inst->modelUpdatePars[1] = 500; //window for update 187 inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum 188 //counter if the feature thresholds are updated during the sequence 189 inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; 190 191 inst->signalEnergy = 0.0; 192 inst->sumMagn = 0.0; 193 inst->whiteNoiseLevel = 0.0; 194 inst->pinkNoiseNumerator = 0.0; 195 inst->pinkNoiseExp = 0.0; 196 197 WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration 198 199 //default mode 200 WebRtcNs_set_policy_core(inst, 0); 201 202 203 memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX); 204 205 inst->initFlag = 1; 206 return 0; 207 } 208 209 int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) { 210 // allow for modes:0,1,2,3 211 if (mode < 0 || mode > 3) { 212 return (-1); 213 } 214 215 inst->aggrMode = mode; 216 if (mode == 0) { 217 inst->overdrive = (float)1.0; 218 inst->denoiseBound = (float)0.5; 219 inst->gainmap = 0; 220 } else if (mode == 1) { 221 //inst->overdrive = (float)1.25; 222 inst->overdrive = (float)1.0; 223 inst->denoiseBound = (float)0.25; 224 inst->gainmap = 1; 225 } else if (mode == 2) { 226 //inst->overdrive = (float)1.25; 227 inst->overdrive = (float)1.1; 228 inst->denoiseBound = (float)0.125; 229 inst->gainmap = 1; 230 } else if (mode == 3) { 231 //inst->overdrive = (float)1.30; 232 inst->overdrive = (float)1.25; 233 inst->denoiseBound = (float)0.09; 234 inst->gainmap = 1; 235 } 236 return 0; 237 } 238 239 // Estimate noise 240 void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) { 241 int i, s, offset; 242 float lmagn[HALF_ANAL_BLOCKL], delta; 243 244 if (inst->updates < END_STARTUP_LONG) { 245 inst->updates++; 246 } 247 248 for (i = 0; i < inst->magnLen; i++) { 249 lmagn[i] = (float)log(magn[i]); 250 } 251 252 // loop over simultaneous estimates 253 for (s = 0; s < SIMULT; s++) { 254 offset = s * inst->magnLen; 255 256 // newquantest(...) 257 for (i = 0; i < inst->magnLen; i++) { 258 // compute delta 259 if (inst->density[offset + i] > 1.0) { 260 delta = FACTOR * (float)1.0 / inst->density[offset + i]; 261 } else { 262 delta = FACTOR; 263 } 264 265 // update log quantile estimate 266 if (lmagn[i] > inst->lquantile[offset + i]) { 267 inst->lquantile[offset + i] += QUANTILE * delta 268 / (float)(inst->counter[s] + 1); 269 } else { 270 inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta 271 / (float)(inst->counter[s] + 1); 272 } 273 274 // update density estimate 275 if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) { 276 inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset 277 + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1); 278 } 279 } // end loop over magnitude spectrum 280 281 if (inst->counter[s] >= END_STARTUP_LONG) { 282 inst->counter[s] = 0; 283 if (inst->updates >= END_STARTUP_LONG) { 284 for (i = 0; i < inst->magnLen; i++) { 285 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 286 } 287 } 288 } 289 290 inst->counter[s]++; 291 } // end loop over simultaneous estimates 292 293 // Sequentially update the noise during startup 294 if (inst->updates < END_STARTUP_LONG) { 295 // Use the last "s" to get noise during startup that differ from zero. 296 for (i = 0; i < inst->magnLen; i++) { 297 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 298 } 299 } 300 301 for (i = 0; i < inst->magnLen; i++) { 302 noise[i] = inst->quantile[i]; 303 } 304 } 305 306 // Extract thresholds for feature parameters 307 // histograms are computed over some window_size (given by inst->modelUpdatePars[1]) 308 // thresholds and weights are extracted every window 309 // flag 0 means update histogram only, flag 1 means compute the thresholds/weights 310 // threshold and weights are returned in: inst->priorModelPars 311 void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) { 312 int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; 313 int maxPeak1, maxPeak2; 314 int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff; 315 316 float binMid, featureSum; 317 float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; 318 float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; 319 320 //3 features: lrt, flatness, difference 321 //lrt_feature = inst->featureData[3]; 322 //flat_feature = inst->featureData[0]; 323 //diff_feature = inst->featureData[4]; 324 325 //update histograms 326 if (flag == 0) { 327 // LRT 328 if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) 329 && (inst->featureData[3] >= 0.0)) { 330 i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt); 331 inst->histLrt[i]++; 332 } 333 // Spectral flatness 334 if ((inst->featureData[0] < HIST_PAR_EST 335 * inst->featureExtractionParams.binSizeSpecFlat) 336 && (inst->featureData[0] >= 0.0)) { 337 i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat); 338 inst->histSpecFlat[i]++; 339 } 340 // Spectral difference 341 if ((inst->featureData[4] < HIST_PAR_EST 342 * inst->featureExtractionParams.binSizeSpecDiff) 343 && (inst->featureData[4] >= 0.0)) { 344 i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff); 345 inst->histSpecDiff[i]++; 346 } 347 } 348 349 // extract parameters for speech/noise probability 350 if (flag == 1) { 351 //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt 352 avgHistLrt = 0.0; 353 avgHistLrtCompl = 0.0; 354 avgSquareHistLrt = 0.0; 355 numHistLrt = 0; 356 for (i = 0; i < HIST_PAR_EST; i++) { 357 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt; 358 if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) { 359 avgHistLrt += inst->histLrt[i] * binMid; 360 numHistLrt += inst->histLrt[i]; 361 } 362 avgSquareHistLrt += inst->histLrt[i] * binMid * binMid; 363 avgHistLrtCompl += inst->histLrt[i] * binMid; 364 } 365 if (numHistLrt > 0) { 366 avgHistLrt = avgHistLrt / ((float)numHistLrt); 367 } 368 avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]); 369 avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]); 370 fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; 371 // get threshold for lrt feature: 372 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { 373 //very low fluct, so likely noise 374 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; 375 } else { 376 inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars 377 * avgHistLrt; 378 // check if value is within min/max range 379 if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) { 380 inst->priorModelPars[0] = inst->featureExtractionParams.minLrt; 381 } 382 if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) { 383 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; 384 } 385 } 386 // done with lrt feature 387 388 // 389 // for spectral flatness and spectral difference: compute the main peaks of histogram 390 maxPeak1 = 0; 391 maxPeak2 = 0; 392 posPeak1SpecFlat = 0.0; 393 posPeak2SpecFlat = 0.0; 394 weightPeak1SpecFlat = 0; 395 weightPeak2SpecFlat = 0; 396 397 // peaks for flatness 398 for (i = 0; i < HIST_PAR_EST; i++) { 399 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat; 400 if (inst->histSpecFlat[i] > maxPeak1) { 401 // Found new "first" peak 402 maxPeak2 = maxPeak1; 403 weightPeak2SpecFlat = weightPeak1SpecFlat; 404 posPeak2SpecFlat = posPeak1SpecFlat; 405 406 maxPeak1 = inst->histSpecFlat[i]; 407 weightPeak1SpecFlat = inst->histSpecFlat[i]; 408 posPeak1SpecFlat = binMid; 409 } else if (inst->histSpecFlat[i] > maxPeak2) { 410 // Found new "second" peak 411 maxPeak2 = inst->histSpecFlat[i]; 412 weightPeak2SpecFlat = inst->histSpecFlat[i]; 413 posPeak2SpecFlat = binMid; 414 } 415 } 416 417 //compute two peaks for spectral difference 418 maxPeak1 = 0; 419 maxPeak2 = 0; 420 posPeak1SpecDiff = 0.0; 421 posPeak2SpecDiff = 0.0; 422 weightPeak1SpecDiff = 0; 423 weightPeak2SpecDiff = 0; 424 // peaks for spectral difference 425 for (i = 0; i < HIST_PAR_EST; i++) { 426 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff; 427 if (inst->histSpecDiff[i] > maxPeak1) { 428 // Found new "first" peak 429 maxPeak2 = maxPeak1; 430 weightPeak2SpecDiff = weightPeak1SpecDiff; 431 posPeak2SpecDiff = posPeak1SpecDiff; 432 433 maxPeak1 = inst->histSpecDiff[i]; 434 weightPeak1SpecDiff = inst->histSpecDiff[i]; 435 posPeak1SpecDiff = binMid; 436 } else if (inst->histSpecDiff[i] > maxPeak2) { 437 // Found new "second" peak 438 maxPeak2 = inst->histSpecDiff[i]; 439 weightPeak2SpecDiff = inst->histSpecDiff[i]; 440 posPeak2SpecDiff = binMid; 441 } 442 } 443 444 // for spectrum flatness feature 445 useFeatureSpecFlat = 1; 446 // merge the two peaks if they are close 447 if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) 448 < inst->featureExtractionParams.limitPeakSpacingSpecFlat) 449 && (weightPeak2SpecFlat 450 > inst->featureExtractionParams.limitPeakWeightsSpecFlat 451 * weightPeak1SpecFlat)) { 452 weightPeak1SpecFlat += weightPeak2SpecFlat; 453 posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat); 454 } 455 //reject if weight of peaks is not large enough, or peak value too small 456 if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat 457 || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) { 458 useFeatureSpecFlat = 0; 459 } 460 // if selected, get the threshold 461 if (useFeatureSpecFlat == 1) { 462 // compute the threshold 463 inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars 464 * posPeak1SpecFlat; 465 //check if value is within min/max range 466 if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) { 467 inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat; 468 } 469 if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) { 470 inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat; 471 } 472 } 473 // done with flatness feature 474 475 // for template feature 476 useFeatureSpecDiff = 1; 477 // merge the two peaks if they are close 478 if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) 479 < inst->featureExtractionParams.limitPeakSpacingSpecDiff) 480 && (weightPeak2SpecDiff 481 > inst->featureExtractionParams.limitPeakWeightsSpecDiff 482 * weightPeak1SpecDiff)) { 483 weightPeak1SpecDiff += weightPeak2SpecDiff; 484 posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff); 485 } 486 // get the threshold value 487 inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars 488 * posPeak1SpecDiff; 489 //reject if weight of peaks is not large enough 490 if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) { 491 useFeatureSpecDiff = 0; 492 } 493 //check if value is within min/max range 494 if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) { 495 inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff; 496 } 497 if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) { 498 inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff; 499 } 500 // done with spectral difference feature 501 502 // don't use template feature if fluctuation of lrt feature is very low: 503 // most likely just noise state 504 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { 505 useFeatureSpecDiff = 0; 506 } 507 508 // select the weights between the features 509 // inst->priorModelPars[4] is weight for lrt: always selected 510 // inst->priorModelPars[5] is weight for spectral flatness 511 // inst->priorModelPars[6] is weight for spectral difference 512 featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); 513 inst->priorModelPars[4] = (float)1.0 / featureSum; 514 inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; 515 inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; 516 517 // set hists to zero for next update 518 if (inst->modelUpdatePars[0] >= 1) { 519 for (i = 0; i < HIST_PAR_EST; i++) { 520 inst->histLrt[i] = 0; 521 inst->histSpecFlat[i] = 0; 522 inst->histSpecDiff[i] = 0; 523 } 524 } 525 } // end of flag == 1 526 } 527 528 // Compute spectral flatness on input spectrum 529 // magnIn is the magnitude spectrum 530 // spectral flatness is returned in inst->featureData[0] 531 void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) { 532 int i; 533 int shiftLP = 1; //option to remove first bin(s) from spectral measures 534 float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; 535 536 // comute spectral measures 537 // for flatness 538 avgSpectralFlatnessNum = 0.0; 539 avgSpectralFlatnessDen = inst->sumMagn; 540 for (i = 0; i < shiftLP; i++) { 541 avgSpectralFlatnessDen -= magnIn[i]; 542 } 543 // compute log of ratio of the geometric to arithmetic mean: check for log(0) case 544 for (i = shiftLP; i < inst->magnLen; i++) { 545 if (magnIn[i] > 0.0) { 546 avgSpectralFlatnessNum += (float)log(magnIn[i]); 547 } else { 548 inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0]; 549 return; 550 } 551 } 552 //normalize 553 avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen; 554 avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen; 555 556 //ratio and inverse log: check for case of log(0) 557 spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; 558 559 //time-avg update of spectral flatness feature 560 inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]); 561 // done with flatness feature 562 } 563 564 // Compute the difference measure between input spectrum and a template/learned noise spectrum 565 // magnIn is the input spectrum 566 // the reference/template spectrum is inst->magnAvgPause[i] 567 // returns (normalized) spectral difference in inst->featureData[4] 568 void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) { 569 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) 570 int i; 571 float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; 572 573 avgPause = 0.0; 574 avgMagn = inst->sumMagn; 575 // compute average quantities 576 for (i = 0; i < inst->magnLen; i++) { 577 //conservative smooth noise spectrum from pause frames 578 avgPause += inst->magnAvgPause[i]; 579 } 580 avgPause = avgPause / ((float)inst->magnLen); 581 avgMagn = avgMagn / ((float)inst->magnLen); 582 583 covMagnPause = 0.0; 584 varPause = 0.0; 585 varMagn = 0.0; 586 // compute variance and covariance quantities 587 for (i = 0; i < inst->magnLen; i++) { 588 covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause); 589 varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause); 590 varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); 591 } 592 covMagnPause = covMagnPause / ((float)inst->magnLen); 593 varPause = varPause / ((float)inst->magnLen); 594 varMagn = varMagn / ((float)inst->magnLen); 595 // update of average magnitude spectrum 596 inst->featureData[6] += inst->signalEnergy; 597 598 avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001); 599 // normalize and compute time-avg update of difference feature 600 avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001)); 601 inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]); 602 } 603 604 // Compute speech/noise probability 605 // speech/noise probability is returned in: probSpeechFinal 606 //magn is the input magnitude spectrum 607 //noise is the noise spectrum 608 //snrLocPrior is the prior snr for each freq. 609 //snr loc_post is the post snr for each freq. 610 void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior, 611 float* snrLocPost) { 612 int i, sgnMap; 613 float invLrt, gainPrior, indPrior; 614 float logLrtTimeAvgKsum, besselTmp; 615 float indicator0, indicator1, indicator2; 616 float tmpFloat1, tmpFloat2; 617 float weightIndPrior0, weightIndPrior1, weightIndPrior2; 618 float threshPrior0, threshPrior1, threshPrior2; 619 float widthPrior, widthPrior0, widthPrior1, widthPrior2; 620 621 widthPrior0 = WIDTH_PR_MAP; 622 widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region: 623 // lower range, so increase width in tanh map 624 widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure 625 626 //threshold parameters for features 627 threshPrior0 = inst->priorModelPars[0]; 628 threshPrior1 = inst->priorModelPars[1]; 629 threshPrior2 = inst->priorModelPars[3]; 630 631 //sign for flatness feature 632 sgnMap = (int)(inst->priorModelPars[2]); 633 634 //weight parameters for features 635 weightIndPrior0 = inst->priorModelPars[4]; 636 weightIndPrior1 = inst->priorModelPars[5]; 637 weightIndPrior2 = inst->priorModelPars[6]; 638 639 // compute feature based on average LR factor 640 // this is the average over all frequencies of the smooth log lrt 641 logLrtTimeAvgKsum = 0.0; 642 for (i = 0; i < inst->magnLen; i++) { 643 tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i]; 644 tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001); 645 besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2; 646 inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1) 647 - inst->logLrtTimeAvg[i]); 648 logLrtTimeAvgKsum += inst->logLrtTimeAvg[i]; 649 } 650 logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen); 651 inst->featureData[3] = logLrtTimeAvgKsum; 652 // done with computation of LR factor 653 654 // 655 //compute the indicator functions 656 // 657 658 // average lrt feature 659 widthPrior = widthPrior0; 660 //use larger width in tanh map for pause regions 661 if (logLrtTimeAvgKsum < threshPrior0) { 662 widthPrior = widthPrior1; 663 } 664 // compute indicator function: sigmoid map 665 indicator0 = (float)0.5 * ((float)tanh(widthPrior * 666 (logLrtTimeAvgKsum - threshPrior0)) + (float)1.0); 667 668 //spectral flatness feature 669 tmpFloat1 = inst->featureData[0]; 670 widthPrior = widthPrior0; 671 //use larger width in tanh map for pause regions 672 if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { 673 widthPrior = widthPrior1; 674 } 675 if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { 676 widthPrior = widthPrior1; 677 } 678 // compute indicator function: sigmoid map 679 indicator1 = (float)0.5 * ((float)tanh((float)sgnMap * 680 widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0); 681 682 //for template spectrum-difference 683 tmpFloat1 = inst->featureData[4]; 684 widthPrior = widthPrior0; 685 //use larger width in tanh map for pause regions 686 if (tmpFloat1 < threshPrior2) { 687 widthPrior = widthPrior2; 688 } 689 // compute indicator function: sigmoid map 690 indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) 691 + (float)1.0); 692 693 //combine the indicator function with the feature weights 694 indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 695 * indicator2; 696 // done with computing indicator function 697 698 //compute the prior probability 699 inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb); 700 // make sure probabilities are within range: keep floor to 0.01 701 if (inst->priorSpeechProb > 1.0) { 702 inst->priorSpeechProb = (float)1.0; 703 } 704 if (inst->priorSpeechProb < 0.01) { 705 inst->priorSpeechProb = (float)0.01; 706 } 707 708 //final speech probability: combine prior model with LR factor: 709 gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001); 710 for (i = 0; i < inst->magnLen; i++) { 711 invLrt = (float)exp(-inst->logLrtTimeAvg[i]); 712 invLrt = (float)gainPrior * invLrt; 713 probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt); 714 } 715 } 716 717 int WebRtcNs_ProcessCore(NSinst_t* inst, 718 float* speechFrame, 719 float* speechFrameHB, 720 float* outFrame, 721 float* outFrameHB) { 722 // main routine for noise reduction 723 724 int flagHB = 0; 725 int i; 726 const int kStartBand = 5; // Skip first frequency bins during estimation. 727 int updateParsFlag; 728 729 float energy1, energy2, gain, factor, factor1, factor2; 730 float signalEnergy, sumMagn; 731 float snrPrior, currentEstimateStsa; 732 float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech; 733 float gammaNoiseTmp, gammaNoiseOld; 734 float noiseUpdateTmp, fTmp; 735 float fout[BLOCKL_MAX]; 736 float winData[ANAL_BLOCKL_MAX]; 737 float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; 738 float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; 739 float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; 740 float probSpeechFinal[HALF_ANAL_BLOCKL] = { 0 }; 741 float previousEstimateStsa[HALF_ANAL_BLOCKL]; 742 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; 743 // Variables during startup 744 float sum_log_i = 0.0; 745 float sum_log_i_square = 0.0; 746 float sum_log_magn = 0.0; 747 float sum_log_i_log_magn = 0.0; 748 float parametric_noise = 0.0; 749 float parametric_exp = 0.0; 750 float parametric_num = 0.0; 751 752 // SWB variables 753 int deltaBweHB = 1; 754 int deltaGainHB = 1; 755 float decayBweHB = 1.0; 756 float gainMapParHB = 1.0; 757 float gainTimeDomainHB = 1.0; 758 float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; 759 760 // Check that initiation has been done 761 if (inst->initFlag != 1) { 762 return (-1); 763 } 764 // Check for valid pointers based on sampling rate 765 if (inst->fs == 32000) { 766 if (speechFrameHB == NULL) { 767 return -1; 768 } 769 flagHB = 1; 770 // range for averaging low band quantities for H band gain 771 deltaBweHB = (int)inst->magnLen / 4; 772 deltaGainHB = deltaBweHB; 773 } 774 // 775 updateParsFlag = inst->modelUpdatePars[0]; 776 // 777 778 // update analysis buffer for L band 779 memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms, 780 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 781 memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, speechFrame, 782 sizeof(float) * inst->blockLen10ms); 783 784 if (flagHB == 1) { 785 // update analysis buffer for H band 786 memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms, 787 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 788 memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, speechFrameHB, 789 sizeof(float) * inst->blockLen10ms); 790 } 791 792 // check if processing needed 793 if (inst->outLen == 0) { 794 // windowing 795 energy1 = 0.0; 796 for (i = 0; i < inst->anaLen; i++) { 797 winData[i] = inst->window[i] * inst->dataBuf[i]; 798 energy1 += winData[i] * winData[i]; 799 } 800 if (energy1 == 0.0) { 801 // synthesize the special case of zero input 802 // we want to avoid updating statistics in this case: 803 // Updating feature statistics when we have zeros only will cause thresholds to 804 // move towards zero signal situations. This in turn has the effect that once the 805 // signal is "turned on" (non-zero values) everything will be treated as speech 806 // and there is no noise suppression effect. Depending on the duration of the 807 // inactive signal it takes a considerable amount of time for the system to learn 808 // what is noise and what is speech. 809 810 // read out fully processed segment 811 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { 812 fout[i - inst->windShift] = inst->syntBuf[i]; 813 } 814 // update synthesis buffer 815 memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, 816 sizeof(float) * (inst->anaLen - inst->blockLen)); 817 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, 818 sizeof(float) * inst->blockLen); 819 820 // out buffer 821 inst->outLen = inst->blockLen - inst->blockLen10ms; 822 if (inst->blockLen > inst->blockLen10ms) { 823 for (i = 0; i < inst->outLen; i++) { 824 inst->outBuf[i] = fout[i + inst->blockLen10ms]; 825 } 826 } 827 for (i = 0; i < inst->blockLen10ms; ++i) 828 outFrame[i] = WEBRTC_SPL_SAT( 829 WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); 830 831 // for time-domain gain of HB 832 if (flagHB == 1) 833 for (i = 0; i < inst->blockLen10ms; ++i) 834 outFrameHB[i] = WEBRTC_SPL_SAT( 835 WEBRTC_SPL_WORD16_MAX, inst->dataBufHB[i], WEBRTC_SPL_WORD16_MIN); 836 837 return 0; 838 } 839 840 // 841 inst->blockInd++; // Update the block index only when we process a block. 842 // FFT 843 WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); 844 845 imag[0] = 0; 846 real[0] = winData[0]; 847 magn[0] = (float)(fabs(real[0]) + 1.0f); 848 imag[inst->magnLen - 1] = 0; 849 real[inst->magnLen - 1] = winData[1]; 850 magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); 851 signalEnergy = (float)(real[0] * real[0]) + 852 (float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]); 853 sumMagn = magn[0] + magn[inst->magnLen - 1]; 854 if (inst->blockInd < END_STARTUP_SHORT) { 855 inst->initMagnEst[0] += magn[0]; 856 inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1]; 857 tmpFloat2 = log((float)(inst->magnLen - 1)); 858 sum_log_i = tmpFloat2; 859 sum_log_i_square = tmpFloat2 * tmpFloat2; 860 tmpFloat1 = log(magn[inst->magnLen - 1]); 861 sum_log_magn = tmpFloat1; 862 sum_log_i_log_magn = tmpFloat2 * tmpFloat1; 863 } 864 for (i = 1; i < inst->magnLen - 1; i++) { 865 real[i] = winData[2 * i]; 866 imag[i] = winData[2 * i + 1]; 867 // magnitude spectrum 868 fTmp = real[i] * real[i]; 869 fTmp += imag[i] * imag[i]; 870 signalEnergy += fTmp; 871 magn[i] = ((float)sqrt(fTmp)) + 1.0f; 872 sumMagn += magn[i]; 873 if (inst->blockInd < END_STARTUP_SHORT) { 874 inst->initMagnEst[i] += magn[i]; 875 if (i >= kStartBand) { 876 tmpFloat2 = log((float)i); 877 sum_log_i += tmpFloat2; 878 sum_log_i_square += tmpFloat2 * tmpFloat2; 879 tmpFloat1 = log(magn[i]); 880 sum_log_magn += tmpFloat1; 881 sum_log_i_log_magn += tmpFloat2 * tmpFloat1; 882 } 883 } 884 } 885 signalEnergy = signalEnergy / ((float)inst->magnLen); 886 inst->signalEnergy = signalEnergy; 887 inst->sumMagn = sumMagn; 888 889 //compute spectral flatness on input spectrum 890 WebRtcNs_ComputeSpectralFlatness(inst, magn); 891 // quantile noise estimate 892 WebRtcNs_NoiseEstimation(inst, magn, noise); 893 //compute simplified noise model during startup 894 if (inst->blockInd < END_STARTUP_SHORT) { 895 // Estimate White noise 896 inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive; 897 // Estimate Pink noise parameters 898 tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand)); 899 tmpFloat1 -= (sum_log_i * sum_log_i); 900 tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); 901 tmpFloat3 = tmpFloat2 / tmpFloat1; 902 // Constrain the estimated spectrum to be positive 903 if (tmpFloat3 < 0.0f) { 904 tmpFloat3 = 0.0f; 905 } 906 inst->pinkNoiseNumerator += tmpFloat3; 907 tmpFloat2 = (sum_log_i * sum_log_magn); 908 tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn; 909 tmpFloat3 = tmpFloat2 / tmpFloat1; 910 // Constrain the pink noise power to be in the interval [0, 1]; 911 if (tmpFloat3 < 0.0f) { 912 tmpFloat3 = 0.0f; 913 } 914 if (tmpFloat3 > 1.0f) { 915 tmpFloat3 = 1.0f; 916 } 917 inst->pinkNoiseExp += tmpFloat3; 918 919 // Calculate frequency independent parts of parametric noise estimate. 920 if (inst->pinkNoiseExp == 0.0f) { 921 // Use white noise estimate 922 parametric_noise = inst->whiteNoiseLevel; 923 } else { 924 // Use pink noise estimate 925 parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1)); 926 parametric_num *= (float)(inst->blockInd + 1); 927 parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1); 928 parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp); 929 } 930 for (i = 0; i < inst->magnLen; i++) { 931 // Estimate the background noise using the white and pink noise parameters 932 if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) { 933 // Use pink noise estimate 934 parametric_noise = parametric_num / pow((float)i, parametric_exp); 935 } 936 theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise); 937 theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001); 938 // Weight quantile noise with modeled noise 939 noise[i] *= (inst->blockInd); 940 tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd); 941 noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1)); 942 noise[i] /= END_STARTUP_SHORT; 943 } 944 } 945 //compute average signal during END_STARTUP_LONG time: 946 // used to normalize spectral difference measure 947 if (inst->blockInd < END_STARTUP_LONG) { 948 inst->featureData[5] *= inst->blockInd; 949 inst->featureData[5] += signalEnergy; 950 inst->featureData[5] /= (inst->blockInd + 1); 951 } 952 953 #ifdef PROCESS_FLOW_0 954 if (inst->blockInd > END_STARTUP_LONG) { 955 //option: average the quantile noise: for check with AEC2 956 for (i = 0; i < inst->magnLen; i++) { 957 noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i]; 958 } 959 for (i = 0; i < inst->magnLen; i++) { 960 // Wiener with over sub-substraction: 961 theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001); 962 } 963 } 964 #else 965 //start processing at frames == converged+1 966 // 967 // STEP 1: compute prior and post snr based on quantile noise est 968 // 969 970 // compute DD estimate of prior SNR: needed for new method 971 for (i = 0; i < inst->magnLen; i++) { 972 // post snr 973 snrLocPost[i] = (float)0.0; 974 if (magn[i] > noise[i]) { 975 snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; 976 } 977 // previous post snr 978 // previous estimate: based on previous frame with gain filter 979 previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001) 980 * (inst->smooth[i]); 981 // DD estimate is sum of two terms: current estimate and previous estimate 982 // directed decision update of snrPrior 983 snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) 984 * snrLocPost[i]; 985 // post and prior snr needed for step 2 986 } // end of loop over freqs 987 #ifdef PROCESS_FLOW_1 988 for (i = 0; i < inst->magnLen; i++) { 989 // gain filter 990 tmpFloat1 = inst->overdrive + snrLocPrior[i]; 991 tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1; 992 theFilter[i] = (float)tmpFloat2; 993 } // end of loop over freqs 994 #endif 995 // done with step 1: dd computation of prior and post snr 996 997 // 998 //STEP 2: compute speech/noise likelihood 999 // 1000 #ifdef PROCESS_FLOW_2 1001 // compute difference of input spectrum with learned/estimated noise spectrum 1002 WebRtcNs_ComputeSpectralDifference(inst, magn); 1003 // compute histograms for parameter decisions (thresholds and weights for features) 1004 // parameters are extracted once every window time (=inst->modelUpdatePars[1]) 1005 if (updateParsFlag >= 1) { 1006 // counter update 1007 inst->modelUpdatePars[3]--; 1008 // update histogram 1009 if (inst->modelUpdatePars[3] > 0) { 1010 WebRtcNs_FeatureParameterExtraction(inst, 0); 1011 } 1012 // compute model parameters 1013 if (inst->modelUpdatePars[3] == 0) { 1014 WebRtcNs_FeatureParameterExtraction(inst, 1); 1015 inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; 1016 // if wish to update only once, set flag to zero 1017 if (updateParsFlag == 1) { 1018 inst->modelUpdatePars[0] = 0; 1019 } else { 1020 // update every window: 1021 // get normalization for spectral difference for next window estimate 1022 inst->featureData[6] = inst->featureData[6] 1023 / ((float)inst->modelUpdatePars[1]); 1024 inst->featureData[5] = (float)0.5 * (inst->featureData[6] 1025 + inst->featureData[5]); 1026 inst->featureData[6] = (float)0.0; 1027 } 1028 } 1029 } 1030 // compute speech/noise probability 1031 WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost); 1032 // time-avg parameter for noise update 1033 gammaNoiseTmp = NOISE_UPDATE; 1034 for (i = 0; i < inst->magnLen; i++) { 1035 probSpeech = probSpeechFinal[i]; 1036 probNonSpeech = (float)1.0 - probSpeech; 1037 // temporary noise update: 1038 // use it for speech frames if update value is less than previous 1039 noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) 1040 * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); 1041 // 1042 // time-constant based on speech/noise state 1043 gammaNoiseOld = gammaNoiseTmp; 1044 gammaNoiseTmp = NOISE_UPDATE; 1045 // increase gamma (i.e., less noise update) for frame likely to be speech 1046 if (probSpeech > PROB_RANGE) { 1047 gammaNoiseTmp = SPEECH_UPDATE; 1048 } 1049 // conservative noise update 1050 if (probSpeech < PROB_RANGE) { 1051 inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]); 1052 } 1053 // noise update 1054 if (gammaNoiseTmp == gammaNoiseOld) { 1055 noise[i] = noiseUpdateTmp; 1056 } else { 1057 noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) 1058 * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); 1059 // allow for noise update downwards: 1060 // if noise update decreases the noise, it is safe, so allow it to happen 1061 if (noiseUpdateTmp < noise[i]) { 1062 noise[i] = noiseUpdateTmp; 1063 } 1064 } 1065 } // end of freq loop 1066 // done with step 2: noise update 1067 1068 // 1069 // STEP 3: compute dd update of prior snr and post snr based on new noise estimate 1070 // 1071 for (i = 0; i < inst->magnLen; i++) { 1072 // post and prior snr 1073 currentEstimateStsa = (float)0.0; 1074 if (magn[i] > noise[i]) { 1075 currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; 1076 } 1077 // DD estimate is sume of two terms: current estimate and previous estimate 1078 // directed decision update of snrPrior 1079 snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) 1080 * currentEstimateStsa; 1081 // gain filter 1082 tmpFloat1 = inst->overdrive + snrPrior; 1083 tmpFloat2 = (float)snrPrior / tmpFloat1; 1084 theFilter[i] = (float)tmpFloat2; 1085 } // end of loop over freqs 1086 // done with step3 1087 #endif 1088 #endif 1089 1090 for (i = 0; i < inst->magnLen; i++) { 1091 // flooring bottom 1092 if (theFilter[i] < inst->denoiseBound) { 1093 theFilter[i] = inst->denoiseBound; 1094 } 1095 // flooring top 1096 if (theFilter[i] > (float)1.0) { 1097 theFilter[i] = 1.0; 1098 } 1099 if (inst->blockInd < END_STARTUP_SHORT) { 1100 // flooring bottom 1101 if (theFilterTmp[i] < inst->denoiseBound) { 1102 theFilterTmp[i] = inst->denoiseBound; 1103 } 1104 // flooring top 1105 if (theFilterTmp[i] > (float)1.0) { 1106 theFilterTmp[i] = 1.0; 1107 } 1108 // Weight the two suppression filters 1109 theFilter[i] *= (inst->blockInd); 1110 theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd); 1111 theFilter[i] += theFilterTmp[i]; 1112 theFilter[i] /= (END_STARTUP_SHORT); 1113 } 1114 // smoothing 1115 #ifdef PROCESS_FLOW_0 1116 inst->smooth[i] *= SMOOTH; // value set to 0.7 in define.h file 1117 inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i]; 1118 #else 1119 inst->smooth[i] = theFilter[i]; 1120 #endif 1121 real[i] *= inst->smooth[i]; 1122 imag[i] *= inst->smooth[i]; 1123 } 1124 // keep track of noise and magn spectrum for next frame 1125 for (i = 0; i < inst->magnLen; i++) { 1126 inst->noisePrev[i] = noise[i]; 1127 inst->magnPrev[i] = magn[i]; 1128 } 1129 // back to time domain 1130 winData[0] = real[0]; 1131 winData[1] = real[inst->magnLen - 1]; 1132 for (i = 1; i < inst->magnLen - 1; i++) { 1133 winData[2 * i] = real[i]; 1134 winData[2 * i + 1] = imag[i]; 1135 } 1136 WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft); 1137 1138 for (i = 0; i < inst->anaLen; i++) { 1139 real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling 1140 } 1141 1142 //scale factor: only do it after END_STARTUP_LONG time 1143 factor = (float)1.0; 1144 if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) { 1145 factor1 = (float)1.0; 1146 factor2 = (float)1.0; 1147 1148 energy2 = 0.0; 1149 for (i = 0; i < inst->anaLen; i++) { 1150 energy2 += (float)real[i] * (float)real[i]; 1151 } 1152 gain = (float)sqrt(energy2 / (energy1 + (float)1.0)); 1153 1154 #ifdef PROCESS_FLOW_2 1155 // scaling for new version 1156 if (gain > B_LIM) { 1157 factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM); 1158 if (gain * factor1 > (float)1.0) { 1159 factor1 = (float)1.0 / gain; 1160 } 1161 } 1162 if (gain < B_LIM) { 1163 //don't reduce scale too much for pause regions: 1164 // attenuation here should be controlled by flooring 1165 if (gain <= inst->denoiseBound) { 1166 gain = inst->denoiseBound; 1167 } 1168 factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain); 1169 } 1170 //combine both scales with speech/noise prob: 1171 // note prior (priorSpeechProb) is not frequency dependent 1172 factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb) 1173 * factor2; 1174 #else 1175 if (gain > B_LIM) { 1176 factor = (float)1.0 + (float)1.3 * (gain - B_LIM); 1177 } else { 1178 factor = (float)1.0 + (float)2.0 * (gain - B_LIM); 1179 } 1180 if (gain * factor > (float)1.0) { 1181 factor = (float)1.0 / gain; 1182 } 1183 #endif 1184 } // out of inst->gainmap==1 1185 1186 // synthesis 1187 for (i = 0; i < inst->anaLen; i++) { 1188 inst->syntBuf[i] += factor * inst->window[i] * (float)real[i]; 1189 } 1190 // read out fully processed segment 1191 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { 1192 fout[i - inst->windShift] = inst->syntBuf[i]; 1193 } 1194 // update synthesis buffer 1195 memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, 1196 sizeof(float) * (inst->anaLen - inst->blockLen)); 1197 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, 1198 sizeof(float) * inst->blockLen); 1199 1200 // out buffer 1201 inst->outLen = inst->blockLen - inst->blockLen10ms; 1202 if (inst->blockLen > inst->blockLen10ms) { 1203 for (i = 0; i < inst->outLen; i++) { 1204 inst->outBuf[i] = fout[i + inst->blockLen10ms]; 1205 } 1206 } 1207 } // end of if out.len==0 1208 else { 1209 for (i = 0; i < inst->blockLen10ms; i++) { 1210 fout[i] = inst->outBuf[i]; 1211 } 1212 memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms, 1213 sizeof(float) * (inst->outLen - inst->blockLen10ms)); 1214 memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0, 1215 sizeof(float) * inst->blockLen10ms); 1216 inst->outLen -= inst->blockLen10ms; 1217 } 1218 1219 for (i = 0; i < inst->blockLen10ms; ++i) 1220 outFrame[i] = WEBRTC_SPL_SAT( 1221 WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); 1222 1223 // for time-domain gain of HB 1224 if (flagHB == 1) { 1225 for (i = 0; i < inst->magnLen; i++) { 1226 inst->speechProbHB[i] = probSpeechFinal[i]; 1227 } 1228 // average speech prob from low band 1229 // avg over second half (i.e., 4->8kHz) of freq. spectrum 1230 avgProbSpeechHB = 0.0; 1231 for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) { 1232 avgProbSpeechHB += inst->speechProbHB[i]; 1233 } 1234 avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); 1235 // average filter gain from low band 1236 // average over second half (i.e., 4->8kHz) of freq. spectrum 1237 avgFilterGainHB = 0.0; 1238 for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) { 1239 avgFilterGainHB += inst->smooth[i]; 1240 } 1241 avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); 1242 avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0; 1243 // gain based on speech prob: 1244 gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); 1245 //combine gain with low band gain 1246 gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB; 1247 if (avgProbSpeechHB >= (float)0.5) { 1248 gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB; 1249 } 1250 gainTimeDomainHB = gainTimeDomainHB * decayBweHB; 1251 //make sure gain is within flooring range 1252 // flooring bottom 1253 if (gainTimeDomainHB < inst->denoiseBound) { 1254 gainTimeDomainHB = inst->denoiseBound; 1255 } 1256 // flooring top 1257 if (gainTimeDomainHB > (float)1.0) { 1258 gainTimeDomainHB = 1.0; 1259 } 1260 //apply gain 1261 for (i = 0; i < inst->blockLen10ms; i++) { 1262 float o = gainTimeDomainHB * inst->dataBufHB[i]; 1263 outFrameHB[i] = WEBRTC_SPL_SAT( 1264 WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN); 1265 } 1266 } // end of H band gain computation 1267 // 1268 1269 return 0; 1270 } 1271