1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 #include <string.h> 13 #include <stdlib.h> 14 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 #include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" 17 #include "webrtc/modules/audio_processing/ns/ns_core.h" 18 #include "webrtc/modules/audio_processing/ns/windows_private.h" 19 #include "webrtc/modules/audio_processing/utility/fft4g.h" 20 21 // Set Feature Extraction Parameters 22 void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) { 23 // bin size of histogram 24 inst->featureExtractionParams.binSizeLrt = (float)0.1; 25 inst->featureExtractionParams.binSizeSpecFlat = (float)0.05; 26 inst->featureExtractionParams.binSizeSpecDiff = (float)0.1; 27 28 // range of histogram over which lrt threshold is computed 29 inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0; 30 31 // scale parameters: multiply dominant peaks of the histograms by scale factor 32 // to obtain thresholds for prior model 33 inst->featureExtractionParams.factor1ModelPars = 34 (float)1.20; // for lrt and spectral diff 35 inst->featureExtractionParams.factor2ModelPars = 36 (float)0.9; // for spectral_flatness: 37 // used when noise is flatter than speech 38 39 // peak limit for spectral flatness (varies between 0 and 1) 40 inst->featureExtractionParams.thresPosSpecFlat = (float)0.6; 41 42 // limit on spacing of two highest peaks in histogram: spacing determined by 43 // bin size 44 inst->featureExtractionParams.limitPeakSpacingSpecFlat = 45 2 * inst->featureExtractionParams.binSizeSpecFlat; 46 inst->featureExtractionParams.limitPeakSpacingSpecDiff = 47 2 * inst->featureExtractionParams.binSizeSpecDiff; 48 49 // limit on relevance of second peak: 50 inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5; 51 inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5; 52 53 // fluctuation limit of lrt feature 54 inst->featureExtractionParams.thresFluctLrt = (float)0.05; 55 56 // limit on the max and min values for the feature thresholds 57 inst->featureExtractionParams.maxLrt = (float)1.0; 58 inst->featureExtractionParams.minLrt = (float)0.20; 59 60 inst->featureExtractionParams.maxSpecFlat = (float)0.95; 61 inst->featureExtractionParams.minSpecFlat = (float)0.10; 62 63 inst->featureExtractionParams.maxSpecDiff = (float)1.0; 64 inst->featureExtractionParams.minSpecDiff = (float)0.16; 65 66 // criteria of weight of histogram peak to accept/reject feature 67 inst->featureExtractionParams.thresWeightSpecFlat = 68 (int)(0.3 * (inst->modelUpdatePars[1])); // for spectral flatness 69 inst->featureExtractionParams.thresWeightSpecDiff = 70 (int)(0.3 * (inst->modelUpdatePars[1])); // for spectral difference 71 } 72 73 // Initialize state 74 int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) { 75 int i; 76 // We only support 10ms frames 77 78 // check for valid pointer 79 if (inst == NULL) { 80 return -1; 81 } 82 83 // Initialization of struct 84 if (fs == 8000 || fs == 16000 || fs == 32000) { 85 inst->fs = fs; 86 } else { 87 return -1; 88 } 89 inst->windShift = 0; 90 if (fs == 8000) { 91 // We only support 10ms frames 92 inst->blockLen = 80; 93 inst->blockLen10ms = 80; 94 inst->anaLen = 128; 95 inst->window = kBlocks80w128; 96 inst->outLen = 0; 97 } else if (fs == 16000) { 98 // We only support 10ms frames 99 inst->blockLen = 160; 100 inst->blockLen10ms = 160; 101 inst->anaLen = 256; 102 inst->window = kBlocks160w256; 103 inst->outLen = 0; 104 } else if (fs == 32000) { 105 // We only support 10ms frames 106 inst->blockLen = 160; 107 inst->blockLen10ms = 160; 108 inst->anaLen = 256; 109 inst->window = kBlocks160w256; 110 inst->outLen = 0; 111 } 112 inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins 113 114 // Initialize fft work arrays. 115 inst->ip[0] = 0; // Setting this triggers initialization. 116 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 117 WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft); 118 119 memset(inst->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 120 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 121 memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 122 123 // for HB processing 124 memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX); 125 126 // for quantile noise estimation 127 memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); 128 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { 129 inst->lquantile[i] = (float)8.0; 130 inst->density[i] = (float)0.3; 131 } 132 133 for (i = 0; i < SIMULT; i++) { 134 inst->counter[i] = 135 (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); 136 } 137 138 inst->updates = 0; 139 140 // Wiener filter initialization 141 for (i = 0; i < HALF_ANAL_BLOCKL; i++) { 142 inst->smooth[i] = (float)1.0; 143 } 144 145 // Set the aggressiveness: default 146 inst->aggrMode = 0; 147 148 // initialize variables for new method 149 inst->priorSpeechProb = (float)0.5; // prior prob for speech/noise 150 for (i = 0; i < HALF_ANAL_BLOCKL; i++) { 151 inst->magnPrev[i] = (float)0.0; // previous mag spectrum 152 inst->noisePrev[i] = (float)0.0; // previous noise-spectrum 153 inst->logLrtTimeAvg[i] = 154 LRT_FEATURE_THR; // smooth LR ratio (same as threshold) 155 inst->magnAvgPause[i] = (float)0.0; // conservative noise spectrum estimate 156 inst->speechProb[i] = (float)0.0; // for estimation of HB in second pass 157 inst->initMagnEst[i] = (float)0.0; // initial average mag spectrum 158 } 159 160 // feature quantities 161 inst->featureData[0] = 162 SF_FEATURE_THR; // spectral flatness (start on threshold) 163 inst->featureData[1] = 164 (float)0.0; // spectral entropy: not used in this version 165 inst->featureData[2] = 166 (float)0.0; // spectral variance: not used in this version 167 inst->featureData[3] = 168 LRT_FEATURE_THR; // average lrt factor (start on threshold) 169 inst->featureData[4] = 170 SF_FEATURE_THR; // spectral template diff (start on threshold) 171 inst->featureData[5] = (float)0.0; // normalization for spectral-diff 172 inst->featureData[6] = 173 (float)0.0; // window time-average of input magnitude spectrum 174 175 // histogram quantities: used to estimate/update thresholds for features 176 for (i = 0; i < HIST_PAR_EST; i++) { 177 inst->histLrt[i] = 0; 178 inst->histSpecFlat[i] = 0; 179 inst->histSpecDiff[i] = 0; 180 } 181 182 inst->blockInd = -1; // frame counter 183 inst->priorModelPars[0] = 184 LRT_FEATURE_THR; // default threshold for lrt feature 185 inst->priorModelPars[1] = (float)0.5; // threshold for spectral flatness: 186 // determined on-line 187 inst->priorModelPars[2] = (float)1.0; // sgn_map par for spectral measure: 188 // 1 for flatness measure 189 inst->priorModelPars[3] = 190 (float)0.5; // threshold for template-difference feature: 191 // determined on-line 192 inst->priorModelPars[4] = 193 (float)1.0; // default weighting parameter for lrt feature 194 inst->priorModelPars[5] = (float)0.0; // default weighting parameter for 195 // spectral flatness feature 196 inst->priorModelPars[6] = (float)0.0; // default weighting parameter for 197 // spectral difference feature 198 199 inst->modelUpdatePars[0] = 2; // update flag for parameters: 200 // 0 no update, 1=update once, 2=update every window 201 inst->modelUpdatePars[1] = 500; // window for update 202 inst->modelUpdatePars[2] = 203 0; // counter for update of conservative noise spectrum 204 // counter if the feature thresholds are updated during the sequence 205 inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; 206 207 inst->signalEnergy = 0.0; 208 inst->sumMagn = 0.0; 209 inst->whiteNoiseLevel = 0.0; 210 inst->pinkNoiseNumerator = 0.0; 211 inst->pinkNoiseExp = 0.0; 212 213 WebRtcNs_set_feature_extraction_parameters(inst); 214 215 // default mode 216 WebRtcNs_set_policy_core(inst, 0); 217 218 memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX); 219 220 inst->initFlag = 1; 221 return 0; 222 } 223 224 int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) { 225 // allow for modes:0,1,2,3 226 if (mode < 0 || mode > 3) { 227 return (-1); 228 } 229 230 inst->aggrMode = mode; 231 if (mode == 0) { 232 inst->overdrive = (float)1.0; 233 inst->denoiseBound = (float)0.5; 234 inst->gainmap = 0; 235 } else if (mode == 1) { 236 // inst->overdrive = (float)1.25; 237 inst->overdrive = (float)1.0; 238 inst->denoiseBound = (float)0.25; 239 inst->gainmap = 1; 240 } else if (mode == 2) { 241 // inst->overdrive = (float)1.25; 242 inst->overdrive = (float)1.1; 243 inst->denoiseBound = (float)0.125; 244 inst->gainmap = 1; 245 } else if (mode == 3) { 246 // inst->overdrive = (float)1.30; 247 inst->overdrive = (float)1.25; 248 inst->denoiseBound = (float)0.09; 249 inst->gainmap = 1; 250 } 251 return 0; 252 } 253 254 // Estimate noise 255 void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) { 256 int i, s, offset; 257 float lmagn[HALF_ANAL_BLOCKL], delta; 258 259 if (inst->updates < END_STARTUP_LONG) { 260 inst->updates++; 261 } 262 263 for (i = 0; i < inst->magnLen; i++) { 264 lmagn[i] = (float)log(magn[i]); 265 } 266 267 // loop over simultaneous estimates 268 for (s = 0; s < SIMULT; s++) { 269 offset = s * inst->magnLen; 270 271 // newquantest(...) 272 for (i = 0; i < inst->magnLen; i++) { 273 // compute delta 274 if (inst->density[offset + i] > 1.0) { 275 delta = FACTOR * (float)1.0 / inst->density[offset + i]; 276 } else { 277 delta = FACTOR; 278 } 279 280 // update log quantile estimate 281 if (lmagn[i] > inst->lquantile[offset + i]) { 282 inst->lquantile[offset + i] += 283 QUANTILE * delta / (float)(inst->counter[s] + 1); 284 } else { 285 inst->lquantile[offset + i] -= 286 ((float)1.0 - QUANTILE) * delta / (float)(inst->counter[s] + 1); 287 } 288 289 // update density estimate 290 if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) { 291 inst->density[offset + i] = 292 ((float)inst->counter[s] * inst->density[offset + i] + 293 (float)1.0 / ((float)2.0 * WIDTH)) / 294 (float)(inst->counter[s] + 1); 295 } 296 } // end loop over magnitude spectrum 297 298 if (inst->counter[s] >= END_STARTUP_LONG) { 299 inst->counter[s] = 0; 300 if (inst->updates >= END_STARTUP_LONG) { 301 for (i = 0; i < inst->magnLen; i++) { 302 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 303 } 304 } 305 } 306 307 inst->counter[s]++; 308 } // end loop over simultaneous estimates 309 310 // Sequentially update the noise during startup 311 if (inst->updates < END_STARTUP_LONG) { 312 // Use the last "s" to get noise during startup that differ from zero. 313 for (i = 0; i < inst->magnLen; i++) { 314 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 315 } 316 } 317 318 for (i = 0; i < inst->magnLen; i++) { 319 noise[i] = inst->quantile[i]; 320 } 321 } 322 323 // Extract thresholds for feature parameters 324 // histograms are computed over some window_size (given by 325 // inst->modelUpdatePars[1]) 326 // thresholds and weights are extracted every window 327 // flag 0 means update histogram only, flag 1 means compute the 328 // thresholds/weights 329 // threshold and weights are returned in: inst->priorModelPars 330 void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) { 331 int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; 332 int maxPeak1, maxPeak2; 333 int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, 334 weightPeak2SpecDiff; 335 336 float binMid, featureSum; 337 float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; 338 float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; 339 340 // 3 features: lrt, flatness, difference 341 // lrt_feature = inst->featureData[3]; 342 // flat_feature = inst->featureData[0]; 343 // diff_feature = inst->featureData[4]; 344 345 // update histograms 346 if (flag == 0) { 347 // LRT 348 if ((inst->featureData[3] < 349 HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) && 350 (inst->featureData[3] >= 0.0)) { 351 i = (int)(inst->featureData[3] / 352 inst->featureExtractionParams.binSizeLrt); 353 inst->histLrt[i]++; 354 } 355 // Spectral flatness 356 if ((inst->featureData[0] < 357 HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecFlat) && 358 (inst->featureData[0] >= 0.0)) { 359 i = (int)(inst->featureData[0] / 360 inst->featureExtractionParams.binSizeSpecFlat); 361 inst->histSpecFlat[i]++; 362 } 363 // Spectral difference 364 if ((inst->featureData[4] < 365 HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecDiff) && 366 (inst->featureData[4] >= 0.0)) { 367 i = (int)(inst->featureData[4] / 368 inst->featureExtractionParams.binSizeSpecDiff); 369 inst->histSpecDiff[i]++; 370 } 371 } 372 373 // extract parameters for speech/noise probability 374 if (flag == 1) { 375 // lrt feature: compute the average over 376 // inst->featureExtractionParams.rangeAvgHistLrt 377 avgHistLrt = 0.0; 378 avgHistLrtCompl = 0.0; 379 avgSquareHistLrt = 0.0; 380 numHistLrt = 0; 381 for (i = 0; i < HIST_PAR_EST; i++) { 382 binMid = 383 ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt; 384 if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) { 385 avgHistLrt += inst->histLrt[i] * binMid; 386 numHistLrt += inst->histLrt[i]; 387 } 388 avgSquareHistLrt += inst->histLrt[i] * binMid * binMid; 389 avgHistLrtCompl += inst->histLrt[i] * binMid; 390 } 391 if (numHistLrt > 0) { 392 avgHistLrt = avgHistLrt / ((float)numHistLrt); 393 } 394 avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]); 395 avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]); 396 fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; 397 // get threshold for lrt feature: 398 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { 399 // very low fluct, so likely noise 400 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; 401 } else { 402 inst->priorModelPars[0] = 403 inst->featureExtractionParams.factor1ModelPars * avgHistLrt; 404 // check if value is within min/max range 405 if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) { 406 inst->priorModelPars[0] = inst->featureExtractionParams.minLrt; 407 } 408 if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) { 409 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; 410 } 411 } 412 // done with lrt feature 413 414 // for spectral flatness and spectral difference: compute the main peaks of 415 // histogram 416 maxPeak1 = 0; 417 maxPeak2 = 0; 418 posPeak1SpecFlat = 0.0; 419 posPeak2SpecFlat = 0.0; 420 weightPeak1SpecFlat = 0; 421 weightPeak2SpecFlat = 0; 422 423 // peaks for flatness 424 for (i = 0; i < HIST_PAR_EST; i++) { 425 binMid = ((float)i + (float)0.5) * 426 inst->featureExtractionParams.binSizeSpecFlat; 427 if (inst->histSpecFlat[i] > maxPeak1) { 428 // Found new "first" peak 429 maxPeak2 = maxPeak1; 430 weightPeak2SpecFlat = weightPeak1SpecFlat; 431 posPeak2SpecFlat = posPeak1SpecFlat; 432 433 maxPeak1 = inst->histSpecFlat[i]; 434 weightPeak1SpecFlat = inst->histSpecFlat[i]; 435 posPeak1SpecFlat = binMid; 436 } else if (inst->histSpecFlat[i] > maxPeak2) { 437 // Found new "second" peak 438 maxPeak2 = inst->histSpecFlat[i]; 439 weightPeak2SpecFlat = inst->histSpecFlat[i]; 440 posPeak2SpecFlat = binMid; 441 } 442 } 443 444 // compute two peaks for spectral difference 445 maxPeak1 = 0; 446 maxPeak2 = 0; 447 posPeak1SpecDiff = 0.0; 448 posPeak2SpecDiff = 0.0; 449 weightPeak1SpecDiff = 0; 450 weightPeak2SpecDiff = 0; 451 // peaks for spectral difference 452 for (i = 0; i < HIST_PAR_EST; i++) { 453 binMid = ((float)i + (float)0.5) * 454 inst->featureExtractionParams.binSizeSpecDiff; 455 if (inst->histSpecDiff[i] > maxPeak1) { 456 // Found new "first" peak 457 maxPeak2 = maxPeak1; 458 weightPeak2SpecDiff = weightPeak1SpecDiff; 459 posPeak2SpecDiff = posPeak1SpecDiff; 460 461 maxPeak1 = inst->histSpecDiff[i]; 462 weightPeak1SpecDiff = inst->histSpecDiff[i]; 463 posPeak1SpecDiff = binMid; 464 } else if (inst->histSpecDiff[i] > maxPeak2) { 465 // Found new "second" peak 466 maxPeak2 = inst->histSpecDiff[i]; 467 weightPeak2SpecDiff = inst->histSpecDiff[i]; 468 posPeak2SpecDiff = binMid; 469 } 470 } 471 472 // for spectrum flatness feature 473 useFeatureSpecFlat = 1; 474 // merge the two peaks if they are close 475 if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) < 476 inst->featureExtractionParams.limitPeakSpacingSpecFlat) && 477 (weightPeak2SpecFlat > 478 inst->featureExtractionParams.limitPeakWeightsSpecFlat * 479 weightPeak1SpecFlat)) { 480 weightPeak1SpecFlat += weightPeak2SpecFlat; 481 posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat); 482 } 483 // reject if weight of peaks is not large enough, or peak value too small 484 if (weightPeak1SpecFlat < 485 inst->featureExtractionParams.thresWeightSpecFlat || 486 posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) { 487 useFeatureSpecFlat = 0; 488 } 489 // if selected, get the threshold 490 if (useFeatureSpecFlat == 1) { 491 // compute the threshold 492 inst->priorModelPars[1] = 493 inst->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat; 494 // check if value is within min/max range 495 if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) { 496 inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat; 497 } 498 if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) { 499 inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat; 500 } 501 } 502 // done with flatness feature 503 504 // for template feature 505 useFeatureSpecDiff = 1; 506 // merge the two peaks if they are close 507 if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) < 508 inst->featureExtractionParams.limitPeakSpacingSpecDiff) && 509 (weightPeak2SpecDiff > 510 inst->featureExtractionParams.limitPeakWeightsSpecDiff * 511 weightPeak1SpecDiff)) { 512 weightPeak1SpecDiff += weightPeak2SpecDiff; 513 posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff); 514 } 515 // get the threshold value 516 inst->priorModelPars[3] = 517 inst->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff; 518 // reject if weight of peaks is not large enough 519 if (weightPeak1SpecDiff < 520 inst->featureExtractionParams.thresWeightSpecDiff) { 521 useFeatureSpecDiff = 0; 522 } 523 // check if value is within min/max range 524 if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) { 525 inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff; 526 } 527 if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) { 528 inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff; 529 } 530 // done with spectral difference feature 531 532 // don't use template feature if fluctuation of lrt feature is very low: 533 // most likely just noise state 534 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { 535 useFeatureSpecDiff = 0; 536 } 537 538 // select the weights between the features 539 // inst->priorModelPars[4] is weight for lrt: always selected 540 // inst->priorModelPars[5] is weight for spectral flatness 541 // inst->priorModelPars[6] is weight for spectral difference 542 featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); 543 inst->priorModelPars[4] = (float)1.0 / featureSum; 544 inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; 545 inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; 546 547 // set hists to zero for next update 548 if (inst->modelUpdatePars[0] >= 1) { 549 for (i = 0; i < HIST_PAR_EST; i++) { 550 inst->histLrt[i] = 0; 551 inst->histSpecFlat[i] = 0; 552 inst->histSpecDiff[i] = 0; 553 } 554 } 555 } // end of flag == 1 556 } 557 558 // Compute spectral flatness on input spectrum 559 // magnIn is the magnitude spectrum 560 // spectral flatness is returned in inst->featureData[0] 561 void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) { 562 int i; 563 int shiftLP = 1; // option to remove first bin(s) from spectral measures 564 float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; 565 566 // comute spectral measures 567 // for flatness 568 avgSpectralFlatnessNum = 0.0; 569 avgSpectralFlatnessDen = inst->sumMagn; 570 for (i = 0; i < shiftLP; i++) { 571 avgSpectralFlatnessDen -= magnIn[i]; 572 } 573 // compute log of ratio of the geometric to arithmetic mean: check for log(0) 574 // case 575 for (i = shiftLP; i < inst->magnLen; i++) { 576 if (magnIn[i] > 0.0) { 577 avgSpectralFlatnessNum += (float)log(magnIn[i]); 578 } else { 579 inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0]; 580 return; 581 } 582 } 583 // normalize 584 avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen; 585 avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen; 586 587 // ratio and inverse log: check for case of log(0) 588 spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; 589 590 // time-avg update of spectral flatness feature 591 inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]); 592 // done with flatness feature 593 } 594 595 // Compute the difference measure between input spectrum and a template/learned 596 // noise spectrum 597 // magnIn is the input spectrum 598 // the reference/template spectrum is inst->magnAvgPause[i] 599 // returns (normalized) spectral difference in inst->featureData[4] 600 void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) { 601 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / 602 // var(magnAvgPause) 603 int i; 604 float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; 605 606 avgPause = 0.0; 607 avgMagn = inst->sumMagn; 608 // compute average quantities 609 for (i = 0; i < inst->magnLen; i++) { 610 // conservative smooth noise spectrum from pause frames 611 avgPause += inst->magnAvgPause[i]; 612 } 613 avgPause = avgPause / ((float)inst->magnLen); 614 avgMagn = avgMagn / ((float)inst->magnLen); 615 616 covMagnPause = 0.0; 617 varPause = 0.0; 618 varMagn = 0.0; 619 // compute variance and covariance quantities 620 for (i = 0; i < inst->magnLen; i++) { 621 covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause); 622 varPause += 623 (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause); 624 varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); 625 } 626 covMagnPause = covMagnPause / ((float)inst->magnLen); 627 varPause = varPause / ((float)inst->magnLen); 628 varMagn = varMagn / ((float)inst->magnLen); 629 // update of average magnitude spectrum 630 inst->featureData[6] += inst->signalEnergy; 631 632 avgDiffNormMagn = 633 varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001); 634 // normalize and compute time-avg update of difference feature 635 avgDiffNormMagn = 636 (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001)); 637 inst->featureData[4] += 638 SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]); 639 } 640 641 // Compute speech/noise probability 642 // speech/noise probability is returned in: probSpeechFinal 643 // magn is the input magnitude spectrum 644 // noise is the noise spectrum 645 // snrLocPrior is the prior snr for each freq. 646 // snr loc_post is the post snr for each freq. 647 void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, 648 float* probSpeechFinal, 649 float* snrLocPrior, 650 float* snrLocPost) { 651 int i, sgnMap; 652 float invLrt, gainPrior, indPrior; 653 float logLrtTimeAvgKsum, besselTmp; 654 float indicator0, indicator1, indicator2; 655 float tmpFloat1, tmpFloat2; 656 float weightIndPrior0, weightIndPrior1, weightIndPrior2; 657 float threshPrior0, threshPrior1, threshPrior2; 658 float widthPrior, widthPrior0, widthPrior1, widthPrior2; 659 660 widthPrior0 = WIDTH_PR_MAP; 661 widthPrior1 = (float)2.0 * WIDTH_PR_MAP; // width for pause region: 662 // lower range, so increase width in tanh map 663 widthPrior2 = (float)2.0 * WIDTH_PR_MAP; // for spectral-difference measure 664 665 // threshold parameters for features 666 threshPrior0 = inst->priorModelPars[0]; 667 threshPrior1 = inst->priorModelPars[1]; 668 threshPrior2 = inst->priorModelPars[3]; 669 670 // sign for flatness feature 671 sgnMap = (int)(inst->priorModelPars[2]); 672 673 // weight parameters for features 674 weightIndPrior0 = inst->priorModelPars[4]; 675 weightIndPrior1 = inst->priorModelPars[5]; 676 weightIndPrior2 = inst->priorModelPars[6]; 677 678 // compute feature based on average LR factor 679 // this is the average over all frequencies of the smooth log lrt 680 logLrtTimeAvgKsum = 0.0; 681 for (i = 0; i < inst->magnLen; i++) { 682 tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i]; 683 tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001); 684 besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2; 685 inst->logLrtTimeAvg[i] += 686 LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - inst->logLrtTimeAvg[i]); 687 logLrtTimeAvgKsum += inst->logLrtTimeAvg[i]; 688 } 689 logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen); 690 inst->featureData[3] = logLrtTimeAvgKsum; 691 // done with computation of LR factor 692 693 // 694 // compute the indicator functions 695 // 696 697 // average lrt feature 698 widthPrior = widthPrior0; 699 // use larger width in tanh map for pause regions 700 if (logLrtTimeAvgKsum < threshPrior0) { 701 widthPrior = widthPrior1; 702 } 703 // compute indicator function: sigmoid map 704 indicator0 = (float)0.5 * 705 ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 706 (float)1.0); 707 708 // spectral flatness feature 709 tmpFloat1 = inst->featureData[0]; 710 widthPrior = widthPrior0; 711 // use larger width in tanh map for pause regions 712 if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { 713 widthPrior = widthPrior1; 714 } 715 if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { 716 widthPrior = widthPrior1; 717 } 718 // compute indicator function: sigmoid map 719 indicator1 = 720 (float)0.5 * 721 ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 722 (float)1.0); 723 724 // for template spectrum-difference 725 tmpFloat1 = inst->featureData[4]; 726 widthPrior = widthPrior0; 727 // use larger width in tanh map for pause regions 728 if (tmpFloat1 < threshPrior2) { 729 widthPrior = widthPrior2; 730 } 731 // compute indicator function: sigmoid map 732 indicator2 = 733 (float)0.5 * 734 ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + (float)1.0); 735 736 // combine the indicator function with the feature weights 737 indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + 738 weightIndPrior2 * indicator2; 739 // done with computing indicator function 740 741 // compute the prior probability 742 inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb); 743 // make sure probabilities are within range: keep floor to 0.01 744 if (inst->priorSpeechProb > 1.0) { 745 inst->priorSpeechProb = (float)1.0; 746 } 747 if (inst->priorSpeechProb < 0.01) { 748 inst->priorSpeechProb = (float)0.01; 749 } 750 751 // final speech probability: combine prior model with LR factor: 752 gainPrior = ((float)1.0 - inst->priorSpeechProb) / 753 (inst->priorSpeechProb + (float)0.0001); 754 for (i = 0; i < inst->magnLen; i++) { 755 invLrt = (float)exp(-inst->logLrtTimeAvg[i]); 756 invLrt = (float)gainPrior * invLrt; 757 probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt); 758 } 759 } 760 761 int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) { 762 int i; 763 const int kStartBand = 5; // Skip first frequency bins during estimation. 764 int updateParsFlag; 765 float energy; 766 float signalEnergy, sumMagn; 767 float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech; 768 float gammaNoiseTmp, gammaNoiseOld; 769 float noiseUpdateTmp, fTmp; 770 float winData[ANAL_BLOCKL_MAX]; 771 float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; 772 float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; 773 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; 774 // Variables during startup 775 float sum_log_i = 0.0; 776 float sum_log_i_square = 0.0; 777 float sum_log_magn = 0.0; 778 float sum_log_i_log_magn = 0.0; 779 float parametric_exp = 0.0; 780 float parametric_num = 0.0; 781 782 // Check that initiation has been done 783 if (inst->initFlag != 1) { 784 return (-1); 785 } 786 // 787 updateParsFlag = inst->modelUpdatePars[0]; 788 // 789 790 // update analysis buffer for L band 791 memcpy(inst->analyzeBuf, 792 inst->analyzeBuf + inst->blockLen10ms, 793 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 794 memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms, 795 speechFrame, 796 sizeof(float) * inst->blockLen10ms); 797 798 // check if processing needed 799 if (inst->outLen == 0) { 800 // windowing 801 energy = 0.0; 802 for (i = 0; i < inst->anaLen; i++) { 803 winData[i] = inst->window[i] * inst->analyzeBuf[i]; 804 energy += winData[i] * winData[i]; 805 } 806 if (energy == 0.0) { 807 // we want to avoid updating statistics in this case: 808 // Updating feature statistics when we have zeros only will cause 809 // thresholds to move towards zero signal situations. This in turn has the 810 // effect that once the signal is "turned on" (non-zero values) everything 811 // will be treated as speech and there is no noise suppression effect. 812 // Depending on the duration of the inactive signal it takes a 813 // considerable amount of time for the system to learn what is noise and 814 // what is speech. 815 return 0; 816 } 817 818 // 819 inst->blockInd++; // Update the block index only when we process a block. 820 // FFT 821 WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); 822 823 imag[0] = 0; 824 real[0] = winData[0]; 825 magn[0] = (float)(fabs(real[0]) + 1.0f); 826 imag[inst->magnLen - 1] = 0; 827 real[inst->magnLen - 1] = winData[1]; 828 magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); 829 signalEnergy = (float)(real[0] * real[0]) + 830 (float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]); 831 sumMagn = magn[0] + magn[inst->magnLen - 1]; 832 if (inst->blockInd < END_STARTUP_SHORT) { 833 tmpFloat2 = log((float)(inst->magnLen - 1)); 834 sum_log_i = tmpFloat2; 835 sum_log_i_square = tmpFloat2 * tmpFloat2; 836 tmpFloat1 = log(magn[inst->magnLen - 1]); 837 sum_log_magn = tmpFloat1; 838 sum_log_i_log_magn = tmpFloat2 * tmpFloat1; 839 } 840 for (i = 1; i < inst->magnLen - 1; i++) { 841 real[i] = winData[2 * i]; 842 imag[i] = winData[2 * i + 1]; 843 // magnitude spectrum 844 fTmp = real[i] * real[i]; 845 fTmp += imag[i] * imag[i]; 846 signalEnergy += fTmp; 847 magn[i] = ((float)sqrt(fTmp)) + 1.0f; 848 sumMagn += magn[i]; 849 if (inst->blockInd < END_STARTUP_SHORT) { 850 if (i >= kStartBand) { 851 tmpFloat2 = log((float)i); 852 sum_log_i += tmpFloat2; 853 sum_log_i_square += tmpFloat2 * tmpFloat2; 854 tmpFloat1 = log(magn[i]); 855 sum_log_magn += tmpFloat1; 856 sum_log_i_log_magn += tmpFloat2 * tmpFloat1; 857 } 858 } 859 } 860 signalEnergy = signalEnergy / ((float)inst->magnLen); 861 inst->signalEnergy = signalEnergy; 862 inst->sumMagn = sumMagn; 863 864 // compute spectral flatness on input spectrum 865 WebRtcNs_ComputeSpectralFlatness(inst, magn); 866 // quantile noise estimate 867 WebRtcNs_NoiseEstimation(inst, magn, noise); 868 // compute simplified noise model during startup 869 if (inst->blockInd < END_STARTUP_SHORT) { 870 // Estimate White noise 871 inst->whiteNoiseLevel += 872 sumMagn / ((float)inst->magnLen) * inst->overdrive; 873 // Estimate Pink noise parameters 874 tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand)); 875 tmpFloat1 -= (sum_log_i * sum_log_i); 876 tmpFloat2 = 877 (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); 878 tmpFloat3 = tmpFloat2 / tmpFloat1; 879 // Constrain the estimated spectrum to be positive 880 if (tmpFloat3 < 0.0f) { 881 tmpFloat3 = 0.0f; 882 } 883 inst->pinkNoiseNumerator += tmpFloat3; 884 tmpFloat2 = (sum_log_i * sum_log_magn); 885 tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn; 886 tmpFloat3 = tmpFloat2 / tmpFloat1; 887 // Constrain the pink noise power to be in the interval [0, 1]; 888 if (tmpFloat3 < 0.0f) { 889 tmpFloat3 = 0.0f; 890 } 891 if (tmpFloat3 > 1.0f) { 892 tmpFloat3 = 1.0f; 893 } 894 inst->pinkNoiseExp += tmpFloat3; 895 896 // Calculate frequency independent parts of parametric noise estimate. 897 if (inst->pinkNoiseExp > 0.0f) { 898 // Use pink noise estimate 899 parametric_num = 900 exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1)); 901 parametric_num *= (float)(inst->blockInd + 1); 902 parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1); 903 } 904 for (i = 0; i < inst->magnLen; i++) { 905 // Estimate the background noise using the white and pink noise 906 // parameters 907 if (inst->pinkNoiseExp == 0.0f) { 908 // Use white noise estimate 909 inst->parametricNoise[i] = inst->whiteNoiseLevel; 910 } else { 911 // Use pink noise estimate 912 float use_band = (float)(i < kStartBand ? kStartBand : i); 913 inst->parametricNoise[i] = 914 parametric_num / pow(use_band, parametric_exp); 915 } 916 // Weight quantile noise with modeled noise 917 noise[i] *= (inst->blockInd); 918 tmpFloat2 = 919 inst->parametricNoise[i] * (END_STARTUP_SHORT - inst->blockInd); 920 noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1)); 921 noise[i] /= END_STARTUP_SHORT; 922 } 923 } 924 // compute average signal during END_STARTUP_LONG time: 925 // used to normalize spectral difference measure 926 if (inst->blockInd < END_STARTUP_LONG) { 927 inst->featureData[5] *= inst->blockInd; 928 inst->featureData[5] += signalEnergy; 929 inst->featureData[5] /= (inst->blockInd + 1); 930 } 931 932 // start processing at frames == converged+1 933 // STEP 1: compute prior and post snr based on quantile noise est 934 // compute DD estimate of prior SNR: needed for new method 935 for (i = 0; i < inst->magnLen; i++) { 936 // post snr 937 snrLocPost[i] = (float)0.0; 938 if (magn[i] > noise[i]) { 939 snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; 940 } 941 // previous post snr 942 // previous estimate: based on previous frame with gain filter 943 inst->previousEstimateStsa[i] = inst->magnPrev[i] / 944 (inst->noisePrev[i] + (float)0.0001) * 945 (inst->smooth[i]); 946 // DD estimate is sum of two terms: current estimate and previous estimate 947 // directed decision update of snrPrior 948 snrLocPrior[i] = DD_PR_SNR * inst->previousEstimateStsa[i] + 949 ((float)1.0 - DD_PR_SNR) * snrLocPost[i]; 950 // post and prior snr needed for step 2 951 } // end of loop over freqs 952 // done with step 1: dd computation of prior and post snr 953 954 // STEP 2: compute speech/noise likelihood 955 // compute difference of input spectrum with learned/estimated noise 956 // spectrum 957 WebRtcNs_ComputeSpectralDifference(inst, magn); 958 // compute histograms for parameter decisions (thresholds and weights for 959 // features) 960 // parameters are extracted once every window time 961 // (=inst->modelUpdatePars[1]) 962 if (updateParsFlag >= 1) { 963 // counter update 964 inst->modelUpdatePars[3]--; 965 // update histogram 966 if (inst->modelUpdatePars[3] > 0) { 967 WebRtcNs_FeatureParameterExtraction(inst, 0); 968 } 969 // compute model parameters 970 if (inst->modelUpdatePars[3] == 0) { 971 WebRtcNs_FeatureParameterExtraction(inst, 1); 972 inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; 973 // if wish to update only once, set flag to zero 974 if (updateParsFlag == 1) { 975 inst->modelUpdatePars[0] = 0; 976 } else { 977 // update every window: 978 // get normalization for spectral difference for next window estimate 979 inst->featureData[6] = 980 inst->featureData[6] / ((float)inst->modelUpdatePars[1]); 981 inst->featureData[5] = 982 (float)0.5 * (inst->featureData[6] + inst->featureData[5]); 983 inst->featureData[6] = (float)0.0; 984 } 985 } 986 } 987 // compute speech/noise probability 988 WebRtcNs_SpeechNoiseProb(inst, inst->speechProb, snrLocPrior, snrLocPost); 989 // time-avg parameter for noise update 990 gammaNoiseTmp = NOISE_UPDATE; 991 for (i = 0; i < inst->magnLen; i++) { 992 probSpeech = inst->speechProb[i]; 993 probNonSpeech = (float)1.0 - probSpeech; 994 // temporary noise update: 995 // use it for speech frames if update value is less than previous 996 noiseUpdateTmp = 997 gammaNoiseTmp * inst->noisePrev[i] + 998 ((float)1.0 - gammaNoiseTmp) * 999 (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); 1000 // 1001 // time-constant based on speech/noise state 1002 gammaNoiseOld = gammaNoiseTmp; 1003 gammaNoiseTmp = NOISE_UPDATE; 1004 // increase gamma (i.e., less noise update) for frame likely to be speech 1005 if (probSpeech > PROB_RANGE) { 1006 gammaNoiseTmp = SPEECH_UPDATE; 1007 } 1008 // conservative noise update 1009 if (probSpeech < PROB_RANGE) { 1010 inst->magnAvgPause[i] += 1011 GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]); 1012 } 1013 // noise update 1014 if (gammaNoiseTmp == gammaNoiseOld) { 1015 noise[i] = noiseUpdateTmp; 1016 } else { 1017 noise[i] = 1018 gammaNoiseTmp * inst->noisePrev[i] + 1019 ((float)1.0 - gammaNoiseTmp) * 1020 (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); 1021 // allow for noise update downwards: 1022 // if noise update decreases the noise, it is safe, so allow it to 1023 // happen 1024 if (noiseUpdateTmp < noise[i]) { 1025 noise[i] = noiseUpdateTmp; 1026 } 1027 } 1028 } // end of freq loop 1029 // done with step 2: noise update 1030 1031 // keep track of noise spectrum for next frame 1032 for (i = 0; i < inst->magnLen; i++) { 1033 inst->noisePrev[i] = noise[i]; 1034 } 1035 } // end of if inst->outLen == 0 1036 1037 return 0; 1038 } 1039 1040 int WebRtcNs_ProcessCore(NSinst_t* inst, 1041 float* speechFrame, 1042 float* speechFrameHB, 1043 float* outFrame, 1044 float* outFrameHB) { 1045 // main routine for noise reduction 1046 int flagHB = 0; 1047 int i; 1048 1049 float energy1, energy2, gain, factor, factor1, factor2; 1050 float snrPrior, currentEstimateStsa; 1051 float tmpFloat1, tmpFloat2; 1052 float fTmp; 1053 float fout[BLOCKL_MAX]; 1054 float winData[ANAL_BLOCKL_MAX]; 1055 float magn[HALF_ANAL_BLOCKL]; 1056 float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; 1057 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; 1058 1059 // SWB variables 1060 int deltaBweHB = 1; 1061 int deltaGainHB = 1; 1062 float decayBweHB = 1.0; 1063 float gainMapParHB = 1.0; 1064 float gainTimeDomainHB = 1.0; 1065 float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; 1066 1067 // Check that initiation has been done 1068 if (inst->initFlag != 1) { 1069 return (-1); 1070 } 1071 // Check for valid pointers based on sampling rate 1072 if (inst->fs == 32000) { 1073 if (speechFrameHB == NULL) { 1074 return -1; 1075 } 1076 flagHB = 1; 1077 // range for averaging low band quantities for H band gain 1078 deltaBweHB = (int)inst->magnLen / 4; 1079 deltaGainHB = deltaBweHB; 1080 } 1081 1082 // update analysis buffer for L band 1083 memcpy(inst->dataBuf, 1084 inst->dataBuf + inst->blockLen10ms, 1085 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 1086 memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, 1087 speechFrame, 1088 sizeof(float) * inst->blockLen10ms); 1089 1090 if (flagHB == 1) { 1091 // update analysis buffer for H band 1092 memcpy(inst->dataBufHB, 1093 inst->dataBufHB + inst->blockLen10ms, 1094 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 1095 memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, 1096 speechFrameHB, 1097 sizeof(float) * inst->blockLen10ms); 1098 } 1099 1100 // check if processing needed 1101 if (inst->outLen == 0) { 1102 // windowing 1103 energy1 = 0.0; 1104 for (i = 0; i < inst->anaLen; i++) { 1105 winData[i] = inst->window[i] * inst->dataBuf[i]; 1106 energy1 += winData[i] * winData[i]; 1107 } 1108 if (energy1 == 0.0) { 1109 // synthesize the special case of zero input 1110 // read out fully processed segment 1111 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { 1112 fout[i - inst->windShift] = inst->syntBuf[i]; 1113 } 1114 // update synthesis buffer 1115 memcpy(inst->syntBuf, 1116 inst->syntBuf + inst->blockLen, 1117 sizeof(float) * (inst->anaLen - inst->blockLen)); 1118 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 1119 0, 1120 sizeof(float) * inst->blockLen); 1121 1122 // out buffer 1123 inst->outLen = inst->blockLen - inst->blockLen10ms; 1124 if (inst->blockLen > inst->blockLen10ms) { 1125 for (i = 0; i < inst->outLen; i++) { 1126 inst->outBuf[i] = fout[i + inst->blockLen10ms]; 1127 } 1128 } 1129 for (i = 0; i < inst->blockLen10ms; ++i) 1130 outFrame[i] = WEBRTC_SPL_SAT( 1131 WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); 1132 1133 // for time-domain gain of HB 1134 if (flagHB == 1) 1135 for (i = 0; i < inst->blockLen10ms; ++i) 1136 outFrameHB[i] = WEBRTC_SPL_SAT( 1137 WEBRTC_SPL_WORD16_MAX, inst->dataBufHB[i], WEBRTC_SPL_WORD16_MIN); 1138 1139 return 0; 1140 } 1141 1142 // FFT 1143 WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); 1144 1145 imag[0] = 0; 1146 real[0] = winData[0]; 1147 magn[0] = (float)(fabs(real[0]) + 1.0f); 1148 imag[inst->magnLen - 1] = 0; 1149 real[inst->magnLen - 1] = winData[1]; 1150 magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); 1151 if (inst->blockInd < END_STARTUP_SHORT) { 1152 inst->initMagnEst[0] += magn[0]; 1153 inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1]; 1154 } 1155 for (i = 1; i < inst->magnLen - 1; i++) { 1156 real[i] = winData[2 * i]; 1157 imag[i] = winData[2 * i + 1]; 1158 // magnitude spectrum 1159 fTmp = real[i] * real[i]; 1160 fTmp += imag[i] * imag[i]; 1161 magn[i] = ((float)sqrt(fTmp)) + 1.0f; 1162 if (inst->blockInd < END_STARTUP_SHORT) { 1163 inst->initMagnEst[i] += magn[i]; 1164 } 1165 } 1166 1167 // Compute dd update of prior snr and post snr based on new noise estimate 1168 for (i = 0; i < inst->magnLen; i++) { 1169 // post and prior snr 1170 currentEstimateStsa = (float)0.0; 1171 if (magn[i] > inst->noisePrev[i]) { 1172 currentEstimateStsa = 1173 magn[i] / (inst->noisePrev[i] + (float)0.0001) - (float)1.0; 1174 } 1175 // DD estimate is sume of two terms: current estimate and previous 1176 // estimate 1177 // directed decision update of snrPrior 1178 snrPrior = DD_PR_SNR * inst->previousEstimateStsa[i] + 1179 ((float)1.0 - DD_PR_SNR) * currentEstimateStsa; 1180 // gain filter 1181 tmpFloat1 = inst->overdrive + snrPrior; 1182 tmpFloat2 = (float)snrPrior / tmpFloat1; 1183 theFilter[i] = (float)tmpFloat2; 1184 } // end of loop over freqs 1185 1186 for (i = 0; i < inst->magnLen; i++) { 1187 // flooring bottom 1188 if (theFilter[i] < inst->denoiseBound) { 1189 theFilter[i] = inst->denoiseBound; 1190 } 1191 // flooring top 1192 if (theFilter[i] > (float)1.0) { 1193 theFilter[i] = 1.0; 1194 } 1195 if (inst->blockInd < END_STARTUP_SHORT) { 1196 theFilterTmp[i] = 1197 (inst->initMagnEst[i] - inst->overdrive * inst->parametricNoise[i]); 1198 theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001); 1199 // flooring bottom 1200 if (theFilterTmp[i] < inst->denoiseBound) { 1201 theFilterTmp[i] = inst->denoiseBound; 1202 } 1203 // flooring top 1204 if (theFilterTmp[i] > (float)1.0) { 1205 theFilterTmp[i] = 1.0; 1206 } 1207 // Weight the two suppression filters 1208 theFilter[i] *= (inst->blockInd); 1209 theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd); 1210 theFilter[i] += theFilterTmp[i]; 1211 theFilter[i] /= (END_STARTUP_SHORT); 1212 } 1213 // smoothing 1214 inst->smooth[i] = theFilter[i]; 1215 real[i] *= inst->smooth[i]; 1216 imag[i] *= inst->smooth[i]; 1217 } 1218 // keep track of magn spectrum for next frame 1219 for (i = 0; i < inst->magnLen; i++) { 1220 inst->magnPrev[i] = magn[i]; 1221 } 1222 // back to time domain 1223 winData[0] = real[0]; 1224 winData[1] = real[inst->magnLen - 1]; 1225 for (i = 1; i < inst->magnLen - 1; i++) { 1226 winData[2 * i] = real[i]; 1227 winData[2 * i + 1] = imag[i]; 1228 } 1229 WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft); 1230 1231 for (i = 0; i < inst->anaLen; i++) { 1232 real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling 1233 } 1234 1235 // scale factor: only do it after END_STARTUP_LONG time 1236 factor = (float)1.0; 1237 if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) { 1238 factor1 = (float)1.0; 1239 factor2 = (float)1.0; 1240 1241 energy2 = 0.0; 1242 for (i = 0; i < inst->anaLen; i++) { 1243 energy2 += (float)real[i] * (float)real[i]; 1244 } 1245 gain = (float)sqrt(energy2 / (energy1 + (float)1.0)); 1246 1247 // scaling for new version 1248 if (gain > B_LIM) { 1249 factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM); 1250 if (gain * factor1 > (float)1.0) { 1251 factor1 = (float)1.0 / gain; 1252 } 1253 } 1254 if (gain < B_LIM) { 1255 // don't reduce scale too much for pause regions: 1256 // attenuation here should be controlled by flooring 1257 if (gain <= inst->denoiseBound) { 1258 gain = inst->denoiseBound; 1259 } 1260 factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain); 1261 } 1262 // combine both scales with speech/noise prob: 1263 // note prior (priorSpeechProb) is not frequency dependent 1264 factor = inst->priorSpeechProb * factor1 + 1265 ((float)1.0 - inst->priorSpeechProb) * factor2; 1266 } // out of inst->gainmap==1 1267 1268 // synthesis 1269 for (i = 0; i < inst->anaLen; i++) { 1270 inst->syntBuf[i] += factor * inst->window[i] * (float)real[i]; 1271 } 1272 // read out fully processed segment 1273 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { 1274 fout[i - inst->windShift] = inst->syntBuf[i]; 1275 } 1276 // update synthesis buffer 1277 memcpy(inst->syntBuf, 1278 inst->syntBuf + inst->blockLen, 1279 sizeof(float) * (inst->anaLen - inst->blockLen)); 1280 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 1281 0, 1282 sizeof(float) * inst->blockLen); 1283 1284 // out buffer 1285 inst->outLen = inst->blockLen - inst->blockLen10ms; 1286 if (inst->blockLen > inst->blockLen10ms) { 1287 for (i = 0; i < inst->outLen; i++) { 1288 inst->outBuf[i] = fout[i + inst->blockLen10ms]; 1289 } 1290 } 1291 } // end of if out.len==0 1292 else { 1293 for (i = 0; i < inst->blockLen10ms; i++) { 1294 fout[i] = inst->outBuf[i]; 1295 } 1296 memcpy(inst->outBuf, 1297 inst->outBuf + inst->blockLen10ms, 1298 sizeof(float) * (inst->outLen - inst->blockLen10ms)); 1299 memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 1300 0, 1301 sizeof(float) * inst->blockLen10ms); 1302 inst->outLen -= inst->blockLen10ms; 1303 } 1304 1305 for (i = 0; i < inst->blockLen10ms; ++i) 1306 outFrame[i] = 1307 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); 1308 1309 // for time-domain gain of HB 1310 if (flagHB == 1) { 1311 // average speech prob from low band 1312 // avg over second half (i.e., 4->8kHz) of freq. spectrum 1313 avgProbSpeechHB = 0.0; 1314 for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) { 1315 avgProbSpeechHB += inst->speechProb[i]; 1316 } 1317 avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); 1318 // average filter gain from low band 1319 // average over second half (i.e., 4->8kHz) of freq. spectrum 1320 avgFilterGainHB = 0.0; 1321 for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) { 1322 avgFilterGainHB += inst->smooth[i]; 1323 } 1324 avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); 1325 avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0; 1326 // gain based on speech prob: 1327 gainModHB = (float)0.5 * 1328 ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); 1329 // combine gain with low band gain 1330 gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB; 1331 if (avgProbSpeechHB >= (float)0.5) { 1332 gainTimeDomainHB = 1333 (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB; 1334 } 1335 gainTimeDomainHB = gainTimeDomainHB * decayBweHB; 1336 // make sure gain is within flooring range 1337 // flooring bottom 1338 if (gainTimeDomainHB < inst->denoiseBound) { 1339 gainTimeDomainHB = inst->denoiseBound; 1340 } 1341 // flooring top 1342 if (gainTimeDomainHB > (float)1.0) { 1343 gainTimeDomainHB = 1.0; 1344 } 1345 // apply gain 1346 for (i = 0; i < inst->blockLen10ms; i++) { 1347 float o = gainTimeDomainHB * inst->dataBufHB[i]; 1348 outFrameHB[i] = 1349 WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN); 1350 } 1351 } // end of H band gain computation 1352 // 1353 1354 return 0; 1355 } 1356