1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <string.h> 12 #include <math.h> 13 //#include <stdio.h> 14 #include <stdlib.h> 15 #include "noise_suppression.h" 16 #include "ns_core.h" 17 #include "windows_private.h" 18 #include "fft4g.h" 19 #include "signal_processing_library.h" 20 21 // Set Feature Extraction Parameters 22 void WebRtcNs_set_feature_extraction_parameters(NSinst_t *inst) 23 { 24 //bin size of histogram 25 inst->featureExtractionParams.binSizeLrt = (float)0.1; 26 inst->featureExtractionParams.binSizeSpecFlat = (float)0.05; 27 inst->featureExtractionParams.binSizeSpecDiff = (float)0.1; 28 29 //range of histogram over which lrt threshold is computed 30 inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0; 31 32 //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain 33 // thresholds for prior model 34 inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff 35 inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness: 36 // used when noise is flatter than speech 37 38 //peak limit for spectral flatness (varies between 0 and 1) 39 inst->featureExtractionParams.thresPosSpecFlat = (float)0.6; 40 41 //limit on spacing of two highest peaks in histogram: spacing determined by bin size 42 inst->featureExtractionParams.limitPeakSpacingSpecFlat = 2 43 * inst->featureExtractionParams.binSizeSpecFlat; 44 inst->featureExtractionParams.limitPeakSpacingSpecDiff = 2 45 * inst->featureExtractionParams.binSizeSpecDiff; 46 47 //limit on relevance of second peak: 48 inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5; 49 inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5; 50 51 // fluctuation limit of lrt feature 52 inst->featureExtractionParams.thresFluctLrt = (float)0.05; 53 54 //limit on the max and min values for the feature thresholds 55 inst->featureExtractionParams.maxLrt = (float)1.0; 56 inst->featureExtractionParams.minLrt = (float)0.20; 57 58 inst->featureExtractionParams.maxSpecFlat = (float)0.95; 59 inst->featureExtractionParams.minSpecFlat = (float)0.10; 60 61 inst->featureExtractionParams.maxSpecDiff = (float)1.0; 62 inst->featureExtractionParams.minSpecDiff = (float)0.16; 63 64 //criteria of weight of histogram peak to accept/reject feature 65 inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3 66 * (inst->modelUpdatePars[1])); //for spectral flatness 67 inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3 68 * (inst->modelUpdatePars[1])); //for spectral difference 69 } 70 71 // Initialize state 72 int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs) 73 { 74 int i; 75 //We only support 10ms frames 76 77 //check for valid pointer 78 if (inst == NULL) 79 { 80 return -1; 81 } 82 83 // Initialization of struct 84 if (fs == 8000 || fs == 16000 || fs == 32000) 85 { 86 inst->fs = fs; 87 } 88 else 89 { 90 return -1; 91 } 92 inst->windShift = 0; 93 if (fs == 8000) 94 { 95 // We only support 10ms frames 96 inst->blockLen = 80; 97 inst->blockLen10ms = 80; 98 inst->anaLen = 128; 99 inst->window = kBlocks80w128; 100 inst->outLen = 0; 101 } 102 else if (fs == 16000) 103 { 104 // We only support 10ms frames 105 inst->blockLen = 160; 106 inst->blockLen10ms = 160; 107 inst->anaLen = 256; 108 inst->window = kBlocks160w256; 109 inst->outLen = 0; 110 } 111 else if (fs==32000) 112 { 113 // We only support 10ms frames 114 inst->blockLen = 160; 115 inst->blockLen10ms = 160; 116 inst->anaLen = 256; 117 inst->window = kBlocks160w256; 118 inst->outLen = 0; 119 } 120 inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins 121 122 // Initialize fft work arrays. 123 inst->ip[0] = 0; // Setting this triggers initialization. 124 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 125 rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft); 126 127 memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 128 memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); 129 130 //for HB processing 131 memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX); 132 133 //for quantile noise estimation 134 memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); 135 for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) 136 { 137 inst->lquantile[i] = (float)8.0; 138 inst->density[i] = (float)0.3; 139 } 140 141 for (i = 0; i < SIMULT; i++) 142 { 143 inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); 144 } 145 146 inst->updates = 0; 147 148 // Wiener filter initialization 149 for (i = 0; i < HALF_ANAL_BLOCKL; i++) 150 { 151 inst->smooth[i] = (float)1.0; 152 } 153 154 // Set the aggressiveness: default 155 inst->aggrMode = 0; 156 157 //initialize variables for new method 158 inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise 159 for (i = 0; i < HALF_ANAL_BLOCKL; i++) 160 { 161 inst->magnPrev[i] = (float)0.0; //previous mag spectrum 162 inst->noisePrev[i] = (float)0.0; //previous noise-spectrum 163 inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold) 164 inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate 165 inst->speechProbHB[i] = (float)0.0; //for estimation of HB in second pass 166 inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum 167 } 168 169 //feature quantities 170 inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold) 171 inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version 172 inst->featureData[2] = (float)0.0; //spectral variance: not used in this version 173 inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold) 174 inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold) 175 inst->featureData[5] = (float)0.0; //normalization for spectral-diff 176 inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum 177 178 //histogram quantities: used to estimate/update thresholds for features 179 for (i = 0; i < HIST_PAR_EST; i++) 180 { 181 inst->histLrt[i] = 0; 182 inst->histSpecFlat[i] = 0; 183 inst->histSpecDiff[i] = 0; 184 } 185 186 inst->blockInd = -1; //frame counter 187 inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature 188 inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness: 189 // determined on-line 190 inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure: 191 // 1 for flatness measure 192 inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature: 193 // determined on-line 194 inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature 195 inst->priorModelPars[5] = (float)0.0; //default weighting parameter for 196 // spectral flatness feature 197 inst->priorModelPars[6] = (float)0.0; //default weighting parameter for 198 // spectral difference feature 199 200 inst->modelUpdatePars[0] = 2; //update flag for parameters: 201 // 0 no update, 1=update once, 2=update every window 202 inst->modelUpdatePars[1] = 500; //window for update 203 inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum 204 //counter if the feature thresholds are updated during the sequence 205 inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; 206 207 inst->signalEnergy = 0.0; 208 inst->sumMagn = 0.0; 209 inst->whiteNoiseLevel = 0.0; 210 inst->pinkNoiseNumerator = 0.0; 211 inst->pinkNoiseExp = 0.0; 212 213 WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration 214 215 //default mode 216 WebRtcNs_set_policy_core(inst, 0); 217 218 219 memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX); 220 221 inst->initFlag = 1; 222 return 0; 223 } 224 225 int WebRtcNs_set_policy_core(NSinst_t *inst, int mode) 226 { 227 // allow for modes:0,1,2,3 228 if (mode < 0 || mode > 3) 229 { 230 return (-1); 231 } 232 233 inst->aggrMode = mode; 234 if (mode == 0) 235 { 236 inst->overdrive = (float)1.0; 237 inst->denoiseBound = (float)0.5; 238 inst->gainmap = 0; 239 } 240 else if (mode == 1) 241 { 242 //inst->overdrive = (float)1.25; 243 inst->overdrive = (float)1.0; 244 inst->denoiseBound = (float)0.25; 245 inst->gainmap = 1; 246 } 247 else if (mode == 2) 248 { 249 //inst->overdrive = (float)1.25; 250 inst->overdrive = (float)1.1; 251 inst->denoiseBound = (float)0.125; 252 inst->gainmap = 1; 253 } 254 else if (mode == 3) 255 { 256 //inst->overdrive = (float)1.30; 257 inst->overdrive = (float)1.25; 258 inst->denoiseBound = (float)0.09; 259 inst->gainmap = 1; 260 } 261 return 0; 262 } 263 264 // Estimate noise 265 void WebRtcNs_NoiseEstimation(NSinst_t *inst, float *magn, float *noise) 266 { 267 int i, s, offset; 268 float lmagn[HALF_ANAL_BLOCKL], delta; 269 270 if (inst->updates < END_STARTUP_LONG) 271 { 272 inst->updates++; 273 } 274 275 for (i = 0; i < inst->magnLen; i++) 276 { 277 lmagn[i] = (float)log(magn[i]); 278 } 279 280 // loop over simultaneous estimates 281 for (s = 0; s < SIMULT; s++) 282 { 283 offset = s * inst->magnLen; 284 285 // newquantest(...) 286 for (i = 0; i < inst->magnLen; i++) 287 { 288 // compute delta 289 if (inst->density[offset + i] > 1.0) 290 { 291 delta = FACTOR * (float)1.0 / inst->density[offset + i]; 292 } 293 else 294 { 295 delta = FACTOR; 296 } 297 298 // update log quantile estimate 299 if (lmagn[i] > inst->lquantile[offset + i]) 300 { 301 inst->lquantile[offset + i] += QUANTILE * delta 302 / (float)(inst->counter[s] + 1); 303 } 304 else 305 { 306 inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta 307 / (float)(inst->counter[s] + 1); 308 } 309 310 // update density estimate 311 if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) 312 { 313 inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset 314 + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] 315 + 1); 316 } 317 } // end loop over magnitude spectrum 318 319 if (inst->counter[s] >= END_STARTUP_LONG) 320 { 321 inst->counter[s] = 0; 322 if (inst->updates >= END_STARTUP_LONG) 323 { 324 for (i = 0; i < inst->magnLen; i++) 325 { 326 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 327 } 328 } 329 } 330 331 inst->counter[s]++; 332 } // end loop over simultaneous estimates 333 334 // Sequentially update the noise during startup 335 if (inst->updates < END_STARTUP_LONG) 336 { 337 // Use the last "s" to get noise during startup that differ from zero. 338 for (i = 0; i < inst->magnLen; i++) 339 { 340 inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 341 } 342 } 343 344 for (i = 0; i < inst->magnLen; i++) 345 { 346 noise[i] = inst->quantile[i]; 347 } 348 } 349 350 // Extract thresholds for feature parameters 351 // histograms are computed over some window_size (given by inst->modelUpdatePars[1]) 352 // thresholds and weights are extracted every window 353 // flag 0 means update histogram only, flag 1 means compute the thresholds/weights 354 // threshold and weights are returned in: inst->priorModelPars 355 void WebRtcNs_FeatureParameterExtraction(NSinst_t *inst, int flag) 356 { 357 int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; 358 int maxPeak1, maxPeak2; 359 int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff; 360 361 float binMid, featureSum; 362 float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; 363 float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; 364 365 //3 features: lrt, flatness, difference 366 //lrt_feature = inst->featureData[3]; 367 //flat_feature = inst->featureData[0]; 368 //diff_feature = inst->featureData[4]; 369 370 //update histograms 371 if (flag == 0) 372 { 373 // LRT 374 if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) 375 && (inst->featureData[3] >= 0.0)) 376 { 377 i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt); 378 inst->histLrt[i]++; 379 } 380 // Spectral flatness 381 if ((inst->featureData[0] < HIST_PAR_EST 382 * inst->featureExtractionParams.binSizeSpecFlat) 383 && (inst->featureData[0] >= 0.0)) 384 { 385 i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat); 386 inst->histSpecFlat[i]++; 387 } 388 // Spectral difference 389 if ((inst->featureData[4] < HIST_PAR_EST 390 * inst->featureExtractionParams.binSizeSpecDiff) 391 && (inst->featureData[4] >= 0.0)) 392 { 393 i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff); 394 inst->histSpecDiff[i]++; 395 } 396 } 397 398 // extract parameters for speech/noise probability 399 if (flag == 1) 400 { 401 //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt 402 avgHistLrt = 0.0; 403 avgHistLrtCompl = 0.0; 404 avgSquareHistLrt = 0.0; 405 numHistLrt = 0; 406 for (i = 0; i < HIST_PAR_EST; i++) 407 { 408 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt; 409 if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) 410 { 411 avgHistLrt += inst->histLrt[i] * binMid; 412 numHistLrt += inst->histLrt[i]; 413 } 414 avgSquareHistLrt += inst->histLrt[i] * binMid * binMid; 415 avgHistLrtCompl += inst->histLrt[i] * binMid; 416 } 417 if (numHistLrt > 0) 418 { 419 avgHistLrt = avgHistLrt / ((float)numHistLrt); 420 } 421 avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]); 422 avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]); 423 fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; 424 // get threshold for lrt feature: 425 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) 426 { 427 //very low fluct, so likely noise 428 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; 429 } 430 else 431 { 432 inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars 433 * avgHistLrt; 434 // check if value is within min/max range 435 if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) 436 { 437 inst->priorModelPars[0] = inst->featureExtractionParams.minLrt; 438 } 439 if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) 440 { 441 inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; 442 } 443 } 444 // done with lrt feature 445 446 // 447 // for spectral flatness and spectral difference: compute the main peaks of histogram 448 maxPeak1 = 0; 449 maxPeak2 = 0; 450 posPeak1SpecFlat = 0.0; 451 posPeak2SpecFlat = 0.0; 452 weightPeak1SpecFlat = 0; 453 weightPeak2SpecFlat = 0; 454 455 // peaks for flatness 456 for (i = 0; i < HIST_PAR_EST; i++) 457 { 458 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat; 459 if (inst->histSpecFlat[i] > maxPeak1) 460 { 461 // Found new "first" peak 462 maxPeak2 = maxPeak1; 463 weightPeak2SpecFlat = weightPeak1SpecFlat; 464 posPeak2SpecFlat = posPeak1SpecFlat; 465 466 maxPeak1 = inst->histSpecFlat[i]; 467 weightPeak1SpecFlat = inst->histSpecFlat[i]; 468 posPeak1SpecFlat = binMid; 469 } 470 else if (inst->histSpecFlat[i] > maxPeak2) 471 { 472 // Found new "second" peak 473 maxPeak2 = inst->histSpecFlat[i]; 474 weightPeak2SpecFlat = inst->histSpecFlat[i]; 475 posPeak2SpecFlat = binMid; 476 } 477 } 478 479 //compute two peaks for spectral difference 480 maxPeak1 = 0; 481 maxPeak2 = 0; 482 posPeak1SpecDiff = 0.0; 483 posPeak2SpecDiff = 0.0; 484 weightPeak1SpecDiff = 0; 485 weightPeak2SpecDiff = 0; 486 // peaks for spectral difference 487 for (i = 0; i < HIST_PAR_EST; i++) 488 { 489 binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff; 490 if (inst->histSpecDiff[i] > maxPeak1) 491 { 492 // Found new "first" peak 493 maxPeak2 = maxPeak1; 494 weightPeak2SpecDiff = weightPeak1SpecDiff; 495 posPeak2SpecDiff = posPeak1SpecDiff; 496 497 maxPeak1 = inst->histSpecDiff[i]; 498 weightPeak1SpecDiff = inst->histSpecDiff[i]; 499 posPeak1SpecDiff = binMid; 500 } 501 else if (inst->histSpecDiff[i] > maxPeak2) 502 { 503 // Found new "second" peak 504 maxPeak2 = inst->histSpecDiff[i]; 505 weightPeak2SpecDiff = inst->histSpecDiff[i]; 506 posPeak2SpecDiff = binMid; 507 } 508 } 509 510 // for spectrum flatness feature 511 useFeatureSpecFlat = 1; 512 // merge the two peaks if they are close 513 if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) 514 < inst->featureExtractionParams.limitPeakSpacingSpecFlat) 515 && (weightPeak2SpecFlat 516 > inst->featureExtractionParams.limitPeakWeightsSpecFlat 517 * weightPeak1SpecFlat)) 518 { 519 weightPeak1SpecFlat += weightPeak2SpecFlat; 520 posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat); 521 } 522 //reject if weight of peaks is not large enough, or peak value too small 523 if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat 524 || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) 525 { 526 useFeatureSpecFlat = 0; 527 } 528 // if selected, get the threshold 529 if (useFeatureSpecFlat == 1) 530 { 531 // compute the threshold 532 inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars 533 * posPeak1SpecFlat; 534 //check if value is within min/max range 535 if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) 536 { 537 inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat; 538 } 539 if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) 540 { 541 inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat; 542 } 543 } 544 // done with flatness feature 545 546 // for template feature 547 useFeatureSpecDiff = 1; 548 // merge the two peaks if they are close 549 if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) 550 < inst->featureExtractionParams.limitPeakSpacingSpecDiff) 551 && (weightPeak2SpecDiff 552 > inst->featureExtractionParams.limitPeakWeightsSpecDiff 553 * weightPeak1SpecDiff)) 554 { 555 weightPeak1SpecDiff += weightPeak2SpecDiff; 556 posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff); 557 } 558 // get the threshold value 559 inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars 560 * posPeak1SpecDiff; 561 //reject if weight of peaks is not large enough 562 if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) 563 { 564 useFeatureSpecDiff = 0; 565 } 566 //check if value is within min/max range 567 if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) 568 { 569 inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff; 570 } 571 if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) 572 { 573 inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff; 574 } 575 // done with spectral difference feature 576 577 // don't use template feature if fluctuation of lrt feature is very low: 578 // most likely just noise state 579 if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) 580 { 581 useFeatureSpecDiff = 0; 582 } 583 584 // select the weights between the features 585 // inst->priorModelPars[4] is weight for lrt: always selected 586 // inst->priorModelPars[5] is weight for spectral flatness 587 // inst->priorModelPars[6] is weight for spectral difference 588 featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); 589 inst->priorModelPars[4] = (float)1.0 / featureSum; 590 inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; 591 inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; 592 593 // set hists to zero for next update 594 if (inst->modelUpdatePars[0] >= 1) 595 { 596 for (i = 0; i < HIST_PAR_EST; i++) 597 { 598 inst->histLrt[i] = 0; 599 inst->histSpecFlat[i] = 0; 600 inst->histSpecDiff[i] = 0; 601 } 602 } 603 } // end of flag == 1 604 } 605 606 // Compute spectral flatness on input spectrum 607 // magnIn is the magnitude spectrum 608 // spectral flatness is returned in inst->featureData[0] 609 void WebRtcNs_ComputeSpectralFlatness(NSinst_t *inst, float *magnIn) 610 { 611 int i; 612 int shiftLP = 1; //option to remove first bin(s) from spectral measures 613 float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; 614 615 // comute spectral measures 616 // for flatness 617 avgSpectralFlatnessNum = 0.0; 618 avgSpectralFlatnessDen = inst->sumMagn; 619 for (i = 0; i < shiftLP; i++) 620 { 621 avgSpectralFlatnessDen -= magnIn[i]; 622 } 623 // compute log of ratio of the geometric to arithmetic mean: check for log(0) case 624 for (i = shiftLP; i < inst->magnLen; i++) 625 { 626 if (magnIn[i] > 0.0) 627 { 628 avgSpectralFlatnessNum += (float)log(magnIn[i]); 629 } 630 else 631 { 632 inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0]; 633 return; 634 } 635 } 636 //normalize 637 avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen; 638 avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen; 639 640 //ratio and inverse log: check for case of log(0) 641 spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; 642 643 //time-avg update of spectral flatness feature 644 inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]); 645 // done with flatness feature 646 } 647 648 // Compute the difference measure between input spectrum and a template/learned noise spectrum 649 // magnIn is the input spectrum 650 // the reference/template spectrum is inst->magnAvgPause[i] 651 // returns (normalized) spectral difference in inst->featureData[4] 652 void WebRtcNs_ComputeSpectralDifference(NSinst_t *inst, float *magnIn) 653 { 654 // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) 655 int i; 656 float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; 657 658 avgPause = 0.0; 659 avgMagn = inst->sumMagn; 660 // compute average quantities 661 for (i = 0; i < inst->magnLen; i++) 662 { 663 //conservative smooth noise spectrum from pause frames 664 avgPause += inst->magnAvgPause[i]; 665 } 666 avgPause = avgPause / ((float)inst->magnLen); 667 avgMagn = avgMagn / ((float)inst->magnLen); 668 669 covMagnPause = 0.0; 670 varPause = 0.0; 671 varMagn = 0.0; 672 // compute variance and covariance quantities 673 for (i = 0; i < inst->magnLen; i++) 674 { 675 covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause); 676 varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause); 677 varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); 678 } 679 covMagnPause = covMagnPause / ((float)inst->magnLen); 680 varPause = varPause / ((float)inst->magnLen); 681 varMagn = varMagn / ((float)inst->magnLen); 682 // update of average magnitude spectrum 683 inst->featureData[6] += inst->signalEnergy; 684 685 avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001); 686 // normalize and compute time-avg update of difference feature 687 avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001)); 688 inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]); 689 } 690 691 // Compute speech/noise probability 692 // speech/noise probability is returned in: probSpeechFinal 693 //magn is the input magnitude spectrum 694 //noise is the noise spectrum 695 //snrLocPrior is the prior snr for each freq. 696 //snr loc_post is the post snr for each freq. 697 void WebRtcNs_SpeechNoiseProb(NSinst_t *inst, float *probSpeechFinal, float *snrLocPrior, 698 float *snrLocPost) 699 { 700 int i, sgnMap; 701 float invLrt, gainPrior, indPrior; 702 float logLrtTimeAvgKsum, besselTmp; 703 float indicator0, indicator1, indicator2; 704 float tmpFloat1, tmpFloat2; 705 float weightIndPrior0, weightIndPrior1, weightIndPrior2; 706 float threshPrior0, threshPrior1, threshPrior2; 707 float widthPrior, widthPrior0, widthPrior1, widthPrior2; 708 709 widthPrior0 = WIDTH_PR_MAP; 710 widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region: 711 // lower range, so increase width in tanh map 712 widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure 713 714 //threshold parameters for features 715 threshPrior0 = inst->priorModelPars[0]; 716 threshPrior1 = inst->priorModelPars[1]; 717 threshPrior2 = inst->priorModelPars[3]; 718 719 //sign for flatness feature 720 sgnMap = (int)(inst->priorModelPars[2]); 721 722 //weight parameters for features 723 weightIndPrior0 = inst->priorModelPars[4]; 724 weightIndPrior1 = inst->priorModelPars[5]; 725 weightIndPrior2 = inst->priorModelPars[6]; 726 727 // compute feature based on average LR factor 728 // this is the average over all frequencies of the smooth log lrt 729 logLrtTimeAvgKsum = 0.0; 730 for (i = 0; i < inst->magnLen; i++) 731 { 732 tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i]; 733 tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001); 734 besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2; 735 inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1) 736 - inst->logLrtTimeAvg[i]); 737 logLrtTimeAvgKsum += inst->logLrtTimeAvg[i]; 738 } 739 logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen); 740 inst->featureData[3] = logLrtTimeAvgKsum; 741 // done with computation of LR factor 742 743 // 744 //compute the indicator functions 745 // 746 747 // average lrt feature 748 widthPrior = widthPrior0; 749 //use larger width in tanh map for pause regions 750 if (logLrtTimeAvgKsum < threshPrior0) 751 { 752 widthPrior = widthPrior1; 753 } 754 // compute indicator function: sigmoid map 755 indicator0 = (float)0.5 * ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) 756 + (float)1.0); 757 758 //spectral flatness feature 759 tmpFloat1 = inst->featureData[0]; 760 widthPrior = widthPrior0; 761 //use larger width in tanh map for pause regions 762 if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) 763 { 764 widthPrior = widthPrior1; 765 } 766 if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) 767 { 768 widthPrior = widthPrior1; 769 } 770 // compute indicator function: sigmoid map 771 indicator1 = (float)0.5 * ((float)tanh( 772 (float)sgnMap * widthPrior * (threshPrior1 773 - tmpFloat1)) + (float)1.0); 774 775 //for template spectrum-difference 776 tmpFloat1 = inst->featureData[4]; 777 widthPrior = widthPrior0; 778 //use larger width in tanh map for pause regions 779 if (tmpFloat1 < threshPrior2) 780 { 781 widthPrior = widthPrior2; 782 } 783 // compute indicator function: sigmoid map 784 indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) 785 + (float)1.0); 786 787 //combine the indicator function with the feature weights 788 indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 789 * indicator2; 790 // done with computing indicator function 791 792 //compute the prior probability 793 inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb); 794 // make sure probabilities are within range: keep floor to 0.01 795 if (inst->priorSpeechProb > 1.0) 796 { 797 inst->priorSpeechProb = (float)1.0; 798 } 799 if (inst->priorSpeechProb < 0.01) 800 { 801 inst->priorSpeechProb = (float)0.01; 802 } 803 804 //final speech probability: combine prior model with LR factor: 805 gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001); 806 for (i = 0; i < inst->magnLen; i++) 807 { 808 invLrt = (float)exp(-inst->logLrtTimeAvg[i]); 809 invLrt = (float)gainPrior * invLrt; 810 probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt); 811 } 812 } 813 814 int WebRtcNs_ProcessCore(NSinst_t *inst, 815 short *speechFrame, 816 short *speechFrameHB, 817 short *outFrame, 818 short *outFrameHB) 819 { 820 // main routine for noise reduction 821 822 int flagHB = 0; 823 int i; 824 const int kStartBand = 5; // Skip first frequency bins during estimation. 825 int updateParsFlag; 826 827 float energy1, energy2, gain, factor, factor1, factor2; 828 float signalEnergy, sumMagn; 829 float snrPrior, currentEstimateStsa; 830 float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech; 831 float gammaNoiseTmp, gammaNoiseOld; 832 float noiseUpdateTmp, fTmp, dTmp; 833 float fin[BLOCKL_MAX], fout[BLOCKL_MAX]; 834 float winData[ANAL_BLOCKL_MAX]; 835 float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; 836 float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; 837 float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; 838 float probSpeechFinal[HALF_ANAL_BLOCKL], previousEstimateStsa[HALF_ANAL_BLOCKL]; 839 float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; 840 // Variables during startup 841 float sum_log_i = 0.0; 842 float sum_log_i_square = 0.0; 843 float sum_log_magn = 0.0; 844 float sum_log_i_log_magn = 0.0; 845 float parametric_noise = 0.0; 846 float parametric_exp = 0.0; 847 float parametric_num = 0.0; 848 849 // SWB variables 850 int deltaBweHB = 1; 851 int deltaGainHB = 1; 852 float decayBweHB = 1.0; 853 float gainMapParHB = 1.0; 854 float gainTimeDomainHB = 1.0; 855 float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; 856 857 // Check that initiation has been done 858 if (inst->initFlag != 1) 859 { 860 return (-1); 861 } 862 // Check for valid pointers based on sampling rate 863 if (inst->fs == 32000) 864 { 865 if (speechFrameHB == NULL) 866 { 867 return -1; 868 } 869 flagHB = 1; 870 // range for averaging low band quantities for H band gain 871 deltaBweHB = (int)inst->magnLen / 4; 872 deltaGainHB = deltaBweHB; 873 } 874 // 875 updateParsFlag = inst->modelUpdatePars[0]; 876 // 877 878 //for LB do all processing 879 // convert to float 880 for (i = 0; i < inst->blockLen10ms; i++) 881 { 882 fin[i] = (float)speechFrame[i]; 883 } 884 // update analysis buffer for L band 885 memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms, 886 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 887 memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin, 888 sizeof(float) * inst->blockLen10ms); 889 890 if (flagHB == 1) 891 { 892 // convert to float 893 for (i = 0; i < inst->blockLen10ms; i++) 894 { 895 fin[i] = (float)speechFrameHB[i]; 896 } 897 // update analysis buffer for H band 898 memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms, 899 sizeof(float) * (inst->anaLen - inst->blockLen10ms)); 900 memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin, 901 sizeof(float) * inst->blockLen10ms); 902 } 903 904 // check if processing needed 905 if (inst->outLen == 0) 906 { 907 // windowing 908 energy1 = 0.0; 909 for (i = 0; i < inst->anaLen; i++) 910 { 911 winData[i] = inst->window[i] * inst->dataBuf[i]; 912 energy1 += winData[i] * winData[i]; 913 } 914 if (energy1 == 0.0) 915 { 916 // synthesize the special case of zero input 917 // we want to avoid updating statistics in this case: 918 // Updating feature statistics when we have zeros only will cause thresholds to 919 // move towards zero signal situations. This in turn has the effect that once the 920 // signal is "turned on" (non-zero values) everything will be treated as speech 921 // and there is no noise suppression effect. Depending on the duration of the 922 // inactive signal it takes a considerable amount of time for the system to learn 923 // what is noise and what is speech. 924 925 // read out fully processed segment 926 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) 927 { 928 fout[i - inst->windShift] = inst->syntBuf[i]; 929 } 930 // update synthesis buffer 931 memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, 932 sizeof(float) * (inst->anaLen - inst->blockLen)); 933 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, 934 sizeof(float) * inst->blockLen); 935 936 // out buffer 937 inst->outLen = inst->blockLen - inst->blockLen10ms; 938 if (inst->blockLen > inst->blockLen10ms) 939 { 940 for (i = 0; i < inst->outLen; i++) 941 { 942 inst->outBuf[i] = fout[i + inst->blockLen10ms]; 943 } 944 } 945 // convert to short 946 for (i = 0; i < inst->blockLen10ms; i++) 947 { 948 dTmp = fout[i]; 949 if (dTmp < WEBRTC_SPL_WORD16_MIN) 950 { 951 dTmp = WEBRTC_SPL_WORD16_MIN; 952 } 953 else if (dTmp > WEBRTC_SPL_WORD16_MAX) 954 { 955 dTmp = WEBRTC_SPL_WORD16_MAX; 956 } 957 outFrame[i] = (short)dTmp; 958 } 959 960 // for time-domain gain of HB 961 if (flagHB == 1) 962 { 963 for (i = 0; i < inst->blockLen10ms; i++) 964 { 965 dTmp = inst->dataBufHB[i]; 966 if (dTmp < WEBRTC_SPL_WORD16_MIN) 967 { 968 dTmp = WEBRTC_SPL_WORD16_MIN; 969 } 970 else if (dTmp > WEBRTC_SPL_WORD16_MAX) 971 { 972 dTmp = WEBRTC_SPL_WORD16_MAX; 973 } 974 outFrameHB[i] = (short)dTmp; 975 } 976 } // end of H band gain computation 977 // 978 return 0; 979 } 980 981 // 982 inst->blockInd++; // Update the block index only when we process a block. 983 // FFT 984 rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); 985 986 imag[0] = 0; 987 real[0] = winData[0]; 988 magn[0] = (float)(fabs(real[0]) + 1.0f); 989 imag[inst->magnLen - 1] = 0; 990 real[inst->magnLen - 1] = winData[1]; 991 magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); 992 signalEnergy = (float)(real[0] * real[0]) + (float)(real[inst->magnLen - 1] 993 * real[inst->magnLen - 1]); 994 sumMagn = magn[0] + magn[inst->magnLen - 1]; 995 if (inst->blockInd < END_STARTUP_SHORT) 996 { 997 inst->initMagnEst[0] += magn[0]; 998 inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1]; 999 tmpFloat2 = log((float)(inst->magnLen - 1)); 1000 sum_log_i = tmpFloat2; 1001 sum_log_i_square = tmpFloat2 * tmpFloat2; 1002 tmpFloat1 = log(magn[inst->magnLen - 1]); 1003 sum_log_magn = tmpFloat1; 1004 sum_log_i_log_magn = tmpFloat2 * tmpFloat1; 1005 } 1006 for (i = 1; i < inst->magnLen - 1; i++) 1007 { 1008 real[i] = winData[2 * i]; 1009 imag[i] = winData[2 * i + 1]; 1010 // magnitude spectrum 1011 fTmp = real[i] * real[i]; 1012 fTmp += imag[i] * imag[i]; 1013 signalEnergy += fTmp; 1014 magn[i] = ((float)sqrt(fTmp)) + 1.0f; 1015 sumMagn += magn[i]; 1016 if (inst->blockInd < END_STARTUP_SHORT) 1017 { 1018 inst->initMagnEst[i] += magn[i]; 1019 if (i >= kStartBand) 1020 { 1021 tmpFloat2 = log((float)i); 1022 sum_log_i += tmpFloat2; 1023 sum_log_i_square += tmpFloat2 * tmpFloat2; 1024 tmpFloat1 = log(magn[i]); 1025 sum_log_magn += tmpFloat1; 1026 sum_log_i_log_magn += tmpFloat2 * tmpFloat1; 1027 } 1028 } 1029 } 1030 signalEnergy = signalEnergy / ((float)inst->magnLen); 1031 inst->signalEnergy = signalEnergy; 1032 inst->sumMagn = sumMagn; 1033 1034 //compute spectral flatness on input spectrum 1035 WebRtcNs_ComputeSpectralFlatness(inst, magn); 1036 // quantile noise estimate 1037 WebRtcNs_NoiseEstimation(inst, magn, noise); 1038 //compute simplified noise model during startup 1039 if (inst->blockInd < END_STARTUP_SHORT) 1040 { 1041 // Estimate White noise 1042 inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive; 1043 // Estimate Pink noise parameters 1044 tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand)); 1045 tmpFloat1 -= (sum_log_i * sum_log_i); 1046 tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); 1047 tmpFloat3 = tmpFloat2 / tmpFloat1; 1048 // Constrain the estimated spectrum to be positive 1049 if (tmpFloat3 < 0.0f) 1050 { 1051 tmpFloat3 = 0.0f; 1052 } 1053 inst->pinkNoiseNumerator += tmpFloat3; 1054 tmpFloat2 = (sum_log_i * sum_log_magn); 1055 tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn; 1056 tmpFloat3 = tmpFloat2 / tmpFloat1; 1057 // Constrain the pink noise power to be in the interval [0, 1]; 1058 if (tmpFloat3 < 0.0f) 1059 { 1060 tmpFloat3 = 0.0f; 1061 } 1062 if (tmpFloat3 > 1.0f) 1063 { 1064 tmpFloat3 = 1.0f; 1065 } 1066 inst->pinkNoiseExp += tmpFloat3; 1067 1068 // Calculate frequency independent parts of parametric noise estimate. 1069 if (inst->pinkNoiseExp == 0.0f) 1070 { 1071 // Use white noise estimate 1072 parametric_noise = inst->whiteNoiseLevel; 1073 } 1074 else 1075 { 1076 // Use pink noise estimate 1077 parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1)); 1078 parametric_num *= (float)(inst->blockInd + 1); 1079 parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1); 1080 parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp); 1081 } 1082 for (i = 0; i < inst->magnLen; i++) 1083 { 1084 // Estimate the background noise using the white and pink noise parameters 1085 if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) 1086 { 1087 // Use pink noise estimate 1088 parametric_noise = parametric_num / pow((float)i, parametric_exp); 1089 } 1090 theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise); 1091 theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001); 1092 // Weight quantile noise with modeled noise 1093 noise[i] *= (inst->blockInd); 1094 tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd); 1095 noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1)); 1096 noise[i] /= END_STARTUP_SHORT; 1097 } 1098 } 1099 //compute average signal during END_STARTUP_LONG time: 1100 // used to normalize spectral difference measure 1101 if (inst->blockInd < END_STARTUP_LONG) 1102 { 1103 inst->featureData[5] *= inst->blockInd; 1104 inst->featureData[5] += signalEnergy; 1105 inst->featureData[5] /= (inst->blockInd + 1); 1106 } 1107 1108 #ifdef PROCESS_FLOW_0 1109 if (inst->blockInd > END_STARTUP_LONG) 1110 { 1111 //option: average the quantile noise: for check with AEC2 1112 for (i = 0; i < inst->magnLen; i++) 1113 { 1114 noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i]; 1115 } 1116 for (i = 0; i < inst->magnLen; i++) 1117 { 1118 // Wiener with over sub-substraction: 1119 theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001); 1120 } 1121 } 1122 #else 1123 //start processing at frames == converged+1 1124 // 1125 // STEP 1: compute prior and post snr based on quantile noise est 1126 // 1127 1128 // compute DD estimate of prior SNR: needed for new method 1129 for (i = 0; i < inst->magnLen; i++) 1130 { 1131 // post snr 1132 snrLocPost[i] = (float)0.0; 1133 if (magn[i] > noise[i]) 1134 { 1135 snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; 1136 } 1137 // previous post snr 1138 // previous estimate: based on previous frame with gain filter 1139 previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001) 1140 * (inst->smooth[i]); 1141 // DD estimate is sum of two terms: current estimate and previous estimate 1142 // directed decision update of snrPrior 1143 snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) 1144 * snrLocPost[i]; 1145 // post and prior snr needed for step 2 1146 } // end of loop over freqs 1147 #ifdef PROCESS_FLOW_1 1148 for (i = 0; i < inst->magnLen; i++) 1149 { 1150 // gain filter 1151 tmpFloat1 = inst->overdrive + snrLocPrior[i]; 1152 tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1; 1153 theFilter[i] = (float)tmpFloat2; 1154 } // end of loop over freqs 1155 #endif 1156 // done with step 1: dd computation of prior and post snr 1157 1158 // 1159 //STEP 2: compute speech/noise likelihood 1160 // 1161 #ifdef PROCESS_FLOW_2 1162 // compute difference of input spectrum with learned/estimated noise spectrum 1163 WebRtcNs_ComputeSpectralDifference(inst, magn); 1164 // compute histograms for parameter decisions (thresholds and weights for features) 1165 // parameters are extracted once every window time (=inst->modelUpdatePars[1]) 1166 if (updateParsFlag >= 1) 1167 { 1168 // counter update 1169 inst->modelUpdatePars[3]--; 1170 // update histogram 1171 if (inst->modelUpdatePars[3] > 0) 1172 { 1173 WebRtcNs_FeatureParameterExtraction(inst, 0); 1174 } 1175 // compute model parameters 1176 if (inst->modelUpdatePars[3] == 0) 1177 { 1178 WebRtcNs_FeatureParameterExtraction(inst, 1); 1179 inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; 1180 // if wish to update only once, set flag to zero 1181 if (updateParsFlag == 1) 1182 { 1183 inst->modelUpdatePars[0] = 0; 1184 } 1185 else 1186 { 1187 // update every window: 1188 // get normalization for spectral difference for next window estimate 1189 inst->featureData[6] = inst->featureData[6] 1190 / ((float)inst->modelUpdatePars[1]); 1191 inst->featureData[5] = (float)0.5 * (inst->featureData[6] 1192 + inst->featureData[5]); 1193 inst->featureData[6] = (float)0.0; 1194 } 1195 } 1196 } 1197 // compute speech/noise probability 1198 WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost); 1199 // time-avg parameter for noise update 1200 gammaNoiseTmp = NOISE_UPDATE; 1201 for (i = 0; i < inst->magnLen; i++) 1202 { 1203 probSpeech = probSpeechFinal[i]; 1204 probNonSpeech = (float)1.0 - probSpeech; 1205 // temporary noise update: 1206 // use it for speech frames if update value is less than previous 1207 noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) 1208 * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); 1209 // 1210 // time-constant based on speech/noise state 1211 gammaNoiseOld = gammaNoiseTmp; 1212 gammaNoiseTmp = NOISE_UPDATE; 1213 // increase gamma (i.e., less noise update) for frame likely to be speech 1214 if (probSpeech > PROB_RANGE) 1215 { 1216 gammaNoiseTmp = SPEECH_UPDATE; 1217 } 1218 // conservative noise update 1219 if (probSpeech < PROB_RANGE) 1220 { 1221 inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]); 1222 } 1223 // noise update 1224 if (gammaNoiseTmp == gammaNoiseOld) 1225 { 1226 noise[i] = noiseUpdateTmp; 1227 } 1228 else 1229 { 1230 noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) 1231 * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); 1232 // allow for noise update downwards: 1233 // if noise update decreases the noise, it is safe, so allow it to happen 1234 if (noiseUpdateTmp < noise[i]) 1235 { 1236 noise[i] = noiseUpdateTmp; 1237 } 1238 } 1239 } // end of freq loop 1240 // done with step 2: noise update 1241 1242 // 1243 // STEP 3: compute dd update of prior snr and post snr based on new noise estimate 1244 // 1245 for (i = 0; i < inst->magnLen; i++) 1246 { 1247 // post and prior snr 1248 currentEstimateStsa = (float)0.0; 1249 if (magn[i] > noise[i]) 1250 { 1251 currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; 1252 } 1253 // DD estimate is sume of two terms: current estimate and previous estimate 1254 // directed decision update of snrPrior 1255 snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) 1256 * currentEstimateStsa; 1257 // gain filter 1258 tmpFloat1 = inst->overdrive + snrPrior; 1259 tmpFloat2 = (float)snrPrior / tmpFloat1; 1260 theFilter[i] = (float)tmpFloat2; 1261 } // end of loop over freqs 1262 // done with step3 1263 #endif 1264 #endif 1265 1266 for (i = 0; i < inst->magnLen; i++) 1267 { 1268 // flooring bottom 1269 if (theFilter[i] < inst->denoiseBound) 1270 { 1271 theFilter[i] = inst->denoiseBound; 1272 } 1273 // flooring top 1274 if (theFilter[i] > (float)1.0) 1275 { 1276 theFilter[i] = 1.0; 1277 } 1278 if (inst->blockInd < END_STARTUP_SHORT) 1279 { 1280 // flooring bottom 1281 if (theFilterTmp[i] < inst->denoiseBound) 1282 { 1283 theFilterTmp[i] = inst->denoiseBound; 1284 } 1285 // flooring top 1286 if (theFilterTmp[i] > (float)1.0) 1287 { 1288 theFilterTmp[i] = 1.0; 1289 } 1290 // Weight the two suppression filters 1291 theFilter[i] *= (inst->blockInd); 1292 theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd); 1293 theFilter[i] += theFilterTmp[i]; 1294 theFilter[i] /= (END_STARTUP_SHORT); 1295 } 1296 // smoothing 1297 #ifdef PROCESS_FLOW_0 1298 inst->smooth[i] *= SMOOTH; // value set to 0.7 in define.h file 1299 inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i]; 1300 #else 1301 inst->smooth[i] = theFilter[i]; 1302 #endif 1303 real[i] *= inst->smooth[i]; 1304 imag[i] *= inst->smooth[i]; 1305 } 1306 // keep track of noise and magn spectrum for next frame 1307 for (i = 0; i < inst->magnLen; i++) 1308 { 1309 inst->noisePrev[i] = noise[i]; 1310 inst->magnPrev[i] = magn[i]; 1311 } 1312 // back to time domain 1313 winData[0] = real[0]; 1314 winData[1] = real[inst->magnLen - 1]; 1315 for (i = 1; i < inst->magnLen - 1; i++) 1316 { 1317 winData[2 * i] = real[i]; 1318 winData[2 * i + 1] = imag[i]; 1319 } 1320 rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft); 1321 1322 for (i = 0; i < inst->anaLen; i++) 1323 { 1324 real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling 1325 } 1326 1327 //scale factor: only do it after END_STARTUP_LONG time 1328 factor = (float)1.0; 1329 if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) 1330 { 1331 factor1 = (float)1.0; 1332 factor2 = (float)1.0; 1333 1334 energy2 = 0.0; 1335 for (i = 0; i < inst->anaLen;i++) 1336 { 1337 energy2 += (float)real[i] * (float)real[i]; 1338 } 1339 gain = (float)sqrt(energy2 / (energy1 + (float)1.0)); 1340 1341 #ifdef PROCESS_FLOW_2 1342 // scaling for new version 1343 if (gain > B_LIM) 1344 { 1345 factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM); 1346 if (gain * factor1 > (float)1.0) 1347 { 1348 factor1 = (float)1.0 / gain; 1349 } 1350 } 1351 if (gain < B_LIM) 1352 { 1353 //don't reduce scale too much for pause regions: 1354 // attenuation here should be controlled by flooring 1355 if (gain <= inst->denoiseBound) 1356 { 1357 gain = inst->denoiseBound; 1358 } 1359 factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain); 1360 } 1361 //combine both scales with speech/noise prob: 1362 // note prior (priorSpeechProb) is not frequency dependent 1363 factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb) 1364 * factor2; 1365 #else 1366 if (gain > B_LIM) 1367 { 1368 factor = (float)1.0 + (float)1.3 * (gain - B_LIM); 1369 } 1370 else 1371 { 1372 factor = (float)1.0 + (float)2.0 * (gain - B_LIM); 1373 } 1374 if (gain * factor > (float)1.0) 1375 { 1376 factor = (float)1.0 / gain; 1377 } 1378 #endif 1379 } // out of inst->gainmap==1 1380 1381 // synthesis 1382 for (i = 0; i < inst->anaLen; i++) 1383 { 1384 inst->syntBuf[i] += factor * inst->window[i] * (float)real[i]; 1385 } 1386 // read out fully processed segment 1387 for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) 1388 { 1389 fout[i - inst->windShift] = inst->syntBuf[i]; 1390 } 1391 // update synthesis buffer 1392 memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, 1393 sizeof(float) * (inst->anaLen - inst->blockLen)); 1394 memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, 1395 sizeof(float) * inst->blockLen); 1396 1397 // out buffer 1398 inst->outLen = inst->blockLen - inst->blockLen10ms; 1399 if (inst->blockLen > inst->blockLen10ms) 1400 { 1401 for (i = 0; i < inst->outLen; i++) 1402 { 1403 inst->outBuf[i] = fout[i + inst->blockLen10ms]; 1404 } 1405 } 1406 } // end of if out.len==0 1407 else 1408 { 1409 for (i = 0; i < inst->blockLen10ms; i++) 1410 { 1411 fout[i] = inst->outBuf[i]; 1412 } 1413 memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms, 1414 sizeof(float) * (inst->outLen - inst->blockLen10ms)); 1415 memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0, 1416 sizeof(float) * inst->blockLen10ms); 1417 inst->outLen -= inst->blockLen10ms; 1418 } 1419 1420 // convert to short 1421 for (i = 0; i < inst->blockLen10ms; i++) 1422 { 1423 dTmp = fout[i]; 1424 if (dTmp < WEBRTC_SPL_WORD16_MIN) 1425 { 1426 dTmp = WEBRTC_SPL_WORD16_MIN; 1427 } 1428 else if (dTmp > WEBRTC_SPL_WORD16_MAX) 1429 { 1430 dTmp = WEBRTC_SPL_WORD16_MAX; 1431 } 1432 outFrame[i] = (short)dTmp; 1433 } 1434 1435 // for time-domain gain of HB 1436 if (flagHB == 1) 1437 { 1438 for (i = 0; i < inst->magnLen; i++) 1439 { 1440 inst->speechProbHB[i] = probSpeechFinal[i]; 1441 } 1442 if (inst->blockInd > END_STARTUP_LONG) 1443 { 1444 // average speech prob from low band 1445 // avg over second half (i.e., 4->8kHz) of freq. spectrum 1446 avgProbSpeechHB = 0.0; 1447 for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) 1448 { 1449 avgProbSpeechHB += inst->speechProbHB[i]; 1450 } 1451 avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); 1452 // average filter gain from low band 1453 // average over second half (i.e., 4->8kHz) of freq. spectrum 1454 avgFilterGainHB = 0.0; 1455 for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) 1456 { 1457 avgFilterGainHB += inst->smooth[i]; 1458 } 1459 avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); 1460 avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0; 1461 // gain based on speech prob: 1462 gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); 1463 //combine gain with low band gain 1464 gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB; 1465 if (avgProbSpeechHB >= (float)0.5) 1466 { 1467 gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB; 1468 } 1469 gainTimeDomainHB = gainTimeDomainHB * decayBweHB; 1470 } // end of converged 1471 //make sure gain is within flooring range 1472 // flooring bottom 1473 if (gainTimeDomainHB < inst->denoiseBound) 1474 { 1475 gainTimeDomainHB = inst->denoiseBound; 1476 } 1477 // flooring top 1478 if (gainTimeDomainHB > (float)1.0) 1479 { 1480 gainTimeDomainHB = 1.0; 1481 } 1482 //apply gain 1483 for (i = 0; i < inst->blockLen10ms; i++) 1484 { 1485 dTmp = gainTimeDomainHB * inst->dataBufHB[i]; 1486 if (dTmp < WEBRTC_SPL_WORD16_MIN) 1487 { 1488 dTmp = WEBRTC_SPL_WORD16_MIN; 1489 } 1490 else if (dTmp > WEBRTC_SPL_WORD16_MAX) 1491 { 1492 dTmp = WEBRTC_SPL_WORD16_MAX; 1493 } 1494 outFrameHB[i] = (short)dTmp; 1495 } 1496 } // end of H band gain computation 1497 // 1498 1499 return 0; 1500 } 1501