1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin) 2 Copyright (C) 2004-2006 Epic Games 3 4 File: preprocess.c 5 Preprocessor with denoising based on the algorithm by Ephraim and Malah 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 1. Redistributions of source code must retain the above copyright notice, 12 this list of conditions and the following disclaimer. 13 14 2. Redistributions in binary form must reproduce the above copyright 15 notice, this list of conditions and the following disclaimer in the 16 documentation and/or other materials provided with the distribution. 17 18 3. The name of the author may not be used to endorse or promote products 19 derived from this software without specific prior written permission. 20 21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 25 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 35 /* 36 Recommended papers: 37 38 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error 39 short-time spectral amplitude estimator". IEEE Transactions on Acoustics, 40 Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984. 41 42 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error 43 log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and 44 Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985. 45 46 I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments". 47 Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001. 48 49 Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic 50 approach to combined acoustic echo cancellation and noise reduction". IEEE 51 Transactions on Speech and Audio Processing, 2002. 52 53 J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation 54 of simultaneous non-stationary sources". In Proceedings IEEE International 55 Conference on Acoustics, Speech, and Signal Processing, 2004. 56 */ 57 58 #ifdef HAVE_CONFIG_H 59 #include "config.h" 60 #endif 61 62 #include <math.h> 63 #include "speex/speex_preprocess.h" 64 #include "speex/speex_echo.h" 65 #include "arch.h" 66 #include "fftwrap.h" 67 #include "filterbank.h" 68 #include "math_approx.h" 69 #include "os_support.h" 70 71 #define LOUDNESS_EXP 5.f 72 #define AMP_SCALE .001f 73 #define AMP_SCALE_1 1000.f 74 75 #define NB_BANDS 24 76 77 #define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15) 78 #define SPEECH_PROB_CONTINUE_DEFAULT QCONST16(0.20f,15) 79 #define NOISE_SUPPRESS_DEFAULT -15 80 #define ECHO_SUPPRESS_DEFAULT -40 81 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15 82 83 #ifndef NULL 84 #define NULL 0 85 #endif 86 87 #define SQR(x) ((x)*(x)) 88 #define SQR16(x) (MULT16_16((x),(x))) 89 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x))) 90 91 #ifdef FIXED_POINT 92 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b) 93 { 94 if (SHR32(a,7) >= b) 95 { 96 return 32767; 97 } else { 98 if (b>=QCONST32(1,23)) 99 { 100 a = SHR32(a,8); 101 b = SHR32(b,8); 102 } 103 if (b>=QCONST32(1,19)) 104 { 105 a = SHR32(a,4); 106 b = SHR32(b,4); 107 } 108 if (b>=QCONST32(1,15)) 109 { 110 a = SHR32(a,4); 111 b = SHR32(b,4); 112 } 113 a = SHL32(a,8); 114 return PDIV32_16(a,b); 115 } 116 117 } 118 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b) 119 { 120 if (SHR32(a,15) >= b) 121 { 122 return 32767; 123 } else { 124 if (b>=QCONST32(1,23)) 125 { 126 a = SHR32(a,8); 127 b = SHR32(b,8); 128 } 129 if (b>=QCONST32(1,19)) 130 { 131 a = SHR32(a,4); 132 b = SHR32(b,4); 133 } 134 if (b>=QCONST32(1,15)) 135 { 136 a = SHR32(a,4); 137 b = SHR32(b,4); 138 } 139 a = SHL32(a,15)-a; 140 return DIV32_16(a,b); 141 } 142 } 143 #define SNR_SCALING 256.f 144 #define SNR_SCALING_1 0.0039062f 145 #define SNR_SHIFT 8 146 147 #define FRAC_SCALING 32767.f 148 #define FRAC_SCALING_1 3.0518e-05 149 #define FRAC_SHIFT 1 150 151 #define EXPIN_SCALING 2048.f 152 #define EXPIN_SCALING_1 0.00048828f 153 #define EXPIN_SHIFT 11 154 #define EXPOUT_SCALING_1 1.5259e-05 155 156 #define NOISE_SHIFT 7 157 158 #else 159 160 #define DIV32_16_Q8(a,b) ((a)/(b)) 161 #define DIV32_16_Q15(a,b) ((a)/(b)) 162 #define SNR_SCALING 1.f 163 #define SNR_SCALING_1 1.f 164 #define SNR_SHIFT 0 165 #define FRAC_SCALING 1.f 166 #define FRAC_SCALING_1 1.f 167 #define FRAC_SHIFT 0 168 #define NOISE_SHIFT 0 169 170 #define EXPIN_SCALING 1.f 171 #define EXPIN_SCALING_1 1.f 172 #define EXPOUT_SCALING_1 1.f 173 174 #endif 175 176 /** Speex pre-processor state. */ 177 struct SpeexPreprocessState_ { 178 /* Basic info */ 179 int frame_size; /**< Number of samples processed each time */ 180 int ps_size; /**< Number of points in the power spectrum */ 181 int sampling_rate; /**< Sampling rate of the input/output */ 182 int nbands; 183 FilterBank *bank; 184 185 /* Parameters */ 186 int denoise_enabled; 187 int vad_enabled; 188 int dereverb_enabled; 189 spx_word16_t reverb_decay; 190 spx_word16_t reverb_level; 191 spx_word16_t speech_prob_start; 192 spx_word16_t speech_prob_continue; 193 int noise_suppress; 194 int echo_suppress; 195 int echo_suppress_active; 196 SpeexEchoState *echo_state; 197 198 spx_word16_t speech_prob; /**< Probability last frame was speech */ 199 200 /* DSP-related arrays */ 201 spx_word16_t *frame; /**< Processing frame (2*ps_size) */ 202 spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */ 203 spx_word32_t *ps; /**< Current power spectrum */ 204 spx_word16_t *gain2; /**< Adjusted gains */ 205 spx_word16_t *gain_floor; /**< Minimum gain allowed */ 206 spx_word16_t *window; /**< Analysis/Synthesis window */ 207 spx_word32_t *noise; /**< Noise estimate */ 208 spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */ 209 spx_word32_t *old_ps; /**< Power spectrum for last frame */ 210 spx_word16_t *gain; /**< Ephraim Malah gain */ 211 spx_word16_t *prior; /**< A-priori SNR */ 212 spx_word16_t *post; /**< A-posteriori SNR */ 213 214 spx_word32_t *S; /**< Smoothed power spectrum */ 215 spx_word32_t *Smin; /**< See Cohen paper */ 216 spx_word32_t *Stmp; /**< See Cohen paper */ 217 int *update_prob; /**< Probability of speech presence for noise update */ 218 219 spx_word16_t *zeta; /**< Smoothed a priori SNR */ 220 spx_word32_t *echo_noise; 221 spx_word32_t *residual_echo; 222 223 /* Misc */ 224 spx_word16_t *inbuf; /**< Input buffer (overlapped analysis) */ 225 spx_word16_t *outbuf; /**< Output buffer (for overlap and add) */ 226 227 /* AGC stuff, only for floating point for now */ 228 #ifndef FIXED_POINT 229 int agc_enabled; 230 float agc_level; 231 float loudness_accum; 232 float *loudness_weight; /**< Perceptual loudness curve */ 233 float loudness; /**< Loudness estimate */ 234 float agc_gain; /**< Current AGC gain */ 235 float max_gain; /**< Maximum gain allowed */ 236 float max_increase_step; /**< Maximum increase in gain from one frame to another */ 237 float max_decrease_step; /**< Maximum decrease in gain from one frame to another */ 238 float prev_loudness; /**< Loudness of previous frame */ 239 float init_max; /**< Current gain limit during initialisation */ 240 #endif 241 int nb_adapt; /**< Number of frames used for adaptation so far */ 242 int was_speech; 243 int min_count; /**< Number of frames processed so far */ 244 void *fft_lookup; /**< Lookup table for the FFT */ 245 #ifdef FIXED_POINT 246 int frame_shift; 247 #endif 248 }; 249 250 251 static void conj_window(spx_word16_t *w, int len) 252 { 253 int i; 254 for (i=0;i<len;i++) 255 { 256 spx_word16_t tmp; 257 #ifdef FIXED_POINT 258 spx_word16_t x = DIV32_16(MULT16_16(32767,i),len); 259 #else 260 spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len); 261 #endif 262 int inv=0; 263 if (x<QCONST16(1.f,13)) 264 { 265 } else if (x<QCONST16(2.f,13)) 266 { 267 x=QCONST16(2.f,13)-x; 268 inv=1; 269 } else if (x<QCONST16(3.f,13)) 270 { 271 x=x-QCONST16(2.f,13); 272 inv=1; 273 } else { 274 x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */ 275 } 276 x = MULT16_16_Q14(QCONST16(1.271903f,14), x); 277 tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2)))); 278 if (inv) 279 tmp=SUB16(Q15_ONE,tmp); 280 w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15)); 281 } 282 } 283 284 285 #ifdef FIXED_POINT 286 /* This function approximates the gain function 287 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) 288 which multiplied by xi/(1+xi) is the optimal gain 289 in the loudness domain ( sqrt[amplitude] ) 290 Input in Q11 format, output in Q15 291 */ 292 static inline spx_word32_t hypergeom_gain(spx_word32_t xx) 293 { 294 int ind; 295 spx_word16_t frac; 296 /* Q13 table */ 297 static const spx_word16_t table[21] = { 298 6730, 8357, 9868, 11267, 12563, 13770, 14898, 299 15959, 16961, 17911, 18816, 19682, 20512, 21311, 300 22082, 22827, 23549, 24250, 24931, 25594, 26241}; 301 ind = SHR32(xx,10); 302 if (ind<0) 303 return Q15_ONE; 304 if (ind>19) 305 return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT)))); 306 frac = SHL32(xx-SHL32(ind,10),5); 307 return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7); 308 } 309 310 static inline spx_word16_t qcurve(spx_word16_t x) 311 { 312 x = MAX16(x, 1); 313 return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x)))); 314 } 315 316 /* Compute the gain floor based on different floors for the background noise and residual echo */ 317 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) 318 { 319 int i; 320 321 if (noise_suppress > effective_echo_suppress) 322 { 323 spx_word16_t noise_gain, gain_ratio; 324 noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1))); 325 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1))); 326 327 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ 328 for (i=0;i<len;i++) 329 gain_floor[i] = MULT16_16_Q15(noise_gain, 330 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]), 331 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15))); 332 } else { 333 spx_word16_t echo_gain, gain_ratio; 334 echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1))); 335 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1))); 336 337 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ 338 for (i=0;i<len;i++) 339 gain_floor[i] = MULT16_16_Q15(echo_gain, 340 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i], 341 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15))); 342 } 343 } 344 345 #else 346 /* This function approximates the gain function 347 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) 348 which multiplied by xi/(1+xi) is the optimal gain 349 in the loudness domain ( sqrt[amplitude] ) 350 */ 351 static inline spx_word32_t hypergeom_gain(spx_word32_t xx) 352 { 353 int ind; 354 float integer, frac; 355 float x; 356 static const float table[21] = { 357 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f, 358 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f, 359 2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f}; 360 x = EXPIN_SCALING_1*xx; 361 integer = floor(2*x); 362 ind = (int)integer; 363 if (ind<0) 364 return FRAC_SCALING; 365 if (ind>19) 366 return FRAC_SCALING*(1+.1296/x); 367 frac = 2*x-integer; 368 return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f); 369 } 370 371 static inline spx_word16_t qcurve(spx_word16_t x) 372 { 373 return 1.f/(1.f+.15f/(SNR_SCALING_1*x)); 374 } 375 376 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) 377 { 378 int i; 379 float echo_floor; 380 float noise_floor; 381 382 noise_floor = exp(.2302585f*noise_suppress); 383 echo_floor = exp(.2302585f*effective_echo_suppress); 384 385 /* Compute the gain floor based on different floors for the background noise and residual echo */ 386 for (i=0;i<len;i++) 387 gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]); 388 } 389 390 #endif 391 EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate) 392 { 393 int i; 394 int N, N3, N4, M; 395 396 SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState)); 397 st->frame_size = frame_size; 398 399 /* Round ps_size down to the nearest power of two */ 400 #if 0 401 i=1; 402 st->ps_size = st->frame_size; 403 while(1) 404 { 405 if (st->ps_size & ~i) 406 { 407 st->ps_size &= ~i; 408 i<<=1; 409 } else { 410 break; 411 } 412 } 413 414 415 if (st->ps_size < 3*st->frame_size/4) 416 st->ps_size = st->ps_size * 3 / 2; 417 #else 418 st->ps_size = st->frame_size; 419 #endif 420 421 N = st->ps_size; 422 N3 = 2*N - st->frame_size; 423 N4 = st->frame_size - N3; 424 425 st->sampling_rate = sampling_rate; 426 st->denoise_enabled = 1; 427 st->vad_enabled = 0; 428 st->dereverb_enabled = 0; 429 st->reverb_decay = 0; 430 st->reverb_level = 0; 431 st->noise_suppress = NOISE_SUPPRESS_DEFAULT; 432 st->echo_suppress = ECHO_SUPPRESS_DEFAULT; 433 st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT; 434 435 st->speech_prob_start = SPEECH_PROB_START_DEFAULT; 436 st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; 437 438 st->echo_state = NULL; 439 440 st->nbands = NB_BANDS; 441 M = st->nbands; 442 st->bank = filterbank_new(M, sampling_rate, N, 1); 443 444 st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); 445 st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); 446 st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); 447 448 st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 449 st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 450 st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 451 st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 452 st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 453 st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 454 st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 455 st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 456 st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 457 st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 458 st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 459 st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 460 461 st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 462 st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 463 st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 464 st->update_prob = (int*)speex_alloc(N*sizeof(int)); 465 466 st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); 467 st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); 468 469 conj_window(st->window, 2*N3); 470 for (i=2*N3;i<2*st->ps_size;i++) 471 st->window[i]=Q15_ONE; 472 473 if (N4>0) 474 { 475 for (i=N3-1;i>=0;i--) 476 { 477 st->window[i+N3+N4]=st->window[i+N3]; 478 st->window[i+N3]=1; 479 } 480 } 481 for (i=0;i<N+M;i++) 482 { 483 st->noise[i]=QCONST32(1.f,NOISE_SHIFT); 484 st->reverb_estimate[i]=0; 485 st->old_ps[i]=1; 486 st->gain[i]=Q15_ONE; 487 st->post[i]=SHL16(1, SNR_SHIFT); 488 st->prior[i]=SHL16(1, SNR_SHIFT); 489 } 490 491 for (i=0;i<N;i++) 492 st->update_prob[i] = 1; 493 for (i=0;i<N3;i++) 494 { 495 st->inbuf[i]=0; 496 st->outbuf[i]=0; 497 } 498 #ifndef FIXED_POINT 499 st->agc_enabled = 0; 500 st->agc_level = 8000; 501 st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); 502 for (i=0;i<N;i++) 503 { 504 float ff=((float)i)*.5*sampling_rate/((float)N); 505 /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/ 506 st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f); 507 if (st->loudness_weight[i]<.01f) 508 st->loudness_weight[i]=.01f; 509 st->loudness_weight[i] *= st->loudness_weight[i]; 510 } 511 /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/ 512 st->loudness = 1e-15; 513 st->agc_gain = 1; 514 st->max_gain = 30; 515 st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate); 516 st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate); 517 st->prev_loudness = 1; 518 st->init_max = 1; 519 #endif 520 st->was_speech = 0; 521 522 st->fft_lookup = spx_fft_init(2*N); 523 524 st->nb_adapt=0; 525 st->min_count=0; 526 return st; 527 } 528 529 EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st) 530 { 531 speex_free(st->frame); 532 speex_free(st->ft); 533 speex_free(st->ps); 534 speex_free(st->gain2); 535 speex_free(st->gain_floor); 536 speex_free(st->window); 537 speex_free(st->noise); 538 speex_free(st->reverb_estimate); 539 speex_free(st->old_ps); 540 speex_free(st->gain); 541 speex_free(st->prior); 542 speex_free(st->post); 543 #ifndef FIXED_POINT 544 speex_free(st->loudness_weight); 545 #endif 546 speex_free(st->echo_noise); 547 speex_free(st->residual_echo); 548 549 speex_free(st->S); 550 speex_free(st->Smin); 551 speex_free(st->Stmp); 552 speex_free(st->update_prob); 553 speex_free(st->zeta); 554 555 speex_free(st->inbuf); 556 speex_free(st->outbuf); 557 558 spx_fft_destroy(st->fft_lookup); 559 filterbank_destroy(st->bank); 560 speex_free(st); 561 } 562 563 /* FIXME: The AGC doesn't work yet with fixed-point*/ 564 #ifndef FIXED_POINT 565 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft) 566 { 567 int i; 568 int N = st->ps_size; 569 float target_gain; 570 float loudness=1.f; 571 float rate; 572 573 for (i=2;i<N;i++) 574 { 575 loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; 576 } 577 loudness=sqrt(loudness); 578 /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && 579 loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ 580 if (Pframe>.3f) 581 { 582 /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/ 583 rate = .03*Pframe*Pframe; 584 st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); 585 st->loudness_accum = (1-rate)*st->loudness_accum + rate; 586 if (st->init_max < st->max_gain && st->nb_adapt > 20) 587 st->init_max *= 1.f + .1f*Pframe*Pframe; 588 } 589 /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ 590 591 target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP); 592 593 if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) 594 { 595 if (target_gain > st->max_increase_step*st->agc_gain) 596 target_gain = st->max_increase_step*st->agc_gain; 597 if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness) 598 target_gain = st->max_decrease_step*st->agc_gain; 599 if (target_gain > st->max_gain) 600 target_gain = st->max_gain; 601 if (target_gain > st->init_max) 602 target_gain = st->init_max; 603 604 st->agc_gain = target_gain; 605 } 606 /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ 607 608 for (i=0;i<2*N;i++) 609 ft[i] *= st->agc_gain; 610 st->prev_loudness = loudness; 611 } 612 #endif 613 614 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) 615 { 616 int i; 617 int N = st->ps_size; 618 int N3 = 2*N - st->frame_size; 619 int N4 = st->frame_size - N3; 620 spx_word32_t *ps=st->ps; 621 622 /* 'Build' input frame */ 623 for (i=0;i<N3;i++) 624 st->frame[i]=st->inbuf[i]; 625 for (i=0;i<st->frame_size;i++) 626 st->frame[N3+i]=x[i]; 627 628 /* Update inbuf */ 629 for (i=0;i<N3;i++) 630 st->inbuf[i]=x[N4+i]; 631 632 /* Windowing */ 633 for (i=0;i<2*N;i++) 634 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); 635 636 #ifdef FIXED_POINT 637 { 638 spx_word16_t max_val=0; 639 for (i=0;i<2*N;i++) 640 max_val = MAX16(max_val, ABS16(st->frame[i])); 641 st->frame_shift = 14-spx_ilog2(EXTEND32(max_val)); 642 for (i=0;i<2*N;i++) 643 st->frame[i] = SHL16(st->frame[i], st->frame_shift); 644 } 645 #endif 646 647 /* Perform FFT */ 648 spx_fft(st->fft_lookup, st->frame, st->ft); 649 650 /* Power spectrum */ 651 ps[0]=MULT16_16(st->ft[0],st->ft[0]); 652 for (i=1;i<N;i++) 653 ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]); 654 for (i=0;i<N;i++) 655 st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift); 656 657 filterbank_compute_bank32(st->bank, ps, ps+N); 658 } 659 660 static void update_noise_prob(SpeexPreprocessState *st) 661 { 662 int i; 663 int min_range; 664 int N = st->ps_size; 665 666 for (i=1;i<N-1;i++) 667 st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) 668 + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); 669 st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); 670 st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); 671 672 if (st->nb_adapt==1) 673 { 674 for (i=0;i<N;i++) 675 st->Smin[i] = st->Stmp[i] = 0; 676 } 677 678 if (st->nb_adapt < 100) 679 min_range = 15; 680 else if (st->nb_adapt < 1000) 681 min_range = 50; 682 else if (st->nb_adapt < 10000) 683 min_range = 150; 684 else 685 min_range = 300; 686 if (st->min_count > min_range) 687 { 688 st->min_count = 0; 689 for (i=0;i<N;i++) 690 { 691 st->Smin[i] = MIN32(st->Stmp[i], st->S[i]); 692 st->Stmp[i] = st->S[i]; 693 } 694 } else { 695 for (i=0;i<N;i++) 696 { 697 st->Smin[i] = MIN32(st->Smin[i], st->S[i]); 698 st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); 699 } 700 } 701 for (i=0;i<N;i++) 702 { 703 if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i]) 704 st->update_prob[i] = 1; 705 else 706 st->update_prob[i] = 0; 707 /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/ 708 /*fprintf (stderr, "%f ", st->update_prob[i]);*/ 709 } 710 711 } 712 713 #define NOISE_OVERCOMPENS 1. 714 715 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); 716 717 EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) 718 { 719 return speex_preprocess_run(st, x); 720 } 721 722 EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) 723 { 724 int i; 725 int M; 726 int N = st->ps_size; 727 int N3 = 2*N - st->frame_size; 728 int N4 = st->frame_size - N3; 729 spx_word32_t *ps=st->ps; 730 spx_word32_t Zframe; 731 spx_word16_t Pframe; 732 spx_word16_t beta, beta_1; 733 spx_word16_t effective_echo_suppress; 734 735 st->nb_adapt++; 736 if (st->nb_adapt>20000) 737 st->nb_adapt = 20000; 738 st->min_count++; 739 740 beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); 741 beta_1 = Q15_ONE-beta; 742 M = st->nbands; 743 /* Deal with residual echo if provided */ 744 if (st->echo_state) 745 { 746 speex_echo_get_residual(st->echo_state, st->residual_echo, N); 747 #ifndef FIXED_POINT 748 /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */ 749 if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f)) 750 { 751 for (i=0;i<N;i++) 752 st->residual_echo[i] = 0; 753 } 754 #endif 755 for (i=0;i<N;i++) 756 st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]); 757 filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N); 758 } else { 759 for (i=0;i<N+M;i++) 760 st->echo_noise[i] = 0; 761 } 762 preprocess_analysis(st, x); 763 764 update_noise_prob(st); 765 766 /* Noise estimation always updated for the 10 first frames */ 767 /*if (st->nb_adapt<10) 768 { 769 for (i=1;i<N-1;i++) 770 st->update_prob[i] = 0; 771 } 772 */ 773 774 /* Update the noise estimate for the frequencies where it can be */ 775 for (i=0;i<N;i++) 776 { 777 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT)) 778 st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT))); 779 } 780 filterbank_compute_bank32(st->bank, st->noise, st->noise+N); 781 782 /* Special case for first frame */ 783 if (st->nb_adapt==1) 784 for (i=0;i<N+M;i++) 785 st->old_ps[i] = ps[i]; 786 787 /* Compute a posteriori SNR */ 788 for (i=0;i<N+M;i++) 789 { 790 spx_word16_t gamma; 791 792 /* Total noise estimate including residual echo and reverberation */ 793 spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); 794 795 /* A posteriori SNR = ps/noise - 1*/ 796 st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); 797 st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); 798 799 /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ 800 gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); 801 802 /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ 803 st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); 804 st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); 805 } 806 807 /*print_vec(st->post, N+M, "");*/ 808 809 /* Recursive average of the a priori SNR. A bit smoothed for the psd components */ 810 st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15); 811 for (i=1;i<N-1;i++) 812 st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])), 813 MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15); 814 for (i=N-1;i<N+M;i++) 815 st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15); 816 817 /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */ 818 Zframe = 0; 819 for (i=N;i<N+M;i++) 820 Zframe = ADD32(Zframe, EXTEND32(st->zeta[i])); 821 Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); 822 823 effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); 824 825 compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); 826 827 /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) 828 Technically this is actually wrong because the EM gaim assumes a slightly different probability 829 distribution */ 830 for (i=N;i<N+M;i++) 831 { 832 /* See EM and Cohen papers*/ 833 spx_word32_t theta; 834 /* Gain from hypergeometric function */ 835 spx_word32_t MM; 836 /* Weiner filter gain */ 837 spx_word16_t prior_ratio; 838 /* a priority probability of speech presence based on Bark sub-band alone */ 839 spx_word16_t P1; 840 /* Speech absence a priori probability (considering sub-band and frame) */ 841 spx_word16_t q; 842 #ifdef FIXED_POINT 843 spx_word16_t tmp; 844 #endif 845 846 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); 847 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); 848 849 MM = hypergeom_gain(theta); 850 /* Gain with bound */ 851 st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); 852 /* Save old Bark power spectrum */ 853 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); 854 855 P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i])); 856 q = Q15_ONE-MULT16_16_Q15(Pframe,P1); 857 #ifdef FIXED_POINT 858 theta = MIN32(theta, EXTEND32(32767)); 859 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1)))); 860 tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/ 861 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8)); 862 st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp)); 863 #else 864 st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta)); 865 #endif 866 } 867 /* Convert the EM gains and speech prob to linear frequency */ 868 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); 869 filterbank_compute_psd16(st->bank,st->gain+N, st->gain); 870 871 /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ 872 if (1) 873 { 874 filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); 875 876 /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ 877 for (i=0;i<N;i++) 878 { 879 spx_word32_t MM; 880 spx_word32_t theta; 881 spx_word16_t prior_ratio; 882 spx_word16_t tmp; 883 spx_word16_t p; 884 spx_word16_t g; 885 886 /* Wiener filter gain */ 887 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); 888 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); 889 890 /* Optimal estimator for loudness domain */ 891 MM = hypergeom_gain(theta); 892 /* EM gain with bound */ 893 g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); 894 /* Interpolated speech probability of presence */ 895 p = st->gain2[i]; 896 897 /* Constrain the gain to be close to the Bark scale gain */ 898 if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) 899 g = MULT16_16(3,st->gain[i]); 900 st->gain[i] = g; 901 902 /* Save old power spectrum */ 903 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); 904 905 /* Apply gain floor */ 906 if (st->gain[i] < st->gain_floor[i]) 907 st->gain[i] = st->gain_floor[i]; 908 909 /* Exponential decay model for reverberation (unused) */ 910 /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ 911 912 /* Take into account speech probability of presence (loudness domain MMSE estimator) */ 913 /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */ 914 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); 915 st->gain2[i]=SQR16_Q15(tmp); 916 917 /* Use this if you want a log-domain MMSE estimator instead */ 918 /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/ 919 } 920 } else { 921 for (i=N;i<N+M;i++) 922 { 923 spx_word16_t tmp; 924 spx_word16_t p = st->gain2[i]; 925 st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); 926 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); 927 st->gain2[i]=SQR16_Q15(tmp); 928 } 929 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); 930 } 931 932 /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */ 933 if (!st->denoise_enabled) 934 { 935 for (i=0;i<N+M;i++) 936 st->gain2[i]=Q15_ONE; 937 } 938 939 /* Apply computed gain */ 940 for (i=1;i<N;i++) 941 { 942 st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]); 943 st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]); 944 } 945 st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); 946 st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); 947 948 /*FIXME: This *will* not work for fixed-point */ 949 #ifndef FIXED_POINT 950 if (st->agc_enabled) 951 speex_compute_agc(st, Pframe, st->ft); 952 #endif 953 954 /* Inverse FFT with 1/N scaling */ 955 spx_ifft(st->fft_lookup, st->ft, st->frame); 956 /* Scale back to original (lower) amplitude */ 957 for (i=0;i<2*N;i++) 958 st->frame[i] = PSHR16(st->frame[i], st->frame_shift); 959 960 /*FIXME: This *will* not work for fixed-point */ 961 #ifndef FIXED_POINT 962 if (st->agc_enabled) 963 { 964 float max_sample=0; 965 for (i=0;i<2*N;i++) 966 if (fabs(st->frame[i])>max_sample) 967 max_sample = fabs(st->frame[i]); 968 if (max_sample>28000.f) 969 { 970 float damp = 28000.f/max_sample; 971 for (i=0;i<2*N;i++) 972 st->frame[i] *= damp; 973 } 974 } 975 #endif 976 977 /* Synthesis window (for WOLA) */ 978 for (i=0;i<2*N;i++) 979 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); 980 981 /* Perform overlap and add */ 982 for (i=0;i<N3;i++) 983 x[i] = st->outbuf[i] + st->frame[i]; 984 for (i=0;i<N4;i++) 985 x[N3+i] = st->frame[N3+i]; 986 987 /* Update outbuf */ 988 for (i=0;i<N3;i++) 989 st->outbuf[i] = st->frame[st->frame_size+i]; 990 991 /* FIXME: This VAD is a kludge */ 992 st->speech_prob = Pframe; 993 if (st->vad_enabled) 994 { 995 if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue)) 996 { 997 st->was_speech=1; 998 return 1; 999 } else 1000 { 1001 st->was_speech=0; 1002 return 0; 1003 } 1004 } else { 1005 return 1; 1006 } 1007 } 1008 1009 EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) 1010 { 1011 int i; 1012 int N = st->ps_size; 1013 int N3 = 2*N - st->frame_size; 1014 int M; 1015 spx_word32_t *ps=st->ps; 1016 1017 M = st->nbands; 1018 st->min_count++; 1019 1020 preprocess_analysis(st, x); 1021 1022 update_noise_prob(st); 1023 1024 for (i=1;i<N-1;i++) 1025 { 1026 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) 1027 { 1028 st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT)); 1029 } 1030 } 1031 1032 for (i=0;i<N3;i++) 1033 st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]); 1034 1035 /* Save old power spectrum */ 1036 for (i=0;i<N+M;i++) 1037 st->old_ps[i] = ps[i]; 1038 1039 for (i=0;i<N;i++) 1040 st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]); 1041 } 1042 1043 1044 EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) 1045 { 1046 int i; 1047 SpeexPreprocessState *st; 1048 st=(SpeexPreprocessState*)state; 1049 switch(request) 1050 { 1051 case SPEEX_PREPROCESS_SET_DENOISE: 1052 st->denoise_enabled = (*(spx_int32_t*)ptr); 1053 break; 1054 case SPEEX_PREPROCESS_GET_DENOISE: 1055 (*(spx_int32_t*)ptr) = st->denoise_enabled; 1056 break; 1057 #ifndef FIXED_POINT 1058 case SPEEX_PREPROCESS_SET_AGC: 1059 st->agc_enabled = (*(spx_int32_t*)ptr); 1060 break; 1061 case SPEEX_PREPROCESS_GET_AGC: 1062 (*(spx_int32_t*)ptr) = st->agc_enabled; 1063 break; 1064 #ifndef DISABLE_FLOAT_API 1065 case SPEEX_PREPROCESS_SET_AGC_LEVEL: 1066 st->agc_level = (*(float*)ptr); 1067 if (st->agc_level<1) 1068 st->agc_level=1; 1069 if (st->agc_level>32768) 1070 st->agc_level=32768; 1071 break; 1072 case SPEEX_PREPROCESS_GET_AGC_LEVEL: 1073 (*(float*)ptr) = st->agc_level; 1074 break; 1075 #endif /* #ifndef DISABLE_FLOAT_API */ 1076 case SPEEX_PREPROCESS_SET_AGC_INCREMENT: 1077 st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); 1078 break; 1079 case SPEEX_PREPROCESS_GET_AGC_INCREMENT: 1080 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size); 1081 break; 1082 case SPEEX_PREPROCESS_SET_AGC_DECREMENT: 1083 st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); 1084 break; 1085 case SPEEX_PREPROCESS_GET_AGC_DECREMENT: 1086 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size); 1087 break; 1088 case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN: 1089 st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr)); 1090 break; 1091 case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN: 1092 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain)); 1093 break; 1094 #endif 1095 case SPEEX_PREPROCESS_SET_VAD: 1096 speex_warning("The VAD has been replaced by a hack pending a complete rewrite"); 1097 st->vad_enabled = (*(spx_int32_t*)ptr); 1098 break; 1099 case SPEEX_PREPROCESS_GET_VAD: 1100 (*(spx_int32_t*)ptr) = st->vad_enabled; 1101 break; 1102 1103 case SPEEX_PREPROCESS_SET_DEREVERB: 1104 st->dereverb_enabled = (*(spx_int32_t*)ptr); 1105 for (i=0;i<st->ps_size;i++) 1106 st->reverb_estimate[i]=0; 1107 break; 1108 case SPEEX_PREPROCESS_GET_DEREVERB: 1109 (*(spx_int32_t*)ptr) = st->dereverb_enabled; 1110 break; 1111 1112 case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL: 1113 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1114 /*st->reverb_level = (*(float*)ptr);*/ 1115 break; 1116 case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL: 1117 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1118 /*(*(float*)ptr) = st->reverb_level;*/ 1119 break; 1120 1121 case SPEEX_PREPROCESS_SET_DEREVERB_DECAY: 1122 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1123 /*st->reverb_decay = (*(float*)ptr);*/ 1124 break; 1125 case SPEEX_PREPROCESS_GET_DEREVERB_DECAY: 1126 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1127 /*(*(float*)ptr) = st->reverb_decay;*/ 1128 break; 1129 1130 case SPEEX_PREPROCESS_SET_PROB_START: 1131 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr)); 1132 st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100); 1133 break; 1134 case SPEEX_PREPROCESS_GET_PROB_START: 1135 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100); 1136 break; 1137 1138 case SPEEX_PREPROCESS_SET_PROB_CONTINUE: 1139 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr)); 1140 st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100); 1141 break; 1142 case SPEEX_PREPROCESS_GET_PROB_CONTINUE: 1143 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100); 1144 break; 1145 1146 case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS: 1147 st->noise_suppress = -ABS(*(spx_int32_t*)ptr); 1148 break; 1149 case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS: 1150 (*(spx_int32_t*)ptr) = st->noise_suppress; 1151 break; 1152 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS: 1153 st->echo_suppress = -ABS(*(spx_int32_t*)ptr); 1154 break; 1155 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS: 1156 (*(spx_int32_t*)ptr) = st->echo_suppress; 1157 break; 1158 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE: 1159 st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr); 1160 break; 1161 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE: 1162 (*(spx_int32_t*)ptr) = st->echo_suppress_active; 1163 break; 1164 case SPEEX_PREPROCESS_SET_ECHO_STATE: 1165 st->echo_state = (SpeexEchoState*)ptr; 1166 break; 1167 case SPEEX_PREPROCESS_GET_ECHO_STATE: 1168 (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state; 1169 break; 1170 #ifndef FIXED_POINT 1171 case SPEEX_PREPROCESS_GET_AGC_LOUDNESS: 1172 (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP); 1173 break; 1174 case SPEEX_PREPROCESS_GET_AGC_GAIN: 1175 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain)); 1176 break; 1177 #endif 1178 case SPEEX_PREPROCESS_GET_PSD_SIZE: 1179 case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE: 1180 (*(spx_int32_t*)ptr) = st->ps_size; 1181 break; 1182 case SPEEX_PREPROCESS_GET_PSD: 1183 for(i=0;i<st->ps_size;i++) 1184 ((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i]; 1185 break; 1186 case SPEEX_PREPROCESS_GET_NOISE_PSD: 1187 for(i=0;i<st->ps_size;i++) 1188 ((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT); 1189 break; 1190 case SPEEX_PREPROCESS_GET_PROB: 1191 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100); 1192 break; 1193 #ifndef FIXED_POINT 1194 case SPEEX_PREPROCESS_SET_AGC_TARGET: 1195 st->agc_level = (*(spx_int32_t*)ptr); 1196 if (st->agc_level<1) 1197 st->agc_level=1; 1198 if (st->agc_level>32768) 1199 st->agc_level=32768; 1200 break; 1201 case SPEEX_PREPROCESS_GET_AGC_TARGET: 1202 (*(spx_int32_t*)ptr) = st->agc_level; 1203 break; 1204 #endif 1205 default: 1206 speex_warning_int("Unknown speex_preprocess_ctl request: ", request); 1207 return -1; 1208 } 1209 return 0; 1210 } 1211 1212 #ifdef FIXED_DEBUG 1213 long long spx_mips=0; 1214 #endif 1215 1216