1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin) 2 Copyright (C) 2004-2006 Epic Games 3 4 File: preprocess.c 5 Preprocessor with denoising based on the algorithm by Ephraim and Malah 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are 9 met: 10 11 1. Redistributions of source code must retain the above copyright notice, 12 this list of conditions and the following disclaimer. 13 14 2. Redistributions in binary form must reproduce the above copyright 15 notice, this list of conditions and the following disclaimer in the 16 documentation and/or other materials provided with the distribution. 17 18 3. The name of the author may not be used to endorse or promote products 19 derived from this software without specific prior written permission. 20 21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 25 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 35 /* 36 Recommended papers: 37 38 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error 39 short-time spectral amplitude estimator". IEEE Transactions on Acoustics, 40 Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984. 41 42 Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error 43 log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and 44 Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985. 45 46 I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments". 47 Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001. 48 49 Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic 50 approach to combined acoustic echo cancellation and noise reduction". IEEE 51 Transactions on Speech and Audio Processing, 2002. 52 53 J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation 54 of simultaneous non-stationary sources". In Proceedings IEEE International 55 Conference on Acoustics, Speech, and Signal Processing, 2004. 56 */ 57 58 #ifdef HAVE_CONFIG_H 59 #include "config.h" 60 #endif 61 62 #include <math.h> 63 #include "speex/speex_preprocess.h" 64 #include "speex/speex_echo.h" 65 #include "arch.h" 66 #include "fftwrap.h" 67 #include "filterbank.h" 68 #include "math_approx.h" 69 #include "os_support.h" 70 71 #ifndef M_PI 72 #define M_PI 3.14159263 73 #endif 74 75 #define LOUDNESS_EXP 5.f 76 #define AMP_SCALE .001f 77 #define AMP_SCALE_1 1000.f 78 79 #define NB_BANDS 24 80 81 #define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15) 82 #define SPEECH_PROB_CONTINUE_DEFAULT QCONST16(0.20f,15) 83 #define NOISE_SUPPRESS_DEFAULT -15 84 #define ECHO_SUPPRESS_DEFAULT -40 85 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15 86 87 #ifndef NULL 88 #define NULL 0 89 #endif 90 91 #define SQR(x) ((x)*(x)) 92 #define SQR16(x) (MULT16_16((x),(x))) 93 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x))) 94 95 #ifdef FIXED_POINT 96 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b) 97 { 98 if (SHR32(a,7) >= b) 99 { 100 return 32767; 101 } else { 102 if (b>=QCONST32(1,23)) 103 { 104 a = SHR32(a,8); 105 b = SHR32(b,8); 106 } 107 if (b>=QCONST32(1,19)) 108 { 109 a = SHR32(a,4); 110 b = SHR32(b,4); 111 } 112 if (b>=QCONST32(1,15)) 113 { 114 a = SHR32(a,4); 115 b = SHR32(b,4); 116 } 117 a = SHL32(a,8); 118 return PDIV32_16(a,b); 119 } 120 121 } 122 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b) 123 { 124 if (SHR32(a,15) >= b) 125 { 126 return 32767; 127 } else { 128 if (b>=QCONST32(1,23)) 129 { 130 a = SHR32(a,8); 131 b = SHR32(b,8); 132 } 133 if (b>=QCONST32(1,19)) 134 { 135 a = SHR32(a,4); 136 b = SHR32(b,4); 137 } 138 if (b>=QCONST32(1,15)) 139 { 140 a = SHR32(a,4); 141 b = SHR32(b,4); 142 } 143 a = SHL32(a,15)-a; 144 return DIV32_16(a,b); 145 } 146 } 147 #define SNR_SCALING 256.f 148 #define SNR_SCALING_1 0.0039062f 149 #define SNR_SHIFT 8 150 151 #define FRAC_SCALING 32767.f 152 #define FRAC_SCALING_1 3.0518e-05 153 #define FRAC_SHIFT 1 154 155 #define EXPIN_SCALING 2048.f 156 #define EXPIN_SCALING_1 0.00048828f 157 #define EXPIN_SHIFT 11 158 #define EXPOUT_SCALING_1 1.5259e-05 159 160 #define NOISE_SHIFT 7 161 162 #else 163 164 #define DIV32_16_Q8(a,b) ((a)/(b)) 165 #define DIV32_16_Q15(a,b) ((a)/(b)) 166 #define SNR_SCALING 1.f 167 #define SNR_SCALING_1 1.f 168 #define SNR_SHIFT 0 169 #define FRAC_SCALING 1.f 170 #define FRAC_SCALING_1 1.f 171 #define FRAC_SHIFT 0 172 #define NOISE_SHIFT 0 173 174 #define EXPIN_SCALING 1.f 175 #define EXPIN_SCALING_1 1.f 176 #define EXPOUT_SCALING_1 1.f 177 178 #endif 179 180 /** Speex pre-processor state. */ 181 struct SpeexPreprocessState_ { 182 /* Basic info */ 183 int frame_size; /**< Number of samples processed each time */ 184 int ps_size; /**< Number of points in the power spectrum */ 185 int sampling_rate; /**< Sampling rate of the input/output */ 186 int nbands; 187 FilterBank *bank; 188 189 /* Parameters */ 190 int denoise_enabled; 191 int vad_enabled; 192 int dereverb_enabled; 193 spx_word16_t reverb_decay; 194 spx_word16_t reverb_level; 195 spx_word16_t speech_prob_start; 196 spx_word16_t speech_prob_continue; 197 int noise_suppress; 198 int echo_suppress; 199 int echo_suppress_active; 200 SpeexEchoState *echo_state; 201 202 spx_word16_t speech_prob; /**< Probability last frame was speech */ 203 204 /* DSP-related arrays */ 205 spx_word16_t *frame; /**< Processing frame (2*ps_size) */ 206 spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */ 207 spx_word32_t *ps; /**< Current power spectrum */ 208 spx_word16_t *gain2; /**< Adjusted gains */ 209 spx_word16_t *gain_floor; /**< Minimum gain allowed */ 210 spx_word16_t *window; /**< Analysis/Synthesis window */ 211 spx_word32_t *noise; /**< Noise estimate */ 212 spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */ 213 spx_word32_t *old_ps; /**< Power spectrum for last frame */ 214 spx_word16_t *gain; /**< Ephraim Malah gain */ 215 spx_word16_t *prior; /**< A-priori SNR */ 216 spx_word16_t *post; /**< A-posteriori SNR */ 217 218 spx_word32_t *S; /**< Smoothed power spectrum */ 219 spx_word32_t *Smin; /**< See Cohen paper */ 220 spx_word32_t *Stmp; /**< See Cohen paper */ 221 int *update_prob; /**< Probability of speech presence for noise update */ 222 223 spx_word16_t *zeta; /**< Smoothed a priori SNR */ 224 spx_word32_t *echo_noise; 225 spx_word32_t *residual_echo; 226 227 /* Misc */ 228 spx_word16_t *inbuf; /**< Input buffer (overlapped analysis) */ 229 spx_word16_t *outbuf; /**< Output buffer (for overlap and add) */ 230 231 /* AGC stuff, only for floating point for now */ 232 #ifndef FIXED_POINT 233 int agc_enabled; 234 float agc_level; 235 float loudness_accum; 236 float *loudness_weight; /**< Perceptual loudness curve */ 237 float loudness; /**< Loudness estimate */ 238 float agc_gain; /**< Current AGC gain */ 239 float max_gain; /**< Maximum gain allowed */ 240 float max_increase_step; /**< Maximum increase in gain from one frame to another */ 241 float max_decrease_step; /**< Maximum decrease in gain from one frame to another */ 242 float prev_loudness; /**< Loudness of previous frame */ 243 float init_max; /**< Current gain limit during initialisation */ 244 #endif 245 int nb_adapt; /**< Number of frames used for adaptation so far */ 246 int was_speech; 247 int min_count; /**< Number of frames processed so far */ 248 void *fft_lookup; /**< Lookup table for the FFT */ 249 #ifdef FIXED_POINT 250 int frame_shift; 251 #endif 252 }; 253 254 255 static void conj_window(spx_word16_t *w, int len) 256 { 257 int i; 258 for (i=0;i<len;i++) 259 { 260 spx_word16_t tmp; 261 #ifdef FIXED_POINT 262 spx_word16_t x = DIV32_16(MULT16_16(32767,i),len); 263 #else 264 spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len); 265 #endif 266 int inv=0; 267 if (x<QCONST16(1.f,13)) 268 { 269 } else if (x<QCONST16(2.f,13)) 270 { 271 x=QCONST16(2.f,13)-x; 272 inv=1; 273 } else if (x<QCONST16(3.f,13)) 274 { 275 x=x-QCONST16(2.f,13); 276 inv=1; 277 } else { 278 x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */ 279 } 280 x = MULT16_16_Q14(QCONST16(1.271903f,14), x); 281 tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2)))); 282 if (inv) 283 tmp=SUB16(Q15_ONE,tmp); 284 w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15)); 285 } 286 } 287 288 289 #ifdef FIXED_POINT 290 /* This function approximates the gain function 291 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) 292 which multiplied by xi/(1+xi) is the optimal gain 293 in the loudness domain ( sqrt[amplitude] ) 294 Input in Q11 format, output in Q15 295 */ 296 static inline spx_word32_t hypergeom_gain(spx_word32_t xx) 297 { 298 int ind; 299 spx_word16_t frac; 300 /* Q13 table */ 301 static const spx_word16_t table[21] = { 302 6730, 8357, 9868, 11267, 12563, 13770, 14898, 303 15959, 16961, 17911, 18816, 19682, 20512, 21311, 304 22082, 22827, 23549, 24250, 24931, 25594, 26241}; 305 ind = SHR32(xx,10); 306 if (ind<0) 307 return Q15_ONE; 308 if (ind>19) 309 return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT)))); 310 frac = SHL32(xx-SHL32(ind,10),5); 311 return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7); 312 } 313 314 static inline spx_word16_t qcurve(spx_word16_t x) 315 { 316 x = MAX16(x, 1); 317 return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x)))); 318 } 319 320 /* Compute the gain floor based on different floors for the background noise and residual echo */ 321 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) 322 { 323 int i; 324 325 if (noise_suppress > effective_echo_suppress) 326 { 327 spx_word16_t noise_gain, gain_ratio; 328 noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1))); 329 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1))); 330 331 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ 332 for (i=0;i<len;i++) 333 gain_floor[i] = MULT16_16_Q15(noise_gain, 334 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]), 335 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15))); 336 } else { 337 spx_word16_t echo_gain, gain_ratio; 338 echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1))); 339 gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1))); 340 341 /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ 342 for (i=0;i<len;i++) 343 gain_floor[i] = MULT16_16_Q15(echo_gain, 344 spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i], 345 (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15))); 346 } 347 } 348 349 #else 350 /* This function approximates the gain function 351 y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x) 352 which multiplied by xi/(1+xi) is the optimal gain 353 in the loudness domain ( sqrt[amplitude] ) 354 */ 355 static inline spx_word32_t hypergeom_gain(spx_word32_t xx) 356 { 357 int ind; 358 float integer, frac; 359 float x; 360 static const float table[21] = { 361 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f, 362 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f, 363 2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f}; 364 x = EXPIN_SCALING_1*xx; 365 integer = floor(2*x); 366 ind = (int)integer; 367 if (ind<0) 368 return FRAC_SCALING; 369 if (ind>19) 370 return FRAC_SCALING*(1+.1296/x); 371 frac = 2*x-integer; 372 return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f); 373 } 374 375 static inline spx_word16_t qcurve(spx_word16_t x) 376 { 377 return 1.f/(1.f+.15f/(SNR_SCALING_1*x)); 378 } 379 380 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) 381 { 382 int i; 383 float echo_floor; 384 float noise_floor; 385 386 noise_floor = exp(.2302585f*noise_suppress); 387 echo_floor = exp(.2302585f*effective_echo_suppress); 388 389 /* Compute the gain floor based on different floors for the background noise and residual echo */ 390 for (i=0;i<len;i++) 391 gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]); 392 } 393 394 #endif 395 EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate) 396 { 397 int i; 398 int N, N3, N4, M; 399 400 SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState)); 401 st->frame_size = frame_size; 402 403 /* Round ps_size down to the nearest power of two */ 404 #if 0 405 i=1; 406 st->ps_size = st->frame_size; 407 while(1) 408 { 409 if (st->ps_size & ~i) 410 { 411 st->ps_size &= ~i; 412 i<<=1; 413 } else { 414 break; 415 } 416 } 417 418 419 if (st->ps_size < 3*st->frame_size/4) 420 st->ps_size = st->ps_size * 3 / 2; 421 #else 422 st->ps_size = st->frame_size; 423 #endif 424 425 N = st->ps_size; 426 N3 = 2*N - st->frame_size; 427 N4 = st->frame_size - N3; 428 429 st->sampling_rate = sampling_rate; 430 st->denoise_enabled = 1; 431 st->vad_enabled = 0; 432 st->dereverb_enabled = 0; 433 st->reverb_decay = 0; 434 st->reverb_level = 0; 435 st->noise_suppress = NOISE_SUPPRESS_DEFAULT; 436 st->echo_suppress = ECHO_SUPPRESS_DEFAULT; 437 st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT; 438 439 st->speech_prob_start = SPEECH_PROB_START_DEFAULT; 440 st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; 441 442 st->echo_state = NULL; 443 444 st->nbands = NB_BANDS; 445 M = st->nbands; 446 st->bank = filterbank_new(M, sampling_rate, N, 1); 447 448 st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); 449 st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); 450 st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); 451 452 st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 453 st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 454 st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 455 st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 456 st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 457 st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); 458 st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 459 st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 460 st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 461 st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 462 st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 463 st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); 464 465 st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 466 st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 467 st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 468 st->update_prob = (int*)speex_alloc(N*sizeof(int)); 469 470 st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); 471 st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); 472 473 conj_window(st->window, 2*N3); 474 for (i=2*N3;i<2*st->ps_size;i++) 475 st->window[i]=Q15_ONE; 476 477 if (N4>0) 478 { 479 for (i=N3-1;i>=0;i--) 480 { 481 st->window[i+N3+N4]=st->window[i+N3]; 482 st->window[i+N3]=1; 483 } 484 } 485 for (i=0;i<N+M;i++) 486 { 487 st->noise[i]=QCONST32(1.f,NOISE_SHIFT); 488 st->reverb_estimate[i]=0; 489 st->old_ps[i]=1; 490 st->gain[i]=Q15_ONE; 491 st->post[i]=SHL16(1, SNR_SHIFT); 492 st->prior[i]=SHL16(1, SNR_SHIFT); 493 } 494 495 for (i=0;i<N;i++) 496 st->update_prob[i] = 1; 497 for (i=0;i<N3;i++) 498 { 499 st->inbuf[i]=0; 500 st->outbuf[i]=0; 501 } 502 #ifndef FIXED_POINT 503 st->agc_enabled = 0; 504 st->agc_level = 8000; 505 st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); 506 for (i=0;i<N;i++) 507 { 508 float ff=((float)i)*.5*sampling_rate/((float)N); 509 /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/ 510 st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f); 511 if (st->loudness_weight[i]<.01f) 512 st->loudness_weight[i]=.01f; 513 st->loudness_weight[i] *= st->loudness_weight[i]; 514 } 515 /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/ 516 st->loudness = 1e-15; 517 st->agc_gain = 1; 518 st->max_gain = 30; 519 st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate); 520 st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate); 521 st->prev_loudness = 1; 522 st->init_max = 1; 523 #endif 524 st->was_speech = 0; 525 526 st->fft_lookup = spx_fft_init(2*N); 527 528 st->nb_adapt=0; 529 st->min_count=0; 530 return st; 531 } 532 533 EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st) 534 { 535 speex_free(st->frame); 536 speex_free(st->ft); 537 speex_free(st->ps); 538 speex_free(st->gain2); 539 speex_free(st->gain_floor); 540 speex_free(st->window); 541 speex_free(st->noise); 542 speex_free(st->reverb_estimate); 543 speex_free(st->old_ps); 544 speex_free(st->gain); 545 speex_free(st->prior); 546 speex_free(st->post); 547 #ifndef FIXED_POINT 548 speex_free(st->loudness_weight); 549 #endif 550 speex_free(st->echo_noise); 551 speex_free(st->residual_echo); 552 553 speex_free(st->S); 554 speex_free(st->Smin); 555 speex_free(st->Stmp); 556 speex_free(st->update_prob); 557 speex_free(st->zeta); 558 559 speex_free(st->inbuf); 560 speex_free(st->outbuf); 561 562 spx_fft_destroy(st->fft_lookup); 563 filterbank_destroy(st->bank); 564 speex_free(st); 565 } 566 567 /* FIXME: The AGC doesn't work yet with fixed-point*/ 568 #ifndef FIXED_POINT 569 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft) 570 { 571 int i; 572 int N = st->ps_size; 573 float target_gain; 574 float loudness=1.f; 575 float rate; 576 577 for (i=2;i<N;i++) 578 { 579 loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; 580 } 581 loudness=sqrt(loudness); 582 /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && 583 loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ 584 if (Pframe>.3f) 585 { 586 /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/ 587 rate = .03*Pframe*Pframe; 588 st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); 589 st->loudness_accum = (1-rate)*st->loudness_accum + rate; 590 if (st->init_max < st->max_gain && st->nb_adapt > 20) 591 st->init_max *= 1.f + .1f*Pframe*Pframe; 592 } 593 /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ 594 595 target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP); 596 597 if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) 598 { 599 if (target_gain > st->max_increase_step*st->agc_gain) 600 target_gain = st->max_increase_step*st->agc_gain; 601 if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness) 602 target_gain = st->max_decrease_step*st->agc_gain; 603 if (target_gain > st->max_gain) 604 target_gain = st->max_gain; 605 if (target_gain > st->init_max) 606 target_gain = st->init_max; 607 608 st->agc_gain = target_gain; 609 } 610 /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ 611 612 for (i=0;i<2*N;i++) 613 ft[i] *= st->agc_gain; 614 st->prev_loudness = loudness; 615 } 616 #endif 617 618 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) 619 { 620 int i; 621 int N = st->ps_size; 622 int N3 = 2*N - st->frame_size; 623 int N4 = st->frame_size - N3; 624 spx_word32_t *ps=st->ps; 625 626 /* 'Build' input frame */ 627 for (i=0;i<N3;i++) 628 st->frame[i]=st->inbuf[i]; 629 for (i=0;i<st->frame_size;i++) 630 st->frame[N3+i]=x[i]; 631 632 /* Update inbuf */ 633 for (i=0;i<N3;i++) 634 st->inbuf[i]=x[N4+i]; 635 636 /* Windowing */ 637 for (i=0;i<2*N;i++) 638 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); 639 640 #ifdef FIXED_POINT 641 { 642 spx_word16_t max_val=0; 643 for (i=0;i<2*N;i++) 644 max_val = MAX16(max_val, ABS16(st->frame[i])); 645 st->frame_shift = 14-spx_ilog2(EXTEND32(max_val)); 646 for (i=0;i<2*N;i++) 647 st->frame[i] = SHL16(st->frame[i], st->frame_shift); 648 } 649 #endif 650 651 /* Perform FFT */ 652 spx_fft(st->fft_lookup, st->frame, st->ft); 653 654 /* Power spectrum */ 655 ps[0]=MULT16_16(st->ft[0],st->ft[0]); 656 for (i=1;i<N;i++) 657 ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]); 658 for (i=0;i<N;i++) 659 st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift); 660 661 filterbank_compute_bank32(st->bank, ps, ps+N); 662 } 663 664 static void update_noise_prob(SpeexPreprocessState *st) 665 { 666 int i; 667 int min_range; 668 int N = st->ps_size; 669 670 for (i=1;i<N-1;i++) 671 st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) 672 + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); 673 st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); 674 st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); 675 676 if (st->nb_adapt==1) 677 { 678 for (i=0;i<N;i++) 679 st->Smin[i] = st->Stmp[i] = 0; 680 } 681 682 if (st->nb_adapt < 100) 683 min_range = 15; 684 else if (st->nb_adapt < 1000) 685 min_range = 50; 686 else if (st->nb_adapt < 10000) 687 min_range = 150; 688 else 689 min_range = 300; 690 if (st->min_count > min_range) 691 { 692 st->min_count = 0; 693 for (i=0;i<N;i++) 694 { 695 st->Smin[i] = MIN32(st->Stmp[i], st->S[i]); 696 st->Stmp[i] = st->S[i]; 697 } 698 } else { 699 for (i=0;i<N;i++) 700 { 701 st->Smin[i] = MIN32(st->Smin[i], st->S[i]); 702 st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); 703 } 704 } 705 for (i=0;i<N;i++) 706 { 707 if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i]) 708 st->update_prob[i] = 1; 709 else 710 st->update_prob[i] = 0; 711 /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/ 712 /*fprintf (stderr, "%f ", st->update_prob[i]);*/ 713 } 714 715 } 716 717 #define NOISE_OVERCOMPENS 1. 718 719 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); 720 721 EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) 722 { 723 return speex_preprocess_run(st, x); 724 } 725 726 EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) 727 { 728 int i; 729 int M; 730 int N = st->ps_size; 731 int N3 = 2*N - st->frame_size; 732 int N4 = st->frame_size - N3; 733 spx_word32_t *ps=st->ps; 734 spx_word32_t Zframe; 735 spx_word16_t Pframe; 736 spx_word16_t beta, beta_1; 737 spx_word16_t effective_echo_suppress; 738 739 st->nb_adapt++; 740 if (st->nb_adapt>20000) 741 st->nb_adapt = 20000; 742 st->min_count++; 743 744 beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); 745 beta_1 = Q15_ONE-beta; 746 M = st->nbands; 747 /* Deal with residual echo if provided */ 748 if (st->echo_state) 749 { 750 speex_echo_get_residual(st->echo_state, st->residual_echo, N); 751 #ifndef FIXED_POINT 752 /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */ 753 if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f)) 754 { 755 for (i=0;i<N;i++) 756 st->residual_echo[i] = 0; 757 } 758 #endif 759 for (i=0;i<N;i++) 760 st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]); 761 filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N); 762 } else { 763 for (i=0;i<N+M;i++) 764 st->echo_noise[i] = 0; 765 } 766 preprocess_analysis(st, x); 767 768 update_noise_prob(st); 769 770 /* Noise estimation always updated for the 10 first frames */ 771 /*if (st->nb_adapt<10) 772 { 773 for (i=1;i<N-1;i++) 774 st->update_prob[i] = 0; 775 } 776 */ 777 778 /* Update the noise estimate for the frequencies where it can be */ 779 for (i=0;i<N;i++) 780 { 781 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT)) 782 st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT))); 783 } 784 filterbank_compute_bank32(st->bank, st->noise, st->noise+N); 785 786 /* Special case for first frame */ 787 if (st->nb_adapt==1) 788 for (i=0;i<N+M;i++) 789 st->old_ps[i] = ps[i]; 790 791 /* Compute a posteriori SNR */ 792 for (i=0;i<N+M;i++) 793 { 794 spx_word16_t gamma; 795 796 /* Total noise estimate including residual echo and reverberation */ 797 spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); 798 799 /* A posteriori SNR = ps/noise - 1*/ 800 st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); 801 st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); 802 803 /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ 804 gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); 805 806 /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ 807 st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); 808 st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); 809 } 810 811 /*print_vec(st->post, N+M, "");*/ 812 813 /* Recursive average of the a priori SNR. A bit smoothed for the psd components */ 814 st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15); 815 for (i=1;i<N-1;i++) 816 st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])), 817 MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15); 818 for (i=N-1;i<N+M;i++) 819 st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15); 820 821 /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */ 822 Zframe = 0; 823 for (i=N;i<N+M;i++) 824 Zframe = ADD32(Zframe, EXTEND32(st->zeta[i])); 825 Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); 826 827 effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); 828 829 compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); 830 831 /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) 832 Technically this is actually wrong because the EM gaim assumes a slightly different probability 833 distribution */ 834 for (i=N;i<N+M;i++) 835 { 836 /* See EM and Cohen papers*/ 837 spx_word32_t theta; 838 /* Gain from hypergeometric function */ 839 spx_word32_t MM; 840 /* Weiner filter gain */ 841 spx_word16_t prior_ratio; 842 /* a priority probability of speech presence based on Bark sub-band alone */ 843 spx_word16_t P1; 844 /* Speech absence a priori probability (considering sub-band and frame) */ 845 spx_word16_t q; 846 #ifdef FIXED_POINT 847 spx_word16_t tmp; 848 #endif 849 850 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); 851 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); 852 853 MM = hypergeom_gain(theta); 854 /* Gain with bound */ 855 st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); 856 /* Save old Bark power spectrum */ 857 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); 858 859 P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i])); 860 q = Q15_ONE-MULT16_16_Q15(Pframe,P1); 861 #ifdef FIXED_POINT 862 theta = MIN32(theta, EXTEND32(32767)); 863 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1)))); 864 tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/ 865 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8)); 866 st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp)); 867 #else 868 st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta)); 869 #endif 870 } 871 /* Convert the EM gains and speech prob to linear frequency */ 872 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); 873 filterbank_compute_psd16(st->bank,st->gain+N, st->gain); 874 875 /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ 876 if (1) 877 { 878 filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); 879 880 /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ 881 for (i=0;i<N;i++) 882 { 883 spx_word32_t MM; 884 spx_word32_t theta; 885 spx_word16_t prior_ratio; 886 spx_word16_t tmp; 887 spx_word16_t p; 888 spx_word16_t g; 889 890 /* Wiener filter gain */ 891 prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); 892 theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); 893 894 /* Optimal estimator for loudness domain */ 895 MM = hypergeom_gain(theta); 896 /* EM gain with bound */ 897 g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); 898 /* Interpolated speech probability of presence */ 899 p = st->gain2[i]; 900 901 /* Constrain the gain to be close to the Bark scale gain */ 902 if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) 903 g = MULT16_16(3,st->gain[i]); 904 st->gain[i] = g; 905 906 /* Save old power spectrum */ 907 st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); 908 909 /* Apply gain floor */ 910 if (st->gain[i] < st->gain_floor[i]) 911 st->gain[i] = st->gain_floor[i]; 912 913 /* Exponential decay model for reverberation (unused) */ 914 /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ 915 916 /* Take into account speech probability of presence (loudness domain MMSE estimator) */ 917 /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */ 918 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); 919 st->gain2[i]=SQR16_Q15(tmp); 920 921 /* Use this if you want a log-domain MMSE estimator instead */ 922 /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/ 923 } 924 } else { 925 for (i=N;i<N+M;i++) 926 { 927 spx_word16_t tmp; 928 spx_word16_t p = st->gain2[i]; 929 st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); 930 tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); 931 st->gain2[i]=SQR16_Q15(tmp); 932 } 933 filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); 934 } 935 936 /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */ 937 if (!st->denoise_enabled) 938 { 939 for (i=0;i<N+M;i++) 940 st->gain2[i]=Q15_ONE; 941 } 942 943 /* Apply computed gain */ 944 for (i=1;i<N;i++) 945 { 946 st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]); 947 st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]); 948 } 949 st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); 950 st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); 951 952 /*FIXME: This *will* not work for fixed-point */ 953 #ifndef FIXED_POINT 954 if (st->agc_enabled) 955 speex_compute_agc(st, Pframe, st->ft); 956 #endif 957 958 /* Inverse FFT with 1/N scaling */ 959 spx_ifft(st->fft_lookup, st->ft, st->frame); 960 /* Scale back to original (lower) amplitude */ 961 for (i=0;i<2*N;i++) 962 st->frame[i] = PSHR16(st->frame[i], st->frame_shift); 963 964 /*FIXME: This *will* not work for fixed-point */ 965 #ifndef FIXED_POINT 966 if (st->agc_enabled) 967 { 968 float max_sample=0; 969 for (i=0;i<2*N;i++) 970 if (fabs(st->frame[i])>max_sample) 971 max_sample = fabs(st->frame[i]); 972 if (max_sample>28000.f) 973 { 974 float damp = 28000.f/max_sample; 975 for (i=0;i<2*N;i++) 976 st->frame[i] *= damp; 977 } 978 } 979 #endif 980 981 /* Synthesis window (for WOLA) */ 982 for (i=0;i<2*N;i++) 983 st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); 984 985 /* Perform overlap and add */ 986 for (i=0;i<N3;i++) 987 x[i] = st->outbuf[i] + st->frame[i]; 988 for (i=0;i<N4;i++) 989 x[N3+i] = st->frame[N3+i]; 990 991 /* Update outbuf */ 992 for (i=0;i<N3;i++) 993 st->outbuf[i] = st->frame[st->frame_size+i]; 994 995 /* FIXME: This VAD is a kludge */ 996 st->speech_prob = Pframe; 997 if (st->vad_enabled) 998 { 999 if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue)) 1000 { 1001 st->was_speech=1; 1002 return 1; 1003 } else 1004 { 1005 st->was_speech=0; 1006 return 0; 1007 } 1008 } else { 1009 return 1; 1010 } 1011 } 1012 1013 EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) 1014 { 1015 int i; 1016 int N = st->ps_size; 1017 int N3 = 2*N - st->frame_size; 1018 int M; 1019 spx_word32_t *ps=st->ps; 1020 1021 M = st->nbands; 1022 st->min_count++; 1023 1024 preprocess_analysis(st, x); 1025 1026 update_noise_prob(st); 1027 1028 for (i=1;i<N-1;i++) 1029 { 1030 if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) 1031 { 1032 st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT)); 1033 } 1034 } 1035 1036 for (i=0;i<N3;i++) 1037 st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]); 1038 1039 /* Save old power spectrum */ 1040 for (i=0;i<N+M;i++) 1041 st->old_ps[i] = ps[i]; 1042 1043 for (i=0;i<N;i++) 1044 st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]); 1045 } 1046 1047 1048 EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) 1049 { 1050 int i; 1051 SpeexPreprocessState *st; 1052 st=(SpeexPreprocessState*)state; 1053 switch(request) 1054 { 1055 case SPEEX_PREPROCESS_SET_DENOISE: 1056 st->denoise_enabled = (*(spx_int32_t*)ptr); 1057 break; 1058 case SPEEX_PREPROCESS_GET_DENOISE: 1059 (*(spx_int32_t*)ptr) = st->denoise_enabled; 1060 break; 1061 #ifndef FIXED_POINT 1062 case SPEEX_PREPROCESS_SET_AGC: 1063 st->agc_enabled = (*(spx_int32_t*)ptr); 1064 break; 1065 case SPEEX_PREPROCESS_GET_AGC: 1066 (*(spx_int32_t*)ptr) = st->agc_enabled; 1067 break; 1068 #ifndef DISABLE_FLOAT_API 1069 case SPEEX_PREPROCESS_SET_AGC_LEVEL: 1070 st->agc_level = (*(float*)ptr); 1071 if (st->agc_level<1) 1072 st->agc_level=1; 1073 if (st->agc_level>32768) 1074 st->agc_level=32768; 1075 break; 1076 case SPEEX_PREPROCESS_GET_AGC_LEVEL: 1077 (*(float*)ptr) = st->agc_level; 1078 break; 1079 #endif /* #ifndef DISABLE_FLOAT_API */ 1080 case SPEEX_PREPROCESS_SET_AGC_INCREMENT: 1081 st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); 1082 break; 1083 case SPEEX_PREPROCESS_GET_AGC_INCREMENT: 1084 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size); 1085 break; 1086 case SPEEX_PREPROCESS_SET_AGC_DECREMENT: 1087 st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); 1088 break; 1089 case SPEEX_PREPROCESS_GET_AGC_DECREMENT: 1090 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size); 1091 break; 1092 case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN: 1093 st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr)); 1094 break; 1095 case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN: 1096 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain)); 1097 break; 1098 #endif 1099 case SPEEX_PREPROCESS_SET_VAD: 1100 // Disabled by mlebeau, we don't want this in the launched Google Mobile App for iPhone. 1101 //speex_warning("The VAD has been replaced by a hack pending a complete rewrite"); 1102 st->vad_enabled = (*(spx_int32_t*)ptr); 1103 break; 1104 case SPEEX_PREPROCESS_GET_VAD: 1105 (*(spx_int32_t*)ptr) = st->vad_enabled; 1106 break; 1107 1108 case SPEEX_PREPROCESS_SET_DEREVERB: 1109 st->dereverb_enabled = (*(spx_int32_t*)ptr); 1110 for (i=0;i<st->ps_size;i++) 1111 st->reverb_estimate[i]=0; 1112 break; 1113 case SPEEX_PREPROCESS_GET_DEREVERB: 1114 (*(spx_int32_t*)ptr) = st->dereverb_enabled; 1115 break; 1116 1117 case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL: 1118 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1119 /*st->reverb_level = (*(float*)ptr);*/ 1120 break; 1121 case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL: 1122 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1123 /*(*(float*)ptr) = st->reverb_level;*/ 1124 break; 1125 1126 case SPEEX_PREPROCESS_SET_DEREVERB_DECAY: 1127 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1128 /*st->reverb_decay = (*(float*)ptr);*/ 1129 break; 1130 case SPEEX_PREPROCESS_GET_DEREVERB_DECAY: 1131 /* FIXME: Re-enable when de-reverberation is actually enabled again */ 1132 /*(*(float*)ptr) = st->reverb_decay;*/ 1133 break; 1134 1135 case SPEEX_PREPROCESS_SET_PROB_START: 1136 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr)); 1137 st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100); 1138 break; 1139 case SPEEX_PREPROCESS_GET_PROB_START: 1140 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100); 1141 break; 1142 1143 case SPEEX_PREPROCESS_SET_PROB_CONTINUE: 1144 *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr)); 1145 st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100); 1146 break; 1147 case SPEEX_PREPROCESS_GET_PROB_CONTINUE: 1148 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100); 1149 break; 1150 1151 case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS: 1152 st->noise_suppress = -ABS(*(spx_int32_t*)ptr); 1153 break; 1154 case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS: 1155 (*(spx_int32_t*)ptr) = st->noise_suppress; 1156 break; 1157 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS: 1158 st->echo_suppress = -ABS(*(spx_int32_t*)ptr); 1159 break; 1160 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS: 1161 (*(spx_int32_t*)ptr) = st->echo_suppress; 1162 break; 1163 case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE: 1164 st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr); 1165 break; 1166 case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE: 1167 (*(spx_int32_t*)ptr) = st->echo_suppress_active; 1168 break; 1169 case SPEEX_PREPROCESS_SET_ECHO_STATE: 1170 st->echo_state = (SpeexEchoState*)ptr; 1171 break; 1172 case SPEEX_PREPROCESS_GET_ECHO_STATE: 1173 (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state; 1174 break; 1175 #ifndef FIXED_POINT 1176 case SPEEX_PREPROCESS_GET_AGC_LOUDNESS: 1177 (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP); 1178 break; 1179 case SPEEX_PREPROCESS_GET_AGC_GAIN: 1180 (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain)); 1181 break; 1182 #endif 1183 case SPEEX_PREPROCESS_GET_PSD_SIZE: 1184 case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE: 1185 (*(spx_int32_t*)ptr) = st->ps_size; 1186 break; 1187 case SPEEX_PREPROCESS_GET_PSD: 1188 for(i=0;i<st->ps_size;i++) 1189 ((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i]; 1190 break; 1191 case SPEEX_PREPROCESS_GET_NOISE_PSD: 1192 for(i=0;i<st->ps_size;i++) 1193 ((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT); 1194 break; 1195 case SPEEX_PREPROCESS_GET_PROB: 1196 (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100); 1197 break; 1198 #ifndef FIXED_POINT 1199 case SPEEX_PREPROCESS_SET_AGC_TARGET: 1200 st->agc_level = (*(spx_int32_t*)ptr); 1201 if (st->agc_level<1) 1202 st->agc_level=1; 1203 if (st->agc_level>32768) 1204 st->agc_level=32768; 1205 break; 1206 case SPEEX_PREPROCESS_GET_AGC_TARGET: 1207 (*(spx_int32_t*)ptr) = st->agc_level; 1208 break; 1209 #endif 1210 default: 1211 speex_warning_int("Unknown speex_preprocess_ctl request: ", request); 1212 return -1; 1213 } 1214 return 0; 1215 } 1216 1217 #ifdef FIXED_DEBUG 1218 long long spx_mips=0; 1219 #endif 1220 1221