1 /* Copyright (C) 2002-2006 Jean-Marc Valin 2 File: nb_celp.c 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 - Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 - Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 15 - Neither the name of the Xiph.org Foundation nor the names of its 16 contributors may be used to endorse or promote products derived from 17 this software without specific prior written permission. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #ifdef HAVE_CONFIG_H 33 #include "config.h" 34 #endif 35 36 #include <math.h> 37 #include "nb_celp.h" 38 #include "lpc.h" 39 #include "lsp.h" 40 #include "ltp.h" 41 #include "quant_lsp.h" 42 #include "cb_search.h" 43 #include "filters.h" 44 #include "stack_alloc.h" 45 #include "vq.h" 46 #include <speex/speex_bits.h> 47 #include "vbr.h" 48 #include "arch.h" 49 #include "math_approx.h" 50 #include "os_support.h" 51 #include <speex/speex_callbacks.h> 52 53 #ifdef VORBIS_PSYCHO 54 #include "vorbis_psy.h" 55 #endif 56 57 #ifndef M_PI 58 #define M_PI 3.14159265358979323846 /* pi */ 59 #endif 60 61 #ifndef NULL 62 #define NULL 0 63 #endif 64 65 #define SUBMODE(x) st->submodes[st->submodeID]->x 66 67 /* Default size for the encoder and decoder stack (can be changed at compile time). 68 This does not apply when using variable-size arrays or alloca. */ 69 #ifndef NB_ENC_STACK 70 #define NB_ENC_STACK (8000*sizeof(spx_sig_t)) 71 #endif 72 73 #ifndef NB_DEC_STACK 74 #define NB_DEC_STACK (4000*sizeof(spx_sig_t)) 75 #endif 76 77 78 #ifdef FIXED_POINT 79 const spx_word32_t ol_gain_table[32]={18900, 25150, 33468, 44536, 59265, 78865, 104946, 139653, 185838, 247297, 329081, 437913, 582736, 775454, 1031906, 1373169, 1827293, 2431601, 3235761, 4305867, 5729870, 7624808, 10146425, 13501971, 17967238, 23909222, 31816294, 42338330, 56340132, 74972501, 99766822, 132760927}; 80 const spx_word16_t exc_gain_quant_scal3_bound[7]={1841, 3883, 6051, 8062, 10444, 13580, 18560}; 81 const spx_word16_t exc_gain_quant_scal3[8]={1002, 2680, 5086, 7016, 9108, 11781, 15380, 21740}; 82 const spx_word16_t exc_gain_quant_scal1_bound[1]={14385}; 83 const spx_word16_t exc_gain_quant_scal1[2]={11546, 17224}; 84 85 #define LSP_MARGIN 16 86 #define LSP_DELTA1 6553 87 #define LSP_DELTA2 1638 88 89 #else 90 91 const float exc_gain_quant_scal3_bound[7]={0.112338f, 0.236980f, 0.369316f, 0.492054f, 0.637471f, 0.828874f, 1.132784f}; 92 const float exc_gain_quant_scal3[8]={0.061130f, 0.163546f, 0.310413f, 0.428220f, 0.555887f, 0.719055f, 0.938694f, 1.326874f}; 93 const float exc_gain_quant_scal1_bound[1]={0.87798f}; 94 const float exc_gain_quant_scal1[2]={0.70469f, 1.05127f}; 95 96 #define LSP_MARGIN .002f 97 #define LSP_DELTA1 .2f 98 #define LSP_DELTA2 .05f 99 100 #endif 101 102 #ifdef VORBIS_PSYCHO 103 #define EXTRA_BUFFER 100 104 #else 105 #define EXTRA_BUFFER 0 106 #endif 107 108 109 #define sqr(x) ((x)*(x)) 110 111 extern const spx_word16_t lag_window[]; 112 extern const spx_word16_t lpc_window[]; 113 114 void *nb_encoder_init(const SpeexMode *m) 115 { 116 EncState *st; 117 const SpeexNBMode *mode; 118 int i; 119 120 mode=(const SpeexNBMode *)m->mode; 121 st = (EncState*)speex_alloc(sizeof(EncState)); 122 if (!st) 123 return NULL; 124 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA) 125 st->stack = NULL; 126 #else 127 st->stack = (char*)speex_alloc_scratch(NB_ENC_STACK); 128 #endif 129 130 st->mode=m; 131 132 st->frameSize = mode->frameSize; 133 st->nbSubframes=mode->frameSize/mode->subframeSize; 134 st->subframeSize=mode->subframeSize; 135 st->windowSize = st->frameSize+st->subframeSize; 136 st->lpcSize = mode->lpcSize; 137 st->gamma1=mode->gamma1; 138 st->gamma2=mode->gamma2; 139 st->min_pitch=mode->pitchStart; 140 st->max_pitch=mode->pitchEnd; 141 st->lpc_floor = mode->lpc_floor; 142 143 st->submodes=mode->submodes; 144 st->submodeID=st->submodeSelect=mode->defaultSubmode; 145 st->bounded_pitch = 1; 146 147 st->encode_submode = 1; 148 149 #ifdef VORBIS_PSYCHO 150 st->psy = vorbis_psy_init(8000, 256); 151 st->curve = (float*)speex_alloc(128*sizeof(float)); 152 st->old_curve = (float*)speex_alloc(128*sizeof(float)); 153 st->psy_window = (float*)speex_alloc(256*sizeof(float)); 154 #endif 155 156 st->cumul_gain = 1024; 157 158 /* Allocating input buffer */ 159 st->winBuf = (spx_word16_t*)speex_alloc((st->windowSize-st->frameSize)*sizeof(spx_word16_t)); 160 /* Allocating excitation buffer */ 161 st->excBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t)); 162 st->exc = st->excBuf + mode->pitchEnd + 2; 163 st->swBuf = (spx_word16_t*)speex_alloc((mode->frameSize+mode->pitchEnd+2)*sizeof(spx_word16_t)); 164 st->sw = st->swBuf + mode->pitchEnd + 2; 165 166 st->window= lpc_window; 167 168 /* Create the window for autocorrelation (lag-windowing) */ 169 st->lagWindow = lag_window; 170 171 st->old_lsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); 172 st->old_qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); 173 st->first = 1; 174 for (i=0;i<st->lpcSize;i++) 175 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1); 176 177 st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); 178 st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); 179 st->mem_sw_whole = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); 180 st->mem_exc = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); 181 st->mem_exc2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); 182 183 st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); 184 st->innov_rms_save = NULL; 185 186 st->pitch = (int*)speex_alloc((st->nbSubframes)*sizeof(int)); 187 188 #ifndef DISABLE_VBR 189 st->vbr = (VBRState*)speex_alloc(sizeof(VBRState)); 190 vbr_init(st->vbr); 191 st->vbr_quality = 8; 192 st->vbr_enabled = 0; 193 st->vbr_max = 0; 194 st->vad_enabled = 0; 195 st->dtx_enabled = 0; 196 st->dtx_count=0; 197 st->abr_enabled = 0; 198 st->abr_drift = 0; 199 st->abr_drift2 = 0; 200 #endif /* #ifndef DISABLE_VBR */ 201 202 st->plc_tuning = 2; 203 st->complexity=2; 204 st->sampling_rate=8000; 205 st->isWideband = 0; 206 st->highpass_enabled = 1; 207 208 #ifdef ENABLE_VALGRIND 209 VALGRIND_MAKE_READABLE(st, NB_ENC_STACK); 210 #endif 211 return st; 212 } 213 214 void nb_encoder_destroy(void *state) 215 { 216 EncState *st=(EncState *)state; 217 /* Free all allocated memory */ 218 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA)) 219 speex_free_scratch(st->stack); 220 #endif 221 222 speex_free (st->winBuf); 223 speex_free (st->excBuf); 224 speex_free (st->old_qlsp); 225 speex_free (st->swBuf); 226 227 speex_free (st->old_lsp); 228 speex_free (st->mem_sp); 229 speex_free (st->mem_sw); 230 speex_free (st->mem_sw_whole); 231 speex_free (st->mem_exc); 232 speex_free (st->mem_exc2); 233 speex_free (st->pi_gain); 234 speex_free (st->pitch); 235 236 #ifndef DISABLE_VBR 237 vbr_destroy(st->vbr); 238 speex_free (st->vbr); 239 #endif /* #ifndef DISABLE_VBR */ 240 241 #ifdef VORBIS_PSYCHO 242 vorbis_psy_destroy(st->psy); 243 speex_free (st->curve); 244 speex_free (st->old_curve); 245 speex_free (st->psy_window); 246 #endif 247 248 /*Free state memory... should be last*/ 249 speex_free(st); 250 } 251 252 int nb_encode(void *state, void *vin, SpeexBits *bits) 253 { 254 EncState *st; 255 int i, sub, roots; 256 int ol_pitch; 257 spx_word16_t ol_pitch_coef; 258 spx_word32_t ol_gain; 259 VARDECL(spx_word16_t *ringing); 260 VARDECL(spx_word16_t *target); 261 VARDECL(spx_sig_t *innov); 262 VARDECL(spx_word32_t *exc32); 263 VARDECL(spx_mem_t *mem); 264 VARDECL(spx_coef_t *bw_lpc1); 265 VARDECL(spx_coef_t *bw_lpc2); 266 VARDECL(spx_coef_t *lpc); 267 VARDECL(spx_lsp_t *lsp); 268 VARDECL(spx_lsp_t *qlsp); 269 VARDECL(spx_lsp_t *interp_lsp); 270 VARDECL(spx_lsp_t *interp_qlsp); 271 VARDECL(spx_coef_t *interp_lpc); 272 VARDECL(spx_coef_t *interp_qlpc); 273 char *stack; 274 VARDECL(spx_word16_t *syn_resp); 275 VARDECL(spx_word16_t *real_exc); 276 277 spx_word32_t ener=0; 278 spx_word16_t fine_gain; 279 spx_word16_t *in = (spx_word16_t*)vin; 280 281 st=(EncState *)state; 282 stack=st->stack; 283 284 ALLOC(lpc, st->lpcSize, spx_coef_t); 285 ALLOC(bw_lpc1, st->lpcSize, spx_coef_t); 286 ALLOC(bw_lpc2, st->lpcSize, spx_coef_t); 287 ALLOC(lsp, st->lpcSize, spx_lsp_t); 288 ALLOC(qlsp, st->lpcSize, spx_lsp_t); 289 ALLOC(interp_lsp, st->lpcSize, spx_lsp_t); 290 ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t); 291 ALLOC(interp_lpc, st->lpcSize, spx_coef_t); 292 ALLOC(interp_qlpc, st->lpcSize, spx_coef_t); 293 294 /* Move signals 1 frame towards the past */ 295 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, st->max_pitch+2); 296 SPEEX_MOVE(st->swBuf, st->swBuf+st->frameSize, st->max_pitch+2); 297 298 if (st->highpass_enabled) 299 highpass(in, in, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_INPUT, st->mem_hp); 300 301 { 302 VARDECL(spx_word16_t *w_sig); 303 VARDECL(spx_word16_t *autocorr); 304 ALLOC(w_sig, st->windowSize, spx_word16_t); 305 ALLOC(autocorr, st->lpcSize+1, spx_word16_t); 306 /* Window for analysis */ 307 for (i=0;i<st->windowSize-st->frameSize;i++) 308 w_sig[i] = EXTRACT16(SHR32(MULT16_16(st->winBuf[i],st->window[i]),SIG_SHIFT)); 309 for (;i<st->windowSize;i++) 310 w_sig[i] = EXTRACT16(SHR32(MULT16_16(in[i-st->windowSize+st->frameSize],st->window[i]),SIG_SHIFT)); 311 /* Compute auto-correlation */ 312 _spx_autocorr(w_sig, autocorr, st->lpcSize+1, st->windowSize); 313 autocorr[0] = ADD16(autocorr[0],MULT16_16_Q15(autocorr[0],st->lpc_floor)); /* Noise floor in auto-correlation domain */ 314 315 /* Lag windowing: equivalent to filtering in the power-spectrum domain */ 316 for (i=0;i<st->lpcSize+1;i++) 317 autocorr[i] = MULT16_16_Q14(autocorr[i],st->lagWindow[i]); 318 319 /* Levinson-Durbin */ 320 _spx_lpc(lpc, autocorr, st->lpcSize); 321 /* LPC to LSPs (x-domain) transform */ 322 roots=lpc_to_lsp (lpc, st->lpcSize, lsp, 10, LSP_DELTA1, stack); 323 /* Check if we found all the roots */ 324 if (roots!=st->lpcSize) 325 { 326 /*If we can't find all LSP's, do some damage control and use previous filter*/ 327 for (i=0;i<st->lpcSize;i++) 328 { 329 lsp[i]=st->old_lsp[i]; 330 } 331 } 332 } 333 334 335 336 337 /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */ 338 { 339 int diff = st->windowSize-st->frameSize; 340 if (st->first) 341 for (i=0;i<st->lpcSize;i++) 342 interp_lsp[i] = lsp[i]; 343 else 344 lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, st->nbSubframes, st->nbSubframes<<1); 345 346 lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN); 347 348 /* Compute interpolated LPCs (unquantized) for whole frame*/ 349 lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack); 350 351 352 /*Open-loop pitch*/ 353 if (!st->submodes[st->submodeID] || (st->complexity>2 && SUBMODE(have_subframe_gain)<3) || SUBMODE(forced_pitch_gain) || SUBMODE(lbr_pitch) != -1 354 #ifndef DISABLE_VBR 355 || st->vbr_enabled || st->vad_enabled 356 #endif 357 ) 358 { 359 int nol_pitch[6]; 360 spx_word16_t nol_pitch_coef[6]; 361 362 bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize); 363 bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize); 364 365 SPEEX_COPY(st->sw, st->winBuf, diff); 366 SPEEX_COPY(st->sw+diff, in, st->frameSize-diff); 367 filter_mem16(st->sw, bw_lpc1, bw_lpc2, st->sw, st->frameSize, st->lpcSize, st->mem_sw_whole, stack); 368 369 open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize, 370 nol_pitch, nol_pitch_coef, 6, stack); 371 ol_pitch=nol_pitch[0]; 372 ol_pitch_coef = nol_pitch_coef[0]; 373 /*Try to remove pitch multiples*/ 374 for (i=1;i<6;i++) 375 { 376 #ifdef FIXED_POINT 377 if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],27853)) && 378 #else 379 if ((nol_pitch_coef[i]>.85*nol_pitch_coef[0]) && 380 #endif 381 (ABS(2*nol_pitch[i]-ol_pitch)<=2 || ABS(3*nol_pitch[i]-ol_pitch)<=3 || 382 ABS(4*nol_pitch[i]-ol_pitch)<=4 || ABS(5*nol_pitch[i]-ol_pitch)<=5)) 383 { 384 /*ol_pitch_coef=nol_pitch_coef[i];*/ 385 ol_pitch = nol_pitch[i]; 386 } 387 } 388 /*if (ol_pitch>50) 389 ol_pitch/=2;*/ 390 /*ol_pitch_coef = sqrt(ol_pitch_coef);*/ 391 392 } else { 393 ol_pitch=0; 394 ol_pitch_coef=0; 395 } 396 397 /*Compute "real" excitation*/ 398 SPEEX_COPY(st->exc, st->winBuf, diff); 399 SPEEX_COPY(st->exc+diff, in, st->frameSize-diff); 400 fir_mem16(st->exc, interp_lpc, st->exc, st->frameSize, st->lpcSize, st->mem_exc, stack); 401 402 /* Compute open-loop excitation gain */ 403 { 404 spx_word16_t g = compute_rms16(st->exc, st->frameSize); 405 if (st->submodeID!=1 && ol_pitch>0) 406 ol_gain = MULT16_16(g, MULT16_16_Q14(QCONST16(1.1,14), 407 spx_sqrt(QCONST32(1.,28)-MULT16_32_Q15(QCONST16(.8,15),SHL32(MULT16_16(ol_pitch_coef,ol_pitch_coef),16))))); 408 else 409 ol_gain = SHL32(EXTEND32(g),SIG_SHIFT); 410 } 411 } 412 413 #ifdef VORBIS_PSYCHO 414 SPEEX_MOVE(st->psy_window, st->psy_window+st->frameSize, 256-st->frameSize); 415 SPEEX_COPY(&st->psy_window[256-st->frameSize], in, st->frameSize); 416 compute_curve(st->psy, st->psy_window, st->curve); 417 /*print_vec(st->curve, 128, "curve");*/ 418 if (st->first) 419 SPEEX_COPY(st->old_curve, st->curve, 128); 420 #endif 421 422 /*VBR stuff*/ 423 #ifndef DISABLE_VBR 424 if (st->vbr && (st->vbr_enabled||st->vad_enabled)) 425 { 426 float lsp_dist=0; 427 for (i=0;i<st->lpcSize;i++) 428 lsp_dist += (st->old_lsp[i] - lsp[i])*(st->old_lsp[i] - lsp[i]); 429 lsp_dist /= LSP_SCALING*LSP_SCALING; 430 431 if (st->abr_enabled) 432 { 433 float qual_change=0; 434 if (st->abr_drift2 * st->abr_drift > 0) 435 { 436 /* Only adapt if long-term and short-term drift are the same sign */ 437 qual_change = -.00001*st->abr_drift/(1+st->abr_count); 438 if (qual_change>.05) 439 qual_change=.05; 440 if (qual_change<-.05) 441 qual_change=-.05; 442 } 443 st->vbr_quality += qual_change; 444 if (st->vbr_quality>10) 445 st->vbr_quality=10; 446 if (st->vbr_quality<0) 447 st->vbr_quality=0; 448 } 449 450 st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, GAIN_SCALING_1*ol_pitch_coef); 451 /*if (delta_qual<0)*/ 452 /* delta_qual*=.1*(3+st->vbr_quality);*/ 453 if (st->vbr_enabled) 454 { 455 spx_int32_t mode; 456 int choice=0; 457 float min_diff=100; 458 mode = 8; 459 while (mode) 460 { 461 int v1; 462 float thresh; 463 v1=(int)floor(st->vbr_quality); 464 if (v1==10) 465 thresh = vbr_nb_thresh[mode][v1]; 466 else 467 thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1]; 468 if (st->relative_quality > thresh && 469 st->relative_quality-thresh<min_diff) 470 { 471 choice = mode; 472 min_diff = st->relative_quality-thresh; 473 } 474 mode--; 475 } 476 mode=choice; 477 if (mode==0) 478 { 479 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20) 480 { 481 mode=1; 482 st->dtx_count=1; 483 } else { 484 mode=0; 485 st->dtx_count++; 486 } 487 } else { 488 st->dtx_count=0; 489 } 490 491 speex_encoder_ctl(state, SPEEX_SET_MODE, &mode); 492 if (st->vbr_max>0) 493 { 494 spx_int32_t rate; 495 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &rate); 496 if (rate > st->vbr_max) 497 { 498 rate = st->vbr_max; 499 speex_encoder_ctl(state, SPEEX_SET_BITRATE, &rate); 500 } 501 } 502 503 if (st->abr_enabled) 504 { 505 spx_int32_t bitrate; 506 speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate); 507 st->abr_drift+=(bitrate-st->abr_enabled); 508 st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled); 509 st->abr_count += 1.0; 510 } 511 512 } else { 513 /*VAD only case*/ 514 int mode; 515 if (st->relative_quality<2) 516 { 517 if (st->dtx_count==0 || lsp_dist>.05 || !st->dtx_enabled || st->dtx_count>20) 518 { 519 st->dtx_count=1; 520 mode=1; 521 } else { 522 mode=0; 523 st->dtx_count++; 524 } 525 } else { 526 st->dtx_count = 0; 527 mode=st->submodeSelect; 528 } 529 /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/ 530 st->submodeID=mode; 531 } 532 } else { 533 st->relative_quality = -1; 534 } 535 #endif /* #ifndef DISABLE_VBR */ 536 537 if (st->encode_submode) 538 { 539 /* First, transmit a zero for narrowband */ 540 speex_bits_pack(bits, 0, 1); 541 542 /* Transmit the sub-mode we use for this frame */ 543 speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS); 544 545 } 546 547 /* If null mode (no transmission), just set a couple things to zero*/ 548 if (st->submodes[st->submodeID] == NULL) 549 { 550 for (i=0;i<st->frameSize;i++) 551 st->exc[i]=st->sw[i]=VERY_SMALL; 552 553 for (i=0;i<st->lpcSize;i++) 554 st->mem_sw[i]=0; 555 st->first=1; 556 st->bounded_pitch = 1; 557 558 SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize); 559 560 /* Clear memory (no need to really compute it) */ 561 for (i=0;i<st->lpcSize;i++) 562 st->mem_sp[i] = 0; 563 return 0; 564 565 } 566 567 /* LSP Quantization */ 568 if (st->first) 569 { 570 for (i=0;i<st->lpcSize;i++) 571 st->old_lsp[i] = lsp[i]; 572 } 573 574 575 /*Quantize LSPs*/ 576 #if 1 /*0 for unquantized*/ 577 SUBMODE(lsp_quant)(lsp, qlsp, st->lpcSize, bits); 578 #else 579 for (i=0;i<st->lpcSize;i++) 580 qlsp[i]=lsp[i]; 581 #endif 582 583 /*If we use low bit-rate pitch mode, transmit open-loop pitch*/ 584 if (SUBMODE(lbr_pitch)!=-1) 585 { 586 speex_bits_pack(bits, ol_pitch-st->min_pitch, 7); 587 } 588 589 if (SUBMODE(forced_pitch_gain)) 590 { 591 int quant; 592 /* This just damps the pitch a bit, because it tends to be too aggressive when forced */ 593 ol_pitch_coef = MULT16_16_Q15(QCONST16(.9,15), ol_pitch_coef); 594 #ifdef FIXED_POINT 595 quant = PSHR16(MULT16_16_16(15, ol_pitch_coef),GAIN_SHIFT); 596 #else 597 quant = (int)floor(.5+15*ol_pitch_coef*GAIN_SCALING_1); 598 #endif 599 if (quant>15) 600 quant=15; 601 if (quant<0) 602 quant=0; 603 speex_bits_pack(bits, quant, 4); 604 ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT)); 605 } 606 607 608 /*Quantize and transmit open-loop excitation gain*/ 609 #ifdef FIXED_POINT 610 { 611 int qe = scal_quant32(ol_gain, ol_gain_table, 32); 612 /*ol_gain = exp(qe/3.5)*SIG_SCALING;*/ 613 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]); 614 speex_bits_pack(bits, qe, 5); 615 } 616 #else 617 { 618 int qe = (int)(floor(.5+3.5*log(ol_gain*1.0/SIG_SCALING))); 619 if (qe<0) 620 qe=0; 621 if (qe>31) 622 qe=31; 623 ol_gain = exp(qe/3.5)*SIG_SCALING; 624 speex_bits_pack(bits, qe, 5); 625 } 626 #endif 627 628 629 630 /* Special case for first frame */ 631 if (st->first) 632 { 633 for (i=0;i<st->lpcSize;i++) 634 st->old_qlsp[i] = qlsp[i]; 635 } 636 637 /* Target signal */ 638 ALLOC(target, st->subframeSize, spx_word16_t); 639 ALLOC(innov, st->subframeSize, spx_sig_t); 640 ALLOC(exc32, st->subframeSize, spx_word32_t); 641 ALLOC(ringing, st->subframeSize, spx_word16_t); 642 ALLOC(syn_resp, st->subframeSize, spx_word16_t); 643 ALLOC(real_exc, st->subframeSize, spx_word16_t); 644 ALLOC(mem, st->lpcSize, spx_mem_t); 645 646 /* Loop on sub-frames */ 647 for (sub=0;sub<st->nbSubframes;sub++) 648 { 649 int offset; 650 spx_word16_t *sw; 651 spx_word16_t *exc; 652 int pitch; 653 int response_bound = st->subframeSize; 654 655 /* Offset relative to start of frame */ 656 offset = st->subframeSize*sub; 657 /* Excitation */ 658 exc=st->exc+offset; 659 /* Weighted signal */ 660 sw=st->sw+offset; 661 662 /* LSP interpolation (quantized and unquantized) */ 663 lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes); 664 lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); 665 666 /* Make sure the filters are stable */ 667 lsp_enforce_margin(interp_lsp, st->lpcSize, LSP_MARGIN); 668 lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); 669 670 /* Compute interpolated LPCs (quantized and unquantized) */ 671 lsp_to_lpc(interp_lsp, interp_lpc, st->lpcSize,stack); 672 673 lsp_to_lpc(interp_qlsp, interp_qlpc, st->lpcSize, stack); 674 675 /* Compute analysis filter gain at w=pi (for use in SB-CELP) */ 676 { 677 spx_word32_t pi_g=LPC_SCALING; 678 for (i=0;i<st->lpcSize;i+=2) 679 { 680 /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ 681 pi_g = ADD32(pi_g, SUB32(EXTEND32(interp_qlpc[i+1]),EXTEND32(interp_qlpc[i]))); 682 } 683 st->pi_gain[sub] = pi_g; 684 } 685 686 #ifdef VORBIS_PSYCHO 687 { 688 float curr_curve[128]; 689 float fact = ((float)sub+1.0f)/st->nbSubframes; 690 for (i=0;i<128;i++) 691 curr_curve[i] = (1.0f-fact)*st->old_curve[i] + fact*st->curve[i]; 692 curve_to_lpc(st->psy, curr_curve, bw_lpc1, bw_lpc2, 10); 693 } 694 #else 695 /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */ 696 bw_lpc(st->gamma1, interp_lpc, bw_lpc1, st->lpcSize); 697 if (st->gamma2>=0) 698 bw_lpc(st->gamma2, interp_lpc, bw_lpc2, st->lpcSize); 699 else 700 { 701 for (i=0;i<st->lpcSize;i++) 702 bw_lpc2[i]=0; 703 } 704 /*print_vec(st->bw_lpc1, 10, "bw_lpc");*/ 705 #endif 706 707 /*FIXME: This will break if we change the window size */ 708 speex_assert(st->windowSize-st->frameSize == st->subframeSize); 709 if (sub==0) 710 { 711 for (i=0;i<st->subframeSize;i++) 712 real_exc[i] = sw[i] = st->winBuf[i]; 713 } else { 714 for (i=0;i<st->subframeSize;i++) 715 real_exc[i] = sw[i] = in[i+((sub-1)*st->subframeSize)]; 716 } 717 fir_mem16(real_exc, interp_qlpc, real_exc, st->subframeSize, st->lpcSize, st->mem_exc2, stack); 718 719 if (st->complexity==0) 720 response_bound >>= 1; 721 compute_impulse_response(interp_qlpc, bw_lpc1, bw_lpc2, syn_resp, response_bound, st->lpcSize, stack); 722 for (i=response_bound;i<st->subframeSize;i++) 723 syn_resp[i]=VERY_SMALL; 724 725 /* Compute zero response of A(z/g1) / ( A(z/g2) * A(z) ) */ 726 for (i=0;i<st->lpcSize;i++) 727 mem[i]=SHL32(st->mem_sp[i],1); 728 for (i=0;i<st->subframeSize;i++) 729 ringing[i] = VERY_SMALL; 730 #ifdef SHORTCUTS2 731 iir_mem16(ringing, interp_qlpc, ringing, response_bound, st->lpcSize, mem, stack); 732 for (i=0;i<st->lpcSize;i++) 733 mem[i]=SHL32(st->mem_sw[i],1); 734 filter_mem16(ringing, st->bw_lpc1, st->bw_lpc2, ringing, response_bound, st->lpcSize, mem, stack); 735 SPEEX_MEMSET(&ringing[response_bound], 0, st->subframeSize-response_bound); 736 #else 737 iir_mem16(ringing, interp_qlpc, ringing, st->subframeSize, st->lpcSize, mem, stack); 738 for (i=0;i<st->lpcSize;i++) 739 mem[i]=SHL32(st->mem_sw[i],1); 740 filter_mem16(ringing, bw_lpc1, bw_lpc2, ringing, st->subframeSize, st->lpcSize, mem, stack); 741 #endif 742 743 /* Compute weighted signal */ 744 for (i=0;i<st->lpcSize;i++) 745 mem[i]=st->mem_sw[i]; 746 filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack); 747 748 if (st->complexity==0) 749 for (i=0;i<st->lpcSize;i++) 750 st->mem_sw[i]=mem[i]; 751 752 /* Compute target signal (saturation prevents overflows on clipped input speech) */ 753 for (i=0;i<st->subframeSize;i++) 754 target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767)); 755 756 /* Reset excitation */ 757 SPEEX_MEMSET(exc, 0, st->subframeSize); 758 759 /* If we have a long-term predictor (otherwise, something's wrong) */ 760 speex_assert (SUBMODE(ltp_quant)); 761 { 762 int pit_min, pit_max; 763 /* Long-term prediction */ 764 if (SUBMODE(lbr_pitch) != -1) 765 { 766 /* Low bit-rate pitch handling */ 767 int margin; 768 margin = SUBMODE(lbr_pitch); 769 if (margin) 770 { 771 if (ol_pitch < st->min_pitch+margin-1) 772 ol_pitch=st->min_pitch+margin-1; 773 if (ol_pitch > st->max_pitch-margin) 774 ol_pitch=st->max_pitch-margin; 775 pit_min = ol_pitch-margin+1; 776 pit_max = ol_pitch+margin; 777 } else { 778 pit_min=pit_max=ol_pitch; 779 } 780 } else { 781 pit_min = st->min_pitch; 782 pit_max = st->max_pitch; 783 } 784 785 /* Force pitch to use only the current frame if needed */ 786 if (st->bounded_pitch && pit_max>offset) 787 pit_max=offset; 788 789 /* Perform pitch search */ 790 pitch = SUBMODE(ltp_quant)(target, sw, interp_qlpc, bw_lpc1, bw_lpc2, 791 exc32, SUBMODE(ltp_params), pit_min, pit_max, ol_pitch_coef, 792 st->lpcSize, st->subframeSize, bits, stack, 793 exc, syn_resp, st->complexity, 0, st->plc_tuning, &st->cumul_gain); 794 795 st->pitch[sub]=pitch; 796 } 797 /* Quantization of innovation */ 798 SPEEX_MEMSET(innov, 0, st->subframeSize); 799 800 /* FIXME: Make sure this is save from overflows (so far so good) */ 801 for (i=0;i<st->subframeSize;i++) 802 real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1))); 803 804 ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT); 805 806 /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */ 807 #ifdef FIXED_POINT 808 { 809 spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); 810 if (f<=32767) 811 fine_gain = f; 812 else 813 fine_gain = 32767; 814 } 815 #else 816 fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); 817 #endif 818 /* Calculate gain correction for the sub-frame (if any) */ 819 if (SUBMODE(have_subframe_gain)) 820 { 821 int qe; 822 if (SUBMODE(have_subframe_gain)==3) 823 { 824 qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8); 825 speex_bits_pack(bits, qe, 3); 826 ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain); 827 } else { 828 qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2); 829 speex_bits_pack(bits, qe, 1); 830 ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain); 831 } 832 } else { 833 ener=ol_gain; 834 } 835 836 /*printf ("%f %f\n", ener, ol_gain);*/ 837 838 /* Normalize innovation */ 839 signal_div(target, target, ener, st->subframeSize); 840 841 /* Quantize innovation */ 842 speex_assert (SUBMODE(innovation_quant)); 843 { 844 /* Codebook search */ 845 SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, 846 SUBMODE(innovation_params), st->lpcSize, st->subframeSize, 847 innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook)); 848 849 /* De-normalize innovation and update excitation */ 850 signal_mul(innov, innov, ener, st->subframeSize); 851 852 for (i=0;i<st->subframeSize;i++) 853 exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); 854 855 /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */ 856 if (SUBMODE(double_codebook)) { 857 char *tmp_stack=stack; 858 VARDECL(spx_sig_t *innov2); 859 ALLOC(innov2, st->subframeSize, spx_sig_t); 860 SPEEX_MEMSET(innov2, 0, st->subframeSize); 861 for (i=0;i<st->subframeSize;i++) 862 target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]); 863 SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, 864 SUBMODE(innovation_params), st->lpcSize, st->subframeSize, 865 innov2, syn_resp, bits, stack, st->complexity, 0); 866 signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize); 867 for (i=0;i<st->subframeSize;i++) 868 innov[i] = ADD32(innov[i],innov2[i]); 869 stack = tmp_stack; 870 } 871 for (i=0;i<st->subframeSize;i++) 872 exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); 873 if (st->innov_rms_save) 874 { 875 st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize); 876 } 877 } 878 879 /* Final signal synthesis from excitation */ 880 iir_mem16(exc, interp_qlpc, sw, st->subframeSize, st->lpcSize, st->mem_sp, stack); 881 882 /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */ 883 if (st->complexity!=0) 884 filter_mem16(sw, bw_lpc1, bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack); 885 886 } 887 888 /* Store the LSPs for interpolation in the next frame */ 889 if (st->submodeID>=1) 890 { 891 for (i=0;i<st->lpcSize;i++) 892 st->old_lsp[i] = lsp[i]; 893 for (i=0;i<st->lpcSize;i++) 894 st->old_qlsp[i] = qlsp[i]; 895 } 896 897 #ifdef VORBIS_PSYCHO 898 if (st->submodeID>=1) 899 SPEEX_COPY(st->old_curve, st->curve, 128); 900 #endif 901 902 if (st->submodeID==1) 903 { 904 #ifndef DISABLE_VBR 905 if (st->dtx_count) 906 speex_bits_pack(bits, 15, 4); 907 else 908 #endif 909 speex_bits_pack(bits, 0, 4); 910 } 911 912 /* The next frame will not be the first (Duh!) */ 913 st->first = 0; 914 SPEEX_COPY(st->winBuf, in+2*st->frameSize-st->windowSize, st->windowSize-st->frameSize); 915 916 if (SUBMODE(innovation_quant) == noise_codebook_quant || st->submodeID==0) 917 st->bounded_pitch = 1; 918 else 919 st->bounded_pitch = 0; 920 921 return 1; 922 } 923 924 void *nb_decoder_init(const SpeexMode *m) 925 { 926 DecState *st; 927 const SpeexNBMode *mode; 928 int i; 929 930 mode=(const SpeexNBMode*)m->mode; 931 st = (DecState *)speex_alloc(sizeof(DecState)); 932 if (!st) 933 return NULL; 934 #if defined(VAR_ARRAYS) || defined (USE_ALLOCA) 935 st->stack = NULL; 936 #else 937 st->stack = (char*)speex_alloc_scratch(NB_DEC_STACK); 938 #endif 939 940 st->mode=m; 941 942 943 st->encode_submode = 1; 944 945 st->first=1; 946 /* Codec parameters, should eventually have several "modes"*/ 947 st->frameSize = mode->frameSize; 948 st->nbSubframes=mode->frameSize/mode->subframeSize; 949 st->subframeSize=mode->subframeSize; 950 st->lpcSize = mode->lpcSize; 951 st->min_pitch=mode->pitchStart; 952 st->max_pitch=mode->pitchEnd; 953 954 st->submodes=mode->submodes; 955 st->submodeID=mode->defaultSubmode; 956 957 st->lpc_enh_enabled=1; 958 959 st->excBuf = (spx_word16_t*)speex_alloc((st->frameSize + 2*st->max_pitch + st->subframeSize + 12)*sizeof(spx_word16_t)); 960 st->exc = st->excBuf + 2*st->max_pitch + st->subframeSize + 6; 961 SPEEX_MEMSET(st->excBuf, 0, st->frameSize + st->max_pitch); 962 963 st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); 964 st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); 965 st->mem_sp = (spx_mem_t*)speex_alloc(st->lpcSize*sizeof(spx_mem_t)); 966 st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); 967 st->last_pitch = 40; 968 st->count_lost=0; 969 st->pitch_gain_buf[0] = st->pitch_gain_buf[1] = st->pitch_gain_buf[2] = 0; 970 st->pitch_gain_buf_idx = 0; 971 st->seed = 1000; 972 973 st->sampling_rate=8000; 974 st->last_ol_gain = 0; 975 976 st->user_callback.func = &speex_default_user_handler; 977 st->user_callback.data = NULL; 978 for (i=0;i<16;i++) 979 st->speex_callbacks[i].func = NULL; 980 981 st->voc_m1=st->voc_m2=st->voc_mean=0; 982 st->voc_offset=0; 983 st->dtx_enabled=0; 984 st->isWideband = 0; 985 st->highpass_enabled = 1; 986 987 #ifdef ENABLE_VALGRIND 988 VALGRIND_MAKE_READABLE(st, NB_DEC_STACK); 989 #endif 990 return st; 991 } 992 993 void nb_decoder_destroy(void *state) 994 { 995 DecState *st; 996 st=(DecState*)state; 997 998 #if !(defined(VAR_ARRAYS) || defined (USE_ALLOCA)) 999 speex_free_scratch(st->stack); 1000 #endif 1001 1002 speex_free (st->excBuf); 1003 speex_free (st->interp_qlpc); 1004 speex_free (st->old_qlsp); 1005 speex_free (st->mem_sp); 1006 speex_free (st->pi_gain); 1007 1008 speex_free(state); 1009 } 1010 1011 #define median3(a, b, c) ((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a)))) 1012 1013 #ifdef FIXED_POINT 1014 const spx_word16_t attenuation[10] = {32767, 31483, 27923, 22861, 17278, 12055, 7764, 4616, 2533, 1283}; 1015 #else 1016 const spx_word16_t attenuation[10] = {1., 0.961, 0.852, 0.698, 0.527, 0.368, 0.237, 0.141, 0.077, 0.039}; 1017 1018 #endif 1019 1020 static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) 1021 { 1022 int i; 1023 int pitch_val; 1024 spx_word16_t pitch_gain; 1025 spx_word16_t fact; 1026 spx_word16_t gain_med; 1027 spx_word16_t innov_gain; 1028 spx_word16_t noise_gain; 1029 1030 if (st->count_lost<10) 1031 fact = attenuation[st->count_lost]; 1032 else 1033 fact = 0; 1034 1035 gain_med = median3(st->pitch_gain_buf[0], st->pitch_gain_buf[1], st->pitch_gain_buf[2]); 1036 if (gain_med < st->last_pitch_gain) 1037 st->last_pitch_gain = gain_med; 1038 1039 #ifdef FIXED_POINT 1040 pitch_gain = st->last_pitch_gain; 1041 if (pitch_gain>54) 1042 pitch_gain = 54; 1043 pitch_gain = SHL16(pitch_gain, 9); 1044 #else 1045 pitch_gain = GAIN_SCALING_1*st->last_pitch_gain; 1046 if (pitch_gain>.85) 1047 pitch_gain=.85; 1048 #endif 1049 pitch_gain = MULT16_16_Q15(fact,pitch_gain) + VERY_SMALL; 1050 /* FIXME: This was rms of innovation (not exc) */ 1051 innov_gain = compute_rms16(st->exc, st->frameSize); 1052 noise_gain = MULT16_16_Q15(innov_gain, MULT16_16_Q15(fact, SUB16(Q15ONE,MULT16_16_Q15(pitch_gain,pitch_gain)))); 1053 /* Shift all buffers by one frame */ 1054 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12); 1055 1056 1057 pitch_val = st->last_pitch + SHR32((spx_int32_t)speex_rand(1+st->count_lost, &st->seed),SIG_SHIFT); 1058 if (pitch_val > st->max_pitch) 1059 pitch_val = st->max_pitch; 1060 if (pitch_val < st->min_pitch) 1061 pitch_val = st->min_pitch; 1062 for (i=0;i<st->frameSize;i++) 1063 { 1064 st->exc[i]= MULT16_16_Q15(pitch_gain, (st->exc[i-pitch_val]+VERY_SMALL)) + 1065 speex_rand(noise_gain, &st->seed); 1066 } 1067 1068 bw_lpc(QCONST16(.98,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize); 1069 iir_mem16(&st->exc[-st->subframeSize], st->interp_qlpc, out, st->frameSize, 1070 st->lpcSize, st->mem_sp, stack); 1071 highpass(out, out, st->frameSize, HIGHPASS_NARROWBAND|HIGHPASS_OUTPUT, st->mem_hp); 1072 1073 st->first = 0; 1074 st->count_lost++; 1075 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR16(pitch_gain,9); 1076 if (st->pitch_gain_buf_idx > 2) /* rollover */ 1077 st->pitch_gain_buf_idx = 0; 1078 } 1079 1080 /* Just so we don't need to carry the complete wideband mode information */ 1081 static const int wb_skip_table[8] = {0, 36, 112, 192, 352, 0, 0, 0}; 1082 1083 int nb_decode(void *state, SpeexBits *bits, void *vout) 1084 { 1085 DecState *st; 1086 int i, sub; 1087 int pitch; 1088 spx_word16_t pitch_gain[3]; 1089 spx_word32_t ol_gain=0; 1090 int ol_pitch=0; 1091 spx_word16_t ol_pitch_coef=0; 1092 int best_pitch=40; 1093 spx_word16_t best_pitch_gain=0; 1094 int wideband; 1095 int m; 1096 char *stack; 1097 VARDECL(spx_sig_t *innov); 1098 VARDECL(spx_word32_t *exc32); 1099 VARDECL(spx_coef_t *ak); 1100 VARDECL(spx_lsp_t *qlsp); 1101 spx_word16_t pitch_average=0; 1102 1103 spx_word16_t *out = (spx_word16_t*)vout; 1104 VARDECL(spx_lsp_t *interp_qlsp); 1105 1106 st=(DecState*)state; 1107 stack=st->stack; 1108 1109 /* Check if we're in DTX mode*/ 1110 if (!bits && st->dtx_enabled) 1111 { 1112 st->submodeID=0; 1113 } else 1114 { 1115 /* If bits is NULL, consider the packet to be lost (what could we do anyway) */ 1116 if (!bits) 1117 { 1118 nb_decode_lost(st, out, stack); 1119 return 0; 1120 } 1121 1122 if (st->encode_submode) 1123 { 1124 1125 /* Search for next narrowband block (handle requests, skip wideband blocks) */ 1126 do { 1127 if (speex_bits_remaining(bits)<5) 1128 return -1; 1129 wideband = speex_bits_unpack_unsigned(bits, 1); 1130 if (wideband) /* Skip wideband block (for compatibility) */ 1131 { 1132 int submode; 1133 int advance; 1134 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS); 1135 /*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/ 1136 advance = wb_skip_table[submode]; 1137 if (advance < 0) 1138 { 1139 speex_notify("Invalid mode encountered. The stream is corrupted."); 1140 return -2; 1141 } 1142 advance -= (SB_SUBMODE_BITS+1); 1143 speex_bits_advance(bits, advance); 1144 1145 if (speex_bits_remaining(bits)<5) 1146 return -1; 1147 wideband = speex_bits_unpack_unsigned(bits, 1); 1148 if (wideband) 1149 { 1150 advance = submode = speex_bits_unpack_unsigned(bits, SB_SUBMODE_BITS); 1151 /*speex_mode_query(&speex_wb_mode, SPEEX_SUBMODE_BITS_PER_FRAME, &advance);*/ 1152 advance = wb_skip_table[submode]; 1153 if (advance < 0) 1154 { 1155 speex_notify("Invalid mode encountered. The stream is corrupted."); 1156 return -2; 1157 } 1158 advance -= (SB_SUBMODE_BITS+1); 1159 speex_bits_advance(bits, advance); 1160 wideband = speex_bits_unpack_unsigned(bits, 1); 1161 if (wideband) 1162 { 1163 speex_notify("More than two wideband layers found. The stream is corrupted."); 1164 return -2; 1165 } 1166 1167 } 1168 } 1169 if (speex_bits_remaining(bits)<4) 1170 return -1; 1171 /* FIXME: Check for overflow */ 1172 m = speex_bits_unpack_unsigned(bits, 4); 1173 if (m==15) /* We found a terminator */ 1174 { 1175 return -1; 1176 } else if (m==14) /* Speex in-band request */ 1177 { 1178 int ret = speex_inband_handler(bits, st->speex_callbacks, state); 1179 if (ret) 1180 return ret; 1181 } else if (m==13) /* User in-band request */ 1182 { 1183 int ret = st->user_callback.func(bits, state, st->user_callback.data); 1184 if (ret) 1185 return ret; 1186 } else if (m>8) /* Invalid mode */ 1187 { 1188 speex_notify("Invalid mode encountered. The stream is corrupted."); 1189 return -2; 1190 } 1191 1192 } while (m>8); 1193 1194 /* Get the sub-mode that was used */ 1195 st->submodeID = m; 1196 } 1197 1198 } 1199 1200 /* Shift all buffers by one frame */ 1201 SPEEX_MOVE(st->excBuf, st->excBuf+st->frameSize, 2*st->max_pitch + st->subframeSize + 12); 1202 1203 /* If null mode (no transmission), just set a couple things to zero*/ 1204 if (st->submodes[st->submodeID] == NULL) 1205 { 1206 VARDECL(spx_coef_t *lpc); 1207 ALLOC(lpc, st->lpcSize, spx_coef_t); 1208 bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize); 1209 { 1210 spx_word16_t innov_gain=0; 1211 /* FIXME: This was innov, not exc */ 1212 innov_gain = compute_rms16(st->exc, st->frameSize); 1213 for (i=0;i<st->frameSize;i++) 1214 st->exc[i]=speex_rand(innov_gain, &st->seed); 1215 } 1216 1217 1218 st->first=1; 1219 1220 /* Final signal synthesis from excitation */ 1221 iir_mem16(st->exc, lpc, out, st->frameSize, st->lpcSize, st->mem_sp, stack); 1222 1223 st->count_lost=0; 1224 return 0; 1225 } 1226 1227 ALLOC(qlsp, st->lpcSize, spx_lsp_t); 1228 1229 /* Unquantize LSPs */ 1230 SUBMODE(lsp_unquant)(qlsp, st->lpcSize, bits); 1231 1232 /*Damp memory if a frame was lost and the LSP changed too much*/ 1233 if (st->count_lost) 1234 { 1235 spx_word16_t fact; 1236 spx_word32_t lsp_dist=0; 1237 for (i=0;i<st->lpcSize;i++) 1238 lsp_dist = ADD32(lsp_dist, EXTEND32(ABS(st->old_qlsp[i] - qlsp[i]))); 1239 #ifdef FIXED_POINT 1240 fact = SHR16(19661,SHR32(lsp_dist,LSP_SHIFT+2)); 1241 #else 1242 fact = .6*exp(-.2*lsp_dist); 1243 #endif 1244 for (i=0;i<st->lpcSize;i++) 1245 st->mem_sp[i] = MULT16_32_Q15(fact,st->mem_sp[i]); 1246 } 1247 1248 1249 /* Handle first frame and lost-packet case */ 1250 if (st->first || st->count_lost) 1251 { 1252 for (i=0;i<st->lpcSize;i++) 1253 st->old_qlsp[i] = qlsp[i]; 1254 } 1255 1256 /* Get open-loop pitch estimation for low bit-rate pitch coding */ 1257 if (SUBMODE(lbr_pitch)!=-1) 1258 { 1259 ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7); 1260 } 1261 1262 if (SUBMODE(forced_pitch_gain)) 1263 { 1264 int quant; 1265 quant = speex_bits_unpack_unsigned(bits, 4); 1266 ol_pitch_coef=MULT16_16_P15(QCONST16(0.066667,15),SHL16(quant,GAIN_SHIFT)); 1267 } 1268 1269 /* Get global excitation gain */ 1270 { 1271 int qe; 1272 qe = speex_bits_unpack_unsigned(bits, 5); 1273 #ifdef FIXED_POINT 1274 /* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? */ 1275 ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]); 1276 #else 1277 ol_gain = SIG_SCALING*exp(qe/3.5); 1278 #endif 1279 } 1280 1281 ALLOC(ak, st->lpcSize, spx_coef_t); 1282 ALLOC(innov, st->subframeSize, spx_sig_t); 1283 ALLOC(exc32, st->subframeSize, spx_word32_t); 1284 1285 if (st->submodeID==1) 1286 { 1287 int extra; 1288 extra = speex_bits_unpack_unsigned(bits, 4); 1289 1290 if (extra==15) 1291 st->dtx_enabled=1; 1292 else 1293 st->dtx_enabled=0; 1294 } 1295 if (st->submodeID>1) 1296 st->dtx_enabled=0; 1297 1298 /*Loop on subframes */ 1299 for (sub=0;sub<st->nbSubframes;sub++) 1300 { 1301 int offset; 1302 spx_word16_t *exc; 1303 spx_word16_t *sp; 1304 spx_word16_t *innov_save = NULL; 1305 spx_word16_t tmp; 1306 1307 /* Offset relative to start of frame */ 1308 offset = st->subframeSize*sub; 1309 /* Excitation */ 1310 exc=st->exc+offset; 1311 /* Original signal */ 1312 sp=out+offset; 1313 if (st->innov_save) 1314 innov_save = st->innov_save+offset; 1315 1316 1317 /* Reset excitation */ 1318 SPEEX_MEMSET(exc, 0, st->subframeSize); 1319 1320 /*Adaptive codebook contribution*/ 1321 speex_assert (SUBMODE(ltp_unquant)); 1322 { 1323 int pit_min, pit_max; 1324 /* Handle pitch constraints if any */ 1325 if (SUBMODE(lbr_pitch) != -1) 1326 { 1327 int margin; 1328 margin = SUBMODE(lbr_pitch); 1329 if (margin) 1330 { 1331 /* GT - need optimization? 1332 if (ol_pitch < st->min_pitch+margin-1) 1333 ol_pitch=st->min_pitch+margin-1; 1334 if (ol_pitch > st->max_pitch-margin) 1335 ol_pitch=st->max_pitch-margin; 1336 pit_min = ol_pitch-margin+1; 1337 pit_max = ol_pitch+margin; 1338 */ 1339 pit_min = ol_pitch-margin+1; 1340 if (pit_min < st->min_pitch) 1341 pit_min = st->min_pitch; 1342 pit_max = ol_pitch+margin; 1343 if (pit_max > st->max_pitch) 1344 pit_max = st->max_pitch; 1345 } else { 1346 pit_min = pit_max = ol_pitch; 1347 } 1348 } else { 1349 pit_min = st->min_pitch; 1350 pit_max = st->max_pitch; 1351 } 1352 1353 1354 1355 SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(ltp_params), 1356 st->subframeSize, &pitch, &pitch_gain[0], bits, stack, 1357 st->count_lost, offset, st->last_pitch_gain, 0); 1358 1359 /* Ensuring that things aren't blowing up as would happen if e.g. an encoder is 1360 crafting packets to make us produce NaNs and slow down the decoder (vague DoS threat). 1361 We can probably be even more aggressive and limit to 15000 or so. */ 1362 sanitize_values32(exc32, NEG32(QCONST32(32000,SIG_SHIFT-1)), QCONST32(32000,SIG_SHIFT-1), st->subframeSize); 1363 1364 tmp = gain_3tap_to_1tap(pitch_gain); 1365 1366 pitch_average += tmp; 1367 if ((tmp>best_pitch_gain&&ABS(2*best_pitch-pitch)>=3&&ABS(3*best_pitch-pitch)>=4&&ABS(4*best_pitch-pitch)>=5) 1368 || (tmp>MULT16_16_Q15(QCONST16(.6,15),best_pitch_gain)&&(ABS(best_pitch-2*pitch)<3||ABS(best_pitch-3*pitch)<4||ABS(best_pitch-4*pitch)<5)) 1369 || (MULT16_16_Q15(QCONST16(.67,15),tmp)>best_pitch_gain&&(ABS(2*best_pitch-pitch)<3||ABS(3*best_pitch-pitch)<4||ABS(4*best_pitch-pitch)<5)) ) 1370 { 1371 best_pitch = pitch; 1372 if (tmp > best_pitch_gain) 1373 best_pitch_gain = tmp; 1374 } 1375 } 1376 1377 /* Unquantize the innovation */ 1378 { 1379 int q_energy; 1380 spx_word32_t ener; 1381 1382 SPEEX_MEMSET(innov, 0, st->subframeSize); 1383 1384 /* Decode sub-frame gain correction */ 1385 if (SUBMODE(have_subframe_gain)==3) 1386 { 1387 q_energy = speex_bits_unpack_unsigned(bits, 3); 1388 ener = MULT16_32_Q14(exc_gain_quant_scal3[q_energy],ol_gain); 1389 } else if (SUBMODE(have_subframe_gain)==1) 1390 { 1391 q_energy = speex_bits_unpack_unsigned(bits, 1); 1392 ener = MULT16_32_Q14(exc_gain_quant_scal1[q_energy],ol_gain); 1393 } else { 1394 ener = ol_gain; 1395 } 1396 1397 speex_assert (SUBMODE(innovation_unquant)); 1398 { 1399 /*Fixed codebook contribution*/ 1400 SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); 1401 /* De-normalize innovation and update excitation */ 1402 1403 signal_mul(innov, innov, ener, st->subframeSize); 1404 1405 /* Decode second codebook (only for some modes) */ 1406 if (SUBMODE(double_codebook)) 1407 { 1408 char *tmp_stack=stack; 1409 VARDECL(spx_sig_t *innov2); 1410 ALLOC(innov2, st->subframeSize, spx_sig_t); 1411 SPEEX_MEMSET(innov2, 0, st->subframeSize); 1412 SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); 1413 signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize); 1414 for (i=0;i<st->subframeSize;i++) 1415 innov[i] = ADD32(innov[i], innov2[i]); 1416 stack = tmp_stack; 1417 } 1418 for (i=0;i<st->subframeSize;i++) 1419 exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); 1420 /*print_vec(exc, 40, "innov");*/ 1421 if (innov_save) 1422 { 1423 for (i=0;i<st->subframeSize;i++) 1424 innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT)); 1425 } 1426 } 1427 1428 /*Vocoder mode*/ 1429 if (st->submodeID==1) 1430 { 1431 spx_word16_t g=ol_pitch_coef; 1432 g=MULT16_16_P14(QCONST16(1.5f,14),(g-QCONST16(.2f,6))); 1433 if (g<0) 1434 g=0; 1435 if (g>GAIN_SCALING) 1436 g=GAIN_SCALING; 1437 1438 SPEEX_MEMSET(exc, 0, st->subframeSize); 1439 while (st->voc_offset<st->subframeSize) 1440 { 1441 /* exc[st->voc_offset]= g*sqrt(2*ol_pitch)*ol_gain; 1442 Not quite sure why we need the factor of two in the sqrt */ 1443 if (st->voc_offset>=0) 1444 exc[st->voc_offset]=MULT16_16(spx_sqrt(MULT16_16_16(2,ol_pitch)),EXTRACT16(PSHR32(MULT16_16(g,PSHR32(ol_gain,SIG_SHIFT)),6))); 1445 st->voc_offset+=ol_pitch; 1446 } 1447 st->voc_offset -= st->subframeSize; 1448 1449 for (i=0;i<st->subframeSize;i++) 1450 { 1451 spx_word16_t exci=exc[i]; 1452 exc[i]= ADD16(ADD16(MULT16_16_Q15(QCONST16(.7f,15),exc[i]) , MULT16_16_Q15(QCONST16(.3f,15),st->voc_m1)), 1453 SUB16(MULT16_16_Q15(Q15_ONE-MULT16_16_16(QCONST16(.85f,9),g),EXTRACT16(PSHR32(innov[i],SIG_SHIFT))), 1454 MULT16_16_Q15(MULT16_16_16(QCONST16(.15f,9),g),EXTRACT16(PSHR32(st->voc_m2,SIG_SHIFT))) 1455 )); 1456 st->voc_m1 = exci; 1457 st->voc_m2=innov[i]; 1458 st->voc_mean = EXTRACT16(PSHR32(ADD32(MULT16_16(QCONST16(.8f,15),st->voc_mean), MULT16_16(QCONST16(.2f,15),exc[i])), 15)); 1459 exc[i]-=st->voc_mean; 1460 } 1461 } 1462 1463 } 1464 } 1465 1466 ALLOC(interp_qlsp, st->lpcSize, spx_lsp_t); 1467 1468 if (st->lpc_enh_enabled && SUBMODE(comb_gain)>0 && !st->count_lost) 1469 { 1470 multicomb(st->exc-st->subframeSize, out, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack); 1471 multicomb(st->exc+st->subframeSize, out+2*st->subframeSize, st->interp_qlpc, st->lpcSize, 2*st->subframeSize, best_pitch, 40, SUBMODE(comb_gain), stack); 1472 } else { 1473 SPEEX_COPY(out, &st->exc[-st->subframeSize], st->frameSize); 1474 } 1475 1476 /* If the last packet was lost, re-scale the excitation to obtain the same energy as encoded in ol_gain */ 1477 if (st->count_lost) 1478 { 1479 spx_word16_t exc_ener; 1480 spx_word32_t gain32; 1481 spx_word16_t gain; 1482 exc_ener = compute_rms16 (st->exc, st->frameSize); 1483 gain32 = PDIV32(ol_gain, ADD16(exc_ener,1)); 1484 #ifdef FIXED_POINT 1485 if (gain32 > 32767) 1486 gain32 = 32767; 1487 gain = EXTRACT16(gain32); 1488 #else 1489 if (gain32 > 2) 1490 gain32=2; 1491 gain = gain32; 1492 #endif 1493 for (i=0;i<st->frameSize;i++) 1494 { 1495 st->exc[i] = MULT16_16_Q14(gain, st->exc[i]); 1496 out[i]=st->exc[i-st->subframeSize]; 1497 } 1498 } 1499 1500 /*Loop on subframes */ 1501 for (sub=0;sub<st->nbSubframes;sub++) 1502 { 1503 int offset; 1504 spx_word16_t *sp; 1505 spx_word16_t *exc; 1506 /* Offset relative to start of frame */ 1507 offset = st->subframeSize*sub; 1508 /* Original signal */ 1509 sp=out+offset; 1510 /* Excitation */ 1511 exc=st->exc+offset; 1512 1513 /* LSP interpolation (quantized and unquantized) */ 1514 lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpcSize, sub, st->nbSubframes); 1515 1516 /* Make sure the LSP's are stable */ 1517 lsp_enforce_margin(interp_qlsp, st->lpcSize, LSP_MARGIN); 1518 1519 /* Compute interpolated LPCs (unquantized) */ 1520 lsp_to_lpc(interp_qlsp, ak, st->lpcSize, stack); 1521 1522 /* Compute analysis filter at w=pi */ 1523 { 1524 spx_word32_t pi_g=LPC_SCALING; 1525 for (i=0;i<st->lpcSize;i+=2) 1526 { 1527 /*pi_g += -st->interp_qlpc[i] + st->interp_qlpc[i+1];*/ 1528 pi_g = ADD32(pi_g, SUB32(EXTEND32(ak[i+1]),EXTEND32(ak[i]))); 1529 } 1530 st->pi_gain[sub] = pi_g; 1531 } 1532 1533 iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, 1534 st->mem_sp, stack); 1535 1536 for (i=0;i<st->lpcSize;i++) 1537 st->interp_qlpc[i] = ak[i]; 1538 1539 } 1540 1541 if (st->highpass_enabled) 1542 highpass(out, out, st->frameSize, (st->isWideband?HIGHPASS_WIDEBAND:HIGHPASS_NARROWBAND)|HIGHPASS_OUTPUT, st->mem_hp); 1543 /*for (i=0;i<st->frameSize;i++) 1544 printf ("%d\n", (int)st->frame[i]);*/ 1545 1546 /* Tracking output level */ 1547 st->level = 1+PSHR32(ol_gain,SIG_SHIFT); 1548 st->max_level = MAX16(MULT16_16_Q15(QCONST16(.99f,15), st->max_level), st->level); 1549 st->min_level = MIN16(ADD16(1,MULT16_16_Q14(QCONST16(1.01f,14), st->min_level)), st->level); 1550 if (st->max_level < st->min_level+1) 1551 st->max_level = st->min_level+1; 1552 /*printf ("%f %f %f %d\n", og, st->min_level, st->max_level, update);*/ 1553 1554 /* Store the LSPs for interpolation in the next frame */ 1555 for (i=0;i<st->lpcSize;i++) 1556 st->old_qlsp[i] = qlsp[i]; 1557 1558 /* The next frame will not be the first (Duh!) */ 1559 st->first = 0; 1560 st->count_lost=0; 1561 st->last_pitch = best_pitch; 1562 #ifdef FIXED_POINT 1563 st->last_pitch_gain = PSHR16(pitch_average,2); 1564 #else 1565 st->last_pitch_gain = .25*pitch_average; 1566 #endif 1567 st->pitch_gain_buf[st->pitch_gain_buf_idx++] = st->last_pitch_gain; 1568 if (st->pitch_gain_buf_idx > 2) /* rollover */ 1569 st->pitch_gain_buf_idx = 0; 1570 1571 st->last_ol_gain = ol_gain; 1572 1573 return 0; 1574 } 1575 1576 int nb_encoder_ctl(void *state, int request, void *ptr) 1577 { 1578 EncState *st; 1579 st=(EncState*)state; 1580 switch(request) 1581 { 1582 case SPEEX_GET_FRAME_SIZE: 1583 (*(spx_int32_t*)ptr) = st->frameSize; 1584 break; 1585 case SPEEX_SET_LOW_MODE: 1586 case SPEEX_SET_MODE: 1587 st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr); 1588 break; 1589 case SPEEX_GET_LOW_MODE: 1590 case SPEEX_GET_MODE: 1591 (*(spx_int32_t*)ptr) = st->submodeID; 1592 break; 1593 #ifndef DISABLE_VBR 1594 case SPEEX_SET_VBR: 1595 st->vbr_enabled = (*(spx_int32_t*)ptr); 1596 break; 1597 case SPEEX_GET_VBR: 1598 (*(spx_int32_t*)ptr) = st->vbr_enabled; 1599 break; 1600 case SPEEX_SET_VAD: 1601 st->vad_enabled = (*(spx_int32_t*)ptr); 1602 break; 1603 case SPEEX_GET_VAD: 1604 (*(spx_int32_t*)ptr) = st->vad_enabled; 1605 break; 1606 case SPEEX_SET_DTX: 1607 st->dtx_enabled = (*(spx_int32_t*)ptr); 1608 break; 1609 case SPEEX_GET_DTX: 1610 (*(spx_int32_t*)ptr) = st->dtx_enabled; 1611 break; 1612 case SPEEX_SET_ABR: 1613 st->abr_enabled = (*(spx_int32_t*)ptr); 1614 st->vbr_enabled = st->abr_enabled!=0; 1615 if (st->vbr_enabled) 1616 { 1617 spx_int32_t i=10; 1618 spx_int32_t rate, target; 1619 float vbr_qual; 1620 target = (*(spx_int32_t*)ptr); 1621 while (i>=0) 1622 { 1623 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i); 1624 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate); 1625 if (rate <= target) 1626 break; 1627 i--; 1628 } 1629 vbr_qual=i; 1630 if (vbr_qual<0) 1631 vbr_qual=0; 1632 speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_qual); 1633 st->abr_count=0; 1634 st->abr_drift=0; 1635 st->abr_drift2=0; 1636 } 1637 1638 break; 1639 case SPEEX_GET_ABR: 1640 (*(spx_int32_t*)ptr) = st->abr_enabled; 1641 break; 1642 #endif /* #ifndef DISABLE_VBR */ 1643 #if !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) 1644 case SPEEX_SET_VBR_QUALITY: 1645 st->vbr_quality = (*(float*)ptr); 1646 break; 1647 case SPEEX_GET_VBR_QUALITY: 1648 (*(float*)ptr) = st->vbr_quality; 1649 break; 1650 #endif /* !defined(DISABLE_VBR) && !defined(DISABLE_FLOAT_API) */ 1651 case SPEEX_SET_QUALITY: 1652 { 1653 int quality = (*(spx_int32_t*)ptr); 1654 if (quality < 0) 1655 quality = 0; 1656 if (quality > 10) 1657 quality = 10; 1658 st->submodeSelect = st->submodeID = ((const SpeexNBMode*)(st->mode->mode))->quality_map[quality]; 1659 } 1660 break; 1661 case SPEEX_SET_COMPLEXITY: 1662 st->complexity = (*(spx_int32_t*)ptr); 1663 if (st->complexity<0) 1664 st->complexity=0; 1665 break; 1666 case SPEEX_GET_COMPLEXITY: 1667 (*(spx_int32_t*)ptr) = st->complexity; 1668 break; 1669 case SPEEX_SET_BITRATE: 1670 { 1671 spx_int32_t i=10; 1672 spx_int32_t rate, target; 1673 target = (*(spx_int32_t*)ptr); 1674 while (i>=0) 1675 { 1676 speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i); 1677 speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate); 1678 if (rate <= target) 1679 break; 1680 i--; 1681 } 1682 } 1683 break; 1684 case SPEEX_GET_BITRATE: 1685 if (st->submodes[st->submodeID]) 1686 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize; 1687 else 1688 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize; 1689 break; 1690 case SPEEX_SET_SAMPLING_RATE: 1691 st->sampling_rate = (*(spx_int32_t*)ptr); 1692 break; 1693 case SPEEX_GET_SAMPLING_RATE: 1694 (*(spx_int32_t*)ptr)=st->sampling_rate; 1695 break; 1696 case SPEEX_RESET_STATE: 1697 { 1698 int i; 1699 st->bounded_pitch = 1; 1700 st->first = 1; 1701 for (i=0;i<st->lpcSize;i++) 1702 st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1); 1703 for (i=0;i<st->lpcSize;i++) 1704 st->mem_sw[i]=st->mem_sw_whole[i]=st->mem_sp[i]=st->mem_exc[i]=0; 1705 for (i=0;i<st->frameSize+st->max_pitch+1;i++) 1706 st->excBuf[i]=st->swBuf[i]=0; 1707 for (i=0;i<st->windowSize-st->frameSize;i++) 1708 st->winBuf[i]=0; 1709 } 1710 break; 1711 case SPEEX_SET_SUBMODE_ENCODING: 1712 st->encode_submode = (*(spx_int32_t*)ptr); 1713 break; 1714 case SPEEX_GET_SUBMODE_ENCODING: 1715 (*(spx_int32_t*)ptr) = st->encode_submode; 1716 break; 1717 case SPEEX_GET_LOOKAHEAD: 1718 (*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize); 1719 break; 1720 case SPEEX_SET_PLC_TUNING: 1721 st->plc_tuning = (*(spx_int32_t*)ptr); 1722 if (st->plc_tuning>100) 1723 st->plc_tuning=100; 1724 break; 1725 case SPEEX_GET_PLC_TUNING: 1726 (*(spx_int32_t*)ptr)=(st->plc_tuning); 1727 break; 1728 #ifndef DISABLE_VBR 1729 case SPEEX_SET_VBR_MAX_BITRATE: 1730 st->vbr_max = (*(spx_int32_t*)ptr); 1731 break; 1732 case SPEEX_GET_VBR_MAX_BITRATE: 1733 (*(spx_int32_t*)ptr) = st->vbr_max; 1734 break; 1735 #endif /* #ifndef DISABLE_VBR */ 1736 case SPEEX_SET_HIGHPASS: 1737 st->highpass_enabled = (*(spx_int32_t*)ptr); 1738 break; 1739 case SPEEX_GET_HIGHPASS: 1740 (*(spx_int32_t*)ptr) = st->highpass_enabled; 1741 break; 1742 1743 /* This is all internal stuff past this point */ 1744 case SPEEX_GET_PI_GAIN: 1745 { 1746 int i; 1747 spx_word32_t *g = (spx_word32_t*)ptr; 1748 for (i=0;i<st->nbSubframes;i++) 1749 g[i]=st->pi_gain[i]; 1750 } 1751 break; 1752 case SPEEX_GET_EXC: 1753 { 1754 int i; 1755 for (i=0;i<st->nbSubframes;i++) 1756 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize); 1757 } 1758 break; 1759 #ifndef DISABLE_VBR 1760 case SPEEX_GET_RELATIVE_QUALITY: 1761 (*(float*)ptr)=st->relative_quality; 1762 break; 1763 #endif /* #ifndef DISABLE_VBR */ 1764 case SPEEX_SET_INNOVATION_SAVE: 1765 st->innov_rms_save = (spx_word16_t*)ptr; 1766 break; 1767 case SPEEX_SET_WIDEBAND: 1768 st->isWideband = *((spx_int32_t*)ptr); 1769 break; 1770 case SPEEX_GET_STACK: 1771 *((char**)ptr) = st->stack; 1772 break; 1773 default: 1774 speex_warning_int("Unknown nb_ctl request: ", request); 1775 return -1; 1776 } 1777 return 0; 1778 } 1779 1780 int nb_decoder_ctl(void *state, int request, void *ptr) 1781 { 1782 DecState *st; 1783 st=(DecState*)state; 1784 switch(request) 1785 { 1786 case SPEEX_SET_LOW_MODE: 1787 case SPEEX_SET_MODE: 1788 st->submodeID = (*(spx_int32_t*)ptr); 1789 break; 1790 case SPEEX_GET_LOW_MODE: 1791 case SPEEX_GET_MODE: 1792 (*(spx_int32_t*)ptr) = st->submodeID; 1793 break; 1794 case SPEEX_SET_ENH: 1795 st->lpc_enh_enabled = *((spx_int32_t*)ptr); 1796 break; 1797 case SPEEX_GET_ENH: 1798 *((spx_int32_t*)ptr) = st->lpc_enh_enabled; 1799 break; 1800 case SPEEX_GET_FRAME_SIZE: 1801 (*(spx_int32_t*)ptr) = st->frameSize; 1802 break; 1803 case SPEEX_GET_BITRATE: 1804 if (st->submodes[st->submodeID]) 1805 (*(spx_int32_t*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize; 1806 else 1807 (*(spx_int32_t*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize; 1808 break; 1809 case SPEEX_SET_SAMPLING_RATE: 1810 st->sampling_rate = (*(spx_int32_t*)ptr); 1811 break; 1812 case SPEEX_GET_SAMPLING_RATE: 1813 (*(spx_int32_t*)ptr)=st->sampling_rate; 1814 break; 1815 case SPEEX_SET_HANDLER: 1816 { 1817 SpeexCallback *c = (SpeexCallback*)ptr; 1818 st->speex_callbacks[c->callback_id].func=c->func; 1819 st->speex_callbacks[c->callback_id].data=c->data; 1820 st->speex_callbacks[c->callback_id].callback_id=c->callback_id; 1821 } 1822 break; 1823 case SPEEX_SET_USER_HANDLER: 1824 { 1825 SpeexCallback *c = (SpeexCallback*)ptr; 1826 st->user_callback.func=c->func; 1827 st->user_callback.data=c->data; 1828 st->user_callback.callback_id=c->callback_id; 1829 } 1830 break; 1831 case SPEEX_RESET_STATE: 1832 { 1833 int i; 1834 for (i=0;i<st->lpcSize;i++) 1835 st->mem_sp[i]=0; 1836 for (i=0;i<st->frameSize + st->max_pitch + 1;i++) 1837 st->excBuf[i]=0; 1838 } 1839 break; 1840 case SPEEX_SET_SUBMODE_ENCODING: 1841 st->encode_submode = (*(spx_int32_t*)ptr); 1842 break; 1843 case SPEEX_GET_SUBMODE_ENCODING: 1844 (*(spx_int32_t*)ptr) = st->encode_submode; 1845 break; 1846 case SPEEX_GET_LOOKAHEAD: 1847 (*(spx_int32_t*)ptr)=st->subframeSize; 1848 break; 1849 case SPEEX_SET_HIGHPASS: 1850 st->highpass_enabled = (*(spx_int32_t*)ptr); 1851 break; 1852 case SPEEX_GET_HIGHPASS: 1853 (*(spx_int32_t*)ptr) = st->highpass_enabled; 1854 break; 1855 /* FIXME: Convert to fixed-point and re-enable even when float API is disabled */ 1856 #ifndef DISABLE_FLOAT_API 1857 case SPEEX_GET_ACTIVITY: 1858 { 1859 float ret; 1860 ret = log(st->level/st->min_level)/log(st->max_level/st->min_level); 1861 if (ret>1) 1862 ret = 1; 1863 /* Done in a strange way to catch NaNs as well */ 1864 if (!(ret > 0)) 1865 ret = 0; 1866 /*printf ("%f %f %f %f\n", st->level, st->min_level, st->max_level, ret);*/ 1867 (*(spx_int32_t*)ptr) = (int)(100*ret); 1868 } 1869 break; 1870 #endif 1871 case SPEEX_GET_PI_GAIN: 1872 { 1873 int i; 1874 spx_word32_t *g = (spx_word32_t*)ptr; 1875 for (i=0;i<st->nbSubframes;i++) 1876 g[i]=st->pi_gain[i]; 1877 } 1878 break; 1879 case SPEEX_GET_EXC: 1880 { 1881 int i; 1882 for (i=0;i<st->nbSubframes;i++) 1883 ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize); 1884 } 1885 break; 1886 case SPEEX_GET_DTX_STATUS: 1887 *((spx_int32_t*)ptr) = st->dtx_enabled; 1888 break; 1889 case SPEEX_SET_INNOVATION_SAVE: 1890 st->innov_save = (spx_word16_t*)ptr; 1891 break; 1892 case SPEEX_SET_WIDEBAND: 1893 st->isWideband = *((spx_int32_t*)ptr); 1894 break; 1895 case SPEEX_GET_STACK: 1896 *((char**)ptr) = st->stack; 1897 break; 1898 default: 1899 speex_warning_int("Unknown nb_ctl request: ", request); 1900 return -1; 1901 } 1902 return 0; 1903 } 1904