/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
   Written by Jean-Marc Valin and Koen Vos */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdarg.h>
#include "celt.h"
#include "entenc.h"
#include "modes.h"
#include "API.h"
#include "stack_alloc.h"
#include "float_cast.h"
#include "opus.h"
#include "arch.h"
#include "opus_private.h"
#include "os_support.h"
#include "cpu_support.h"
#include "analysis.h"
#include "mathops.h"
#include "tuning_parameters.h"
#ifdef FIXED_POINT
#include "fixed/structs_FIX.h"
#else
#include "float/structs_FLP.h"
#endif

/* Dimension of OpusEncoder::delay_buffer is twice this value */
#define MAX_ENCODER_BUFFER 480

/* Running statistics kept between calls to compute_stereo_width() */
typedef struct {
    /* Smoothed left energy, left/right cross term and right energy */
    opus_val32 XX, XY, YY;
    /* Width estimate smoothed across frames */
    opus_val16 smoothed_width;
    /* Slowly decaying peak of smoothed_width */
    opus_val16 max_follower;
} StereoWidthState;

/* Top-level encoder state.  The SILK and CELT encoder states are not members
   of this struct: they live in the same allocation, at the byte offsets
   recorded in silk_enc_offset and celt_enc_offset. */
struct OpusEncoder {
    /* Byte offsets from the start of this struct to the CELT / SILK states */
    int          celt_enc_offset;
    int          silk_enc_offset;
    /* SILK control structure; defaults are filled in by opus_encoder_init() */
    silk_EncControlStruct silk_mode;
    /* One of the OPUS_APPLICATION_* values given at initialisation */
    int          application;
    /* Number of input channels (1 or 2) */
    int          channels;
    /* Set to Fs/250 (4 ms) by opus_encoder_init() */
    int          delay_compensation;
    /* OPUS_AUTO, or the channel count to code when the input is stereo */
    int          force_channels;
    /* OPUS_AUTO, OPUS_SIGNAL_VOICE or OPUS_SIGNAL_MUSIC */
    int          signal_type;
    /* Initialised to OPUS_AUTO */
    int          user_bandwidth;
    /* Initialised to OPUS_BANDWIDTH_FULLBAND */
    int          max_bandwidth;
    /* OPUS_AUTO, or the mode to use instead of the automatic decision */
    int          user_forced_mode;
    /* -1 when unknown, otherwise a percentage derived from the analysis */
    int          voice_ratio;
    /* Sampling rate (at the API level) */
    opus_int32   Fs;
    /* Non-zero for VBR (default 1) */
    int          use_vbr;
    /* Non-zero for constrained VBR (default 1) */
    int          vbr_constraint;
    /* One of the OPUS_FRAMESIZE_* values (default OPUS_FRAMESIZE_ARG) */
    int          variable_duration;
    /* Bitrate actually used for the current frame */
    opus_int32   bitrate_bps;
    /* Bitrate requested by the user; may be OPUS_AUTO or OPUS_BITRATE_MAX */
    opus_int32   user_bitrate_bps;
    /* Initialised to 24; the per-call depth is clamped to this value */
    int          lsb_depth;
    /* Set to Fs/100 by opus_encoder_init() */
    int          encoder_buffer;
    /* When non-zero the encoder is forced to CELT-only mode */
    int          lfe;

/* NOTE(review): presumably marks the first field cleared by an encoder
   reset -- the code using this macro is not visible here, confirm. */
#define OPUS_ENCODER_RESET_START stream_channels
    /* Number of channels actually coded in the stream */
    int          stream_channels;
    opus_int16   hybrid_stereo_width_Q14;
    opus_int32   variable_HP_smth2_Q15;
    opus_val16   prev_HB_gain;
    /* High-pass / DC-rejection filter memory, two values per channel */
    opus_val32   hp_mem[4];
    int          mode;
    int          prev_mode;
    int          prev_channels;
    int          prev_framesize;
    int          bandwidth;
    int          silk_bw_switch;
    /* Set to 1 by opus_encoder_init() */
    int          first;
    opus_val16 * energy_masking;
    StereoWidthState width_mem;
    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef DISABLE_FLOAT_API
    TonalityAnalysisState analysis;
    int          detected_bandwidth;
    int          analysis_offset;
#endif
    /* Cleared at the start of every opus_encode_native() call */
    opus_uint32  rangeFinal;
    /* Value returned by opus_select_arch() at initialisation */
    int          arch;
};

/* Transition tables for the voice and music.
First column is the 111 middle (memoriless) threshold. The second column is the hysteresis 112 (difference with the middle) */ 113 static const opus_int32 mono_voice_bandwidth_thresholds[8] = { 114 11000, 1000, /* NB<->MB */ 115 14000, 1000, /* MB<->WB */ 116 17000, 1000, /* WB<->SWB */ 117 21000, 2000, /* SWB<->FB */ 118 }; 119 static const opus_int32 mono_music_bandwidth_thresholds[8] = { 120 12000, 1000, /* NB<->MB */ 121 15000, 1000, /* MB<->WB */ 122 18000, 2000, /* WB<->SWB */ 123 22000, 2000, /* SWB<->FB */ 124 }; 125 static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { 126 11000, 1000, /* NB<->MB */ 127 14000, 1000, /* MB<->WB */ 128 21000, 2000, /* WB<->SWB */ 129 28000, 2000, /* SWB<->FB */ 130 }; 131 static const opus_int32 stereo_music_bandwidth_thresholds[8] = { 132 12000, 1000, /* NB<->MB */ 133 18000, 2000, /* MB<->WB */ 134 21000, 2000, /* WB<->SWB */ 135 30000, 2000, /* SWB<->FB */ 136 }; 137 /* Threshold bit-rates for switching between mono and stereo */ 138 static const opus_int32 stereo_voice_threshold = 30000; 139 static const opus_int32 stereo_music_threshold = 30000; 140 141 /* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ 142 static const opus_int32 mode_thresholds[2][2] = { 143 /* voice */ /* music */ 144 { 64000, 16000}, /* mono */ 145 { 36000, 16000}, /* stereo */ 146 }; 147 148 int opus_encoder_get_size(int channels) 149 { 150 int silkEncSizeBytes, celtEncSizeBytes; 151 int ret; 152 if (channels<1 || channels > 2) 153 return 0; 154 ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); 155 if (ret) 156 return 0; 157 silkEncSizeBytes = align(silkEncSizeBytes); 158 celtEncSizeBytes = celt_encoder_get_size(channels); 159 return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes; 160 } 161 162 int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application) 163 { 164 void *silk_enc; 165 CELTEncoder *celt_enc; 166 int err; 167 int ret, silkEncSizeBytes; 168 169 
if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| 170 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO 171 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) 172 return OPUS_BAD_ARG; 173 174 OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); 175 /* Create SILK encoder */ 176 ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); 177 if (ret) 178 return OPUS_BAD_ARG; 179 silkEncSizeBytes = align(silkEncSizeBytes); 180 st->silk_enc_offset = align(sizeof(OpusEncoder)); 181 st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; 182 silk_enc = (char*)st+st->silk_enc_offset; 183 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 184 185 st->stream_channels = st->channels = channels; 186 187 st->Fs = Fs; 188 189 st->arch = opus_select_arch(); 190 191 ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode ); 192 if(ret)return OPUS_INTERNAL_ERROR; 193 194 /* default SILK parameters */ 195 st->silk_mode.nChannelsAPI = channels; 196 st->silk_mode.nChannelsInternal = channels; 197 st->silk_mode.API_sampleRate = st->Fs; 198 st->silk_mode.maxInternalSampleRate = 16000; 199 st->silk_mode.minInternalSampleRate = 8000; 200 st->silk_mode.desiredInternalSampleRate = 16000; 201 st->silk_mode.payloadSize_ms = 20; 202 st->silk_mode.bitRate = 25000; 203 st->silk_mode.packetLossPercentage = 0; 204 st->silk_mode.complexity = 9; 205 st->silk_mode.useInBandFEC = 0; 206 st->silk_mode.useDTX = 0; 207 st->silk_mode.useCBR = 0; 208 st->silk_mode.reducedDependency = 0; 209 210 /* Create CELT encoder */ 211 /* Initialize CELT encoder */ 212 err = celt_encoder_init(celt_enc, Fs, channels, st->arch); 213 if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; 214 215 celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); 216 celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); 217 218 st->use_vbr = 1; 219 /* Makes constrained VBR the default (safer for real-time use) */ 220 st->vbr_constraint = 1; 221 
st->user_bitrate_bps = OPUS_AUTO; 222 st->bitrate_bps = 3000+Fs*channels; 223 st->application = application; 224 st->signal_type = OPUS_AUTO; 225 st->user_bandwidth = OPUS_AUTO; 226 st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND; 227 st->force_channels = OPUS_AUTO; 228 st->user_forced_mode = OPUS_AUTO; 229 st->voice_ratio = -1; 230 st->encoder_buffer = st->Fs/100; 231 st->lsb_depth = 24; 232 st->variable_duration = OPUS_FRAMESIZE_ARG; 233 234 /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 235 + 1.5 ms for SILK resamplers and stereo prediction) */ 236 st->delay_compensation = st->Fs/250; 237 238 st->hybrid_stereo_width_Q14 = 1 << 14; 239 st->prev_HB_gain = Q15ONE; 240 st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 241 st->first = 1; 242 st->mode = MODE_HYBRID; 243 st->bandwidth = OPUS_BANDWIDTH_FULLBAND; 244 245 return OPUS_OK; 246 } 247 248 static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels) 249 { 250 int period; 251 unsigned char toc; 252 period = 0; 253 while (framerate < 400) 254 { 255 framerate <<= 1; 256 period++; 257 } 258 if (mode == MODE_SILK_ONLY) 259 { 260 toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5; 261 toc |= (period-2)<<3; 262 } else if (mode == MODE_CELT_ONLY) 263 { 264 int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND; 265 if (tmp < 0) 266 tmp = 0; 267 toc = 0x80; 268 toc |= tmp << 5; 269 toc |= period<<3; 270 } else /* Hybrid */ 271 { 272 toc = 0x60; 273 toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4; 274 toc |= (period-2)<<3; 275 } 276 toc |= (channels==2)<<2; 277 return toc; 278 } 279 280 #ifndef FIXED_POINT 281 static void silk_biquad_float( 282 const opus_val16 *in, /* I: Input signal */ 283 const opus_int32 *B_Q28, /* I: MA coefficients [3] */ 284 const opus_int32 *A_Q28, /* I: AR coefficients [2] */ 285 opus_val32 *S, /* I/O: State vector [2] */ 286 opus_val16 *out, /* O: Output signal */ 287 const opus_int32 len, /* I: Signal length (must be even) */ 
288 int stride 289 ) 290 { 291 /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ 292 opus_int k; 293 opus_val32 vout; 294 opus_val32 inval; 295 opus_val32 A[2], B[3]; 296 297 A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28))); 298 A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28))); 299 B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28))); 300 B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28))); 301 B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28))); 302 303 /* Negate A_Q28 values and split in two parts */ 304 305 for( k = 0; k < len; k++ ) { 306 /* S[ 0 ], S[ 1 ]: Q12 */ 307 inval = in[ k*stride ]; 308 vout = S[ 0 ] + B[0]*inval; 309 310 S[ 0 ] = S[1] - vout*A[0] + B[1]*inval; 311 312 S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL; 313 314 /* Scale back to Q0 and saturate */ 315 out[ k*stride ] = vout; 316 } 317 } 318 #endif 319 320 static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 321 { 322 opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; 323 opus_int32 Fc_Q19, r_Q28, r_Q22; 324 325 silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); 326 Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); 327 silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 ); 328 329 r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); 330 331 /* b = r * [ 1; -2; 1 ]; */ 332 /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ 333 B_Q28[ 0 ] = r_Q28; 334 B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 ); 335 B_Q28[ 2 ] = r_Q28; 336 337 /* -r * ( 2 - Fc * Fc ); */ 338 r_Q22 = silk_RSHIFT( r_Q28, 6 ); 339 A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) ); 340 A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); 341 342 #ifdef FIXED_POINT 343 silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels ); 344 if( channels == 2 ) { 345 silk_biquad_alt( 
in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); 346 } 347 #else 348 silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); 349 if( channels == 2 ) { 350 silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); 351 } 352 #endif 353 } 354 355 #ifdef FIXED_POINT 356 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 357 { 358 int c, i; 359 int shift; 360 361 /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ 362 shift=celt_ilog2(Fs/(cutoff_Hz*3)); 363 for (c=0;c<channels;c++) 364 { 365 for (i=0;i<len;i++) 366 { 367 opus_val32 x, tmp, y; 368 x = SHL32(EXTEND32(in[channels*i+c]), 15); 369 /* First stage */ 370 tmp = x-hp_mem[2*c]; 371 hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); 372 /* Second stage */ 373 y = tmp - hp_mem[2*c+1]; 374 hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); 375 out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); 376 } 377 } 378 } 379 380 #else 381 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 382 { 383 int c, i; 384 float coef; 385 386 coef = 4.0f*cutoff_Hz/Fs; 387 for (c=0;c<channels;c++) 388 { 389 for (i=0;i<len;i++) 390 { 391 opus_val32 x, tmp, y; 392 x = in[channels*i+c]; 393 /* First stage */ 394 tmp = x-hp_mem[2*c]; 395 hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL; 396 /* Second stage */ 397 y = tmp - hp_mem[2*c+1]; 398 hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL; 399 out[channels*i+c] = y; 400 } 401 } 402 } 403 #endif 404 405 static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, 406 int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) 407 { 408 int i; 409 int overlap; 410 int inc; 411 inc = 48000/Fs; 412 overlap=overlap48/inc; 413 g1 = Q15ONE-g1; 414 g2 = 
Q15ONE-g2; 415 for (i=0;i<overlap;i++) 416 { 417 opus_val32 diff; 418 opus_val16 g, w; 419 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 420 g = SHR32(MAC16_16(MULT16_16(w,g2), 421 Q15ONE-w, g1), 15); 422 diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); 423 diff = MULT16_16_Q15(g, diff); 424 out[i*channels] = out[i*channels] - diff; 425 out[i*channels+1] = out[i*channels+1] + diff; 426 } 427 for (;i<frame_size;i++) 428 { 429 opus_val32 diff; 430 diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); 431 diff = MULT16_16_Q15(g2, diff); 432 out[i*channels] = out[i*channels] - diff; 433 out[i*channels+1] = out[i*channels+1] + diff; 434 } 435 } 436 437 static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, 438 int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) 439 { 440 int i; 441 int inc; 442 int overlap; 443 int c; 444 inc = 48000/Fs; 445 overlap=overlap48/inc; 446 if (channels==1) 447 { 448 for (i=0;i<overlap;i++) 449 { 450 opus_val16 g, w; 451 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 452 g = SHR32(MAC16_16(MULT16_16(w,g2), 453 Q15ONE-w, g1), 15); 454 out[i] = MULT16_16_Q15(g, in[i]); 455 } 456 } else { 457 for (i=0;i<overlap;i++) 458 { 459 opus_val16 g, w; 460 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 461 g = SHR32(MAC16_16(MULT16_16(w,g2), 462 Q15ONE-w, g1), 15); 463 out[i*2] = MULT16_16_Q15(g, in[i*2]); 464 out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]); 465 } 466 } 467 c=0;do { 468 for (i=overlap;i<frame_size;i++) 469 { 470 out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]); 471 } 472 } 473 while (++c<channels); 474 } 475 476 OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error) 477 { 478 int ret; 479 OpusEncoder *st; 480 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| 481 (application != OPUS_APPLICATION_VOIP && application != 
OPUS_APPLICATION_AUDIO 482 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) 483 { 484 if (error) 485 *error = OPUS_BAD_ARG; 486 return NULL; 487 } 488 st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels)); 489 if (st == NULL) 490 { 491 if (error) 492 *error = OPUS_ALLOC_FAIL; 493 return NULL; 494 } 495 ret = opus_encoder_init(st, Fs, channels, application); 496 if (error) 497 *error = ret; 498 if (ret != OPUS_OK) 499 { 500 opus_free(st); 501 st = NULL; 502 } 503 return st; 504 } 505 506 static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes) 507 { 508 if(!frame_size)frame_size=st->Fs/400; 509 if (st->user_bitrate_bps==OPUS_AUTO) 510 return 60*st->Fs/frame_size + st->Fs*st->channels; 511 else if (st->user_bitrate_bps==OPUS_BITRATE_MAX) 512 return max_data_bytes*8*st->Fs/frame_size; 513 else 514 return st->user_bitrate_bps; 515 } 516 517 #ifndef DISABLE_FLOAT_API 518 /* Don't use more than 60 ms for the frame size analysis */ 519 #define MAX_DYNAMIC_FRAMESIZE 24 520 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */ 521 static float transient_boost(const float *E, const float *E_1, int LM, int maxM) 522 { 523 int i; 524 int M; 525 float sumE=0, sumE_1=0; 526 float metric; 527 528 M = IMIN(maxM, (1<<LM)+1); 529 for (i=0;i<M;i++) 530 { 531 sumE += E[i]; 532 sumE_1 += E_1[i]; 533 } 534 metric = sumE*sumE_1/(M*M); 535 /*if (LM==3) 536 printf("%f\n", metric);*/ 537 /*return metric>10 ? 
1 : 0;*/ 538 /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ 539 return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); 540 } 541 542 /* Viterbi decoding trying to find the best frame size combination using look-ahead 543 544 State numbering: 545 0: unused 546 1: 2.5 ms 547 2: 5 ms (#1) 548 3: 5 ms (#2) 549 4: 10 ms (#1) 550 5: 10 ms (#2) 551 6: 10 ms (#3) 552 7: 10 ms (#4) 553 8: 20 ms (#1) 554 9: 20 ms (#2) 555 10: 20 ms (#3) 556 11: 20 ms (#4) 557 12: 20 ms (#5) 558 13: 20 ms (#6) 559 14: 20 ms (#7) 560 15: 20 ms (#8) 561 */ 562 static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) 563 { 564 int i; 565 float cost[MAX_DYNAMIC_FRAMESIZE][16]; 566 int states[MAX_DYNAMIC_FRAMESIZE][16]; 567 float best_cost; 568 int best_state; 569 float factor; 570 /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ 571 if (rate<80) 572 factor=0; 573 else if (rate>160) 574 factor=1; 575 else 576 factor = (rate-80.f)/80.f; 577 /* Makes variable framesize less aggressive at lower bitrates, but I can't 578 find any valid theoretical justification for this (other than it seems 579 to help) */ 580 for (i=0;i<16;i++) 581 { 582 /* Impossible state */ 583 states[0][i] = -1; 584 cost[0][i] = 1e10; 585 } 586 for (i=0;i<4;i++) 587 { 588 cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1)); 589 states[0][1<<i] = i; 590 } 591 for (i=1;i<N;i++) 592 { 593 int j; 594 595 /* Follow continuations */ 596 for (j=2;j<16;j++) 597 { 598 cost[i][j] = cost[i-1][j-1]; 599 states[i][j] = j-1; 600 } 601 602 /* New frames */ 603 for(j=0;j<4;j++) 604 { 605 int k; 606 float min_cost; 607 float curr_cost; 608 states[i][1<<j] = 1; 609 min_cost = cost[i-1][1]; 610 for(k=1;k<4;k++) 611 { 612 float tmp = cost[i-1][(1<<(k+1))-1]; 613 if (tmp < min_cost) 614 { 615 states[i][1<<j] = (1<<(k+1))-1; 616 min_cost = tmp; 617 } 618 } 619 curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1)); 620 
cost[i][1<<j] = min_cost; 621 /* If part of the frame is outside the analysis window, only count part of the cost */ 622 if (N-i < (1<<j)) 623 cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j); 624 else 625 cost[i][1<<j] += curr_cost; 626 } 627 } 628 629 best_state=1; 630 best_cost = cost[N-1][1]; 631 /* Find best end state (doesn't force a frame to end at N-1) */ 632 for (i=2;i<16;i++) 633 { 634 if (cost[N-1][i]<best_cost) 635 { 636 best_cost = cost[N-1][i]; 637 best_state = i; 638 } 639 } 640 641 /* Follow transitions back */ 642 for (i=N-1;i>=0;i--) 643 { 644 /*printf("%d ", best_state);*/ 645 best_state = states[i][best_state]; 646 } 647 /*printf("%d\n", best_state);*/ 648 return best_state; 649 } 650 651 int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, 652 int bitrate, opus_val16 tonality, float *mem, int buffering, 653 downmix_func downmix) 654 { 655 int N; 656 int i; 657 float e[MAX_DYNAMIC_FRAMESIZE+4]; 658 float e_1[MAX_DYNAMIC_FRAMESIZE+3]; 659 opus_val32 memx; 660 int bestLM=0; 661 int subframe; 662 int pos; 663 VARDECL(opus_val32, sub); 664 665 subframe = Fs/400; 666 ALLOC(sub, subframe, opus_val32); 667 e[0]=mem[0]; 668 e_1[0]=1.f/(EPSILON+mem[0]); 669 if (buffering) 670 { 671 /* Consider the CELT delay when not in restricted-lowdelay */ 672 /* We assume the buffering is between 2.5 and 5 ms */ 673 int offset = 2*subframe - buffering; 674 celt_assert(offset>=0 && offset <= subframe); 675 x += C*offset; 676 len -= offset; 677 e[1]=mem[1]; 678 e_1[1]=1.f/(EPSILON+mem[1]); 679 e[2]=mem[2]; 680 e_1[2]=1.f/(EPSILON+mem[2]); 681 pos = 3; 682 } else { 683 pos=1; 684 } 685 N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); 686 /* Just silencing a warning, it's really initialized later */ 687 memx = 0; 688 for (i=0;i<N;i++) 689 { 690 float tmp; 691 opus_val32 tmpx; 692 int j; 693 tmp=EPSILON; 694 695 downmix(x, sub, subframe, i*subframe, 0, -2, C); 696 if (i==0) 697 memx = sub[0]; 698 for (j=0;j<subframe;j++) 699 { 700 tmpx = sub[j]; 701 tmp += 
(tmpx-memx)*(float)(tmpx-memx); 702 memx = tmpx; 703 } 704 e[i+pos] = tmp; 705 e_1[i+pos] = 1.f/tmp; 706 } 707 /* Hack to get 20 ms working with APPLICATION_AUDIO 708 The real problem is that the corresponding memory needs to use 1.5 ms 709 from this frame and 1 ms from the next frame */ 710 e[i+pos] = e[i+pos-1]; 711 if (buffering) 712 N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2); 713 bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400); 714 mem[0] = e[1<<bestLM]; 715 if (buffering) 716 { 717 mem[1] = e[(1<<bestLM)+1]; 718 mem[2] = e[(1<<bestLM)+2]; 719 } 720 return bestLM; 721 } 722 723 #endif 724 725 #ifndef DISABLE_FLOAT_API 726 #ifdef FIXED_POINT 727 #define PCM2VAL(x) FLOAT2INT16(x) 728 #else 729 #define PCM2VAL(x) SCALEIN(x) 730 #endif 731 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) 732 { 733 const float *x; 734 opus_val32 scale; 735 int j; 736 x = (const float *)_x; 737 for (j=0;j<subframe;j++) 738 sub[j] = PCM2VAL(x[(j+offset)*C+c1]); 739 if (c2>-1) 740 { 741 for (j=0;j<subframe;j++) 742 sub[j] += PCM2VAL(x[(j+offset)*C+c2]); 743 } else if (c2==-2) 744 { 745 int c; 746 for (c=1;c<C;c++) 747 { 748 for (j=0;j<subframe;j++) 749 sub[j] += PCM2VAL(x[(j+offset)*C+c]); 750 } 751 } 752 #ifdef FIXED_POINT 753 scale = (1<<SIG_SHIFT); 754 #else 755 scale = 1.f; 756 #endif 757 if (C==-2) 758 scale /= C; 759 else 760 scale /= 2; 761 for (j=0;j<subframe;j++) 762 sub[j] *= scale; 763 } 764 #endif 765 766 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) 767 { 768 const opus_int16 *x; 769 opus_val32 scale; 770 int j; 771 x = (const opus_int16 *)_x; 772 for (j=0;j<subframe;j++) 773 sub[j] = x[(j+offset)*C+c1]; 774 if (c2>-1) 775 { 776 for (j=0;j<subframe;j++) 777 sub[j] += x[(j+offset)*C+c2]; 778 } else if (c2==-2) 779 { 780 int c; 781 for (c=1;c<C;c++) 782 { 783 for (j=0;j<subframe;j++) 784 sub[j] += x[(j+offset)*C+c]; 785 } 786 } 787 #ifdef 
FIXED_POINT 788 scale = (1<<SIG_SHIFT); 789 #else 790 scale = 1.f/32768; 791 #endif 792 if (C==-2) 793 scale /= C; 794 else 795 scale /= 2; 796 for (j=0;j<subframe;j++) 797 sub[j] *= scale; 798 } 799 800 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) 801 { 802 int new_size; 803 if (frame_size<Fs/400) 804 return -1; 805 if (variable_duration == OPUS_FRAMESIZE_ARG) 806 new_size = frame_size; 807 else if (variable_duration == OPUS_FRAMESIZE_VARIABLE) 808 new_size = Fs/50; 809 else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) 810 new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); 811 else 812 return -1; 813 if (new_size>frame_size) 814 return -1; 815 if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && 816 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) 817 return -1; 818 return new_size; 819 } 820 821 opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, 822 int variable_duration, int C, opus_int32 Fs, int bitrate_bps, 823 int delay_compensation, downmix_func downmix 824 #ifndef DISABLE_FLOAT_API 825 , float *subframe_mem 826 #endif 827 ) 828 { 829 #ifndef DISABLE_FLOAT_API 830 if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) 831 { 832 int LM = 3; 833 LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, 834 0, subframe_mem, delay_compensation, downmix); 835 while ((Fs/400<<LM)>frame_size) 836 LM--; 837 frame_size = (Fs/400<<LM); 838 } else 839 #endif 840 { 841 frame_size = frame_size_select(frame_size, variable_duration, Fs); 842 } 843 if (frame_size<0) 844 return -1; 845 return frame_size; 846 } 847 848 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) 849 { 850 opus_val16 corr; 851 opus_val16 ldiff; 852 opus_val16 width; 853 opus_val32 xx, xy, yy; 854 opus_val16 sqrt_xx, sqrt_yy; 855 opus_val16 qrrt_xx, qrrt_yy; 
856 int frame_rate; 857 int i; 858 opus_val16 short_alpha; 859 860 frame_rate = Fs/frame_size; 861 short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate); 862 xx=xy=yy=0; 863 for (i=0;i<frame_size;i+=4) 864 { 865 opus_val32 pxx=0; 866 opus_val32 pxy=0; 867 opus_val32 pyy=0; 868 opus_val16 x, y; 869 x = pcm[2*i]; 870 y = pcm[2*i+1]; 871 pxx = SHR32(MULT16_16(x,x),2); 872 pxy = SHR32(MULT16_16(x,y),2); 873 pyy = SHR32(MULT16_16(y,y),2); 874 x = pcm[2*i+2]; 875 y = pcm[2*i+3]; 876 pxx += SHR32(MULT16_16(x,x),2); 877 pxy += SHR32(MULT16_16(x,y),2); 878 pyy += SHR32(MULT16_16(y,y),2); 879 x = pcm[2*i+4]; 880 y = pcm[2*i+5]; 881 pxx += SHR32(MULT16_16(x,x),2); 882 pxy += SHR32(MULT16_16(x,y),2); 883 pyy += SHR32(MULT16_16(y,y),2); 884 x = pcm[2*i+6]; 885 y = pcm[2*i+7]; 886 pxx += SHR32(MULT16_16(x,x),2); 887 pxy += SHR32(MULT16_16(x,y),2); 888 pyy += SHR32(MULT16_16(y,y),2); 889 890 xx += SHR32(pxx, 10); 891 xy += SHR32(pxy, 10); 892 yy += SHR32(pyy, 10); 893 } 894 mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX); 895 mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); 896 mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); 897 mem->XX = MAX32(0, mem->XX); 898 mem->XY = MAX32(0, mem->XY); 899 mem->YY = MAX32(0, mem->YY); 900 if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18)) 901 { 902 sqrt_xx = celt_sqrt(mem->XX); 903 sqrt_yy = celt_sqrt(mem->YY); 904 qrrt_xx = celt_sqrt(sqrt_xx); 905 qrrt_yy = celt_sqrt(sqrt_yy); 906 /* Inter-channel correlation */ 907 mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy); 908 corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16); 909 /* Approximate loudness difference */ 910 ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy); 911 width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff); 912 /* Smoothing over one second */ 913 mem->smoothed_width += (width-mem->smoothed_width)/frame_rate; 914 /* Peak follower */ 915 mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, 
mem->smoothed_width); 916 } else { 917 width = 0; 918 corr=Q15ONE; 919 ldiff=0; 920 } 921 /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/ 922 return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower)); 923 } 924 925 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, 926 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, 927 const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix) 928 { 929 void *silk_enc; 930 CELTEncoder *celt_enc; 931 int i; 932 int ret=0; 933 opus_int32 nBytes; 934 ec_enc enc; 935 int bytes_target; 936 int prefill=0; 937 int start_band = 0; 938 int redundancy = 0; 939 int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */ 940 int celt_to_silk = 0; 941 VARDECL(opus_val16, pcm_buf); 942 int nb_compr_bytes; 943 int to_celt = 0; 944 opus_uint32 redundant_rng = 0; 945 int cutoff_Hz, hp_freq_smth1; 946 int voice_est; /* Probability of voice in Q7 */ 947 opus_int32 equiv_rate; 948 int delay_compensation; 949 int frame_rate; 950 opus_int32 max_rate; /* Max bitrate we're allowed to use */ 951 int curr_bandwidth; 952 opus_val16 HB_gain; 953 opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ 954 int total_buffer; 955 opus_val16 stereo_width; 956 const CELTMode *celt_mode; 957 AnalysisInfo analysis_info; 958 int analysis_read_pos_bak=-1; 959 int analysis_read_subframe_bak=-1; 960 VARDECL(opus_val16, tmp_prefill); 961 962 ALLOC_STACK; 963 964 max_data_bytes = IMIN(1276, out_data_bytes); 965 966 st->rangeFinal = 0; 967 if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && 968 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) 969 || (400*frame_size < st->Fs) 970 || max_data_bytes<=0 971 ) 972 { 973 RESTORE_STACK; 974 
return OPUS_BAD_ARG; 975 } 976 silk_enc = (char*)st+st->silk_enc_offset; 977 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 978 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 979 delay_compensation = 0; 980 else 981 delay_compensation = st->delay_compensation; 982 983 lsb_depth = IMIN(lsb_depth, st->lsb_depth); 984 985 analysis_info.valid = 0; 986 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); 987 #ifndef DISABLE_FLOAT_API 988 #ifdef FIXED_POINT 989 if (st->silk_mode.complexity >= 10 && st->Fs==48000) 990 #else 991 if (st->silk_mode.complexity >= 7 && st->Fs==48000) 992 #endif 993 { 994 analysis_read_pos_bak = st->analysis.read_pos; 995 analysis_read_subframe_bak = st->analysis.read_subframe; 996 run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, 997 c1, c2, analysis_channels, st->Fs, 998 lsb_depth, downmix, &analysis_info); 999 } 1000 #endif 1001 1002 st->voice_ratio = -1; 1003 1004 #ifndef DISABLE_FLOAT_API 1005 st->detected_bandwidth = 0; 1006 if (analysis_info.valid) 1007 { 1008 int analysis_bandwidth; 1009 if (st->signal_type == OPUS_AUTO) 1010 st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); 1011 1012 analysis_bandwidth = analysis_info.bandwidth; 1013 if (analysis_bandwidth<=12) 1014 st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1015 else if (analysis_bandwidth<=14) 1016 st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1017 else if (analysis_bandwidth<=16) 1018 st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1019 else if (analysis_bandwidth<=18) 1020 st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1021 else 1022 st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; 1023 } 1024 #endif 1025 1026 if (st->channels==2 && st->force_channels!=1) 1027 stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem); 1028 else 1029 stereo_width = 0; 1030 total_buffer = delay_compensation; 1031 st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, 
max_data_bytes); 1032 1033 frame_rate = st->Fs/frame_size; 1034 if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 1035 || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) 1036 { 1037 /*If the space is too low to do something useful, emit 'PLC' frames.*/ 1038 int tocmode = st->mode; 1039 int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; 1040 if (tocmode==0) 1041 tocmode = MODE_SILK_ONLY; 1042 if (frame_rate>100) 1043 tocmode = MODE_CELT_ONLY; 1044 if (frame_rate < 50) 1045 tocmode = MODE_SILK_ONLY; 1046 if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) 1047 bw=OPUS_BANDWIDTH_WIDEBAND; 1048 else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) 1049 bw=OPUS_BANDWIDTH_NARROWBAND; 1050 else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) 1051 bw=OPUS_BANDWIDTH_SUPERWIDEBAND; 1052 data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); 1053 RESTORE_STACK; 1054 return 1; 1055 } 1056 if (!st->use_vbr) 1057 { 1058 int cbrBytes; 1059 cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes); 1060 st->bitrate_bps = cbrBytes * (8*frame_rate); 1061 max_data_bytes = cbrBytes; 1062 } 1063 max_rate = frame_rate*max_data_bytes*8; 1064 1065 /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ 1066 equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50); 1067 1068 if (st->signal_type == OPUS_SIGNAL_VOICE) 1069 voice_est = 127; 1070 else if (st->signal_type == OPUS_SIGNAL_MUSIC) 1071 voice_est = 0; 1072 else if (st->voice_ratio >= 0) 1073 { 1074 voice_est = st->voice_ratio*327>>8; 1075 /* For AUDIO, never be more than 90% confident of having speech */ 1076 if (st->application == OPUS_APPLICATION_AUDIO) 1077 voice_est = IMIN(voice_est, 115); 1078 } else if (st->application == OPUS_APPLICATION_VOIP) 1079 voice_est = 115; 1080 else 1081 voice_est = 48; 1082 1083 if (st->force_channels!=OPUS_AUTO && st->channels == 2) 1084 { 1085 st->stream_channels = 
st->force_channels; 1086 } else { 1087 #ifdef FUZZING 1088 /* Random mono/stereo decision */ 1089 if (st->channels == 2 && (rand()&0x1F)==0) 1090 st->stream_channels = 3-st->stream_channels; 1091 #else 1092 /* Rate-dependent mono-stereo decision */ 1093 if (st->channels == 2) 1094 { 1095 opus_int32 stereo_threshold; 1096 stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14); 1097 if (st->stream_channels == 2) 1098 stereo_threshold -= 1000; 1099 else 1100 stereo_threshold += 1000; 1101 st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1; 1102 } else { 1103 st->stream_channels = st->channels; 1104 } 1105 #endif 1106 } 1107 equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50); 1108 1109 /* Mode selection depending on application and signal type */ 1110 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1111 { 1112 st->mode = MODE_CELT_ONLY; 1113 } else if (st->user_forced_mode == OPUS_AUTO) 1114 { 1115 #ifdef FUZZING 1116 /* Random mode switching */ 1117 if ((rand()&0xF)==0) 1118 { 1119 if ((rand()&0x1)==0) 1120 st->mode = MODE_CELT_ONLY; 1121 else 1122 st->mode = MODE_SILK_ONLY; 1123 } else { 1124 if (st->prev_mode==MODE_CELT_ONLY) 1125 st->mode = MODE_CELT_ONLY; 1126 else 1127 st->mode = MODE_SILK_ONLY; 1128 } 1129 #else 1130 opus_int32 mode_voice, mode_music; 1131 opus_int32 threshold; 1132 1133 /* Interpolate based on stereo width */ 1134 mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0]) 1135 + MULT16_32_Q15(stereo_width,mode_thresholds[1][0])); 1136 mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) 1137 + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); 1138 /* Interpolate based on speech/music probability */ 1139 threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); 1140 /* Bias towards SILK for VoIP because of some useful features */ 1141 if 
(st->application == OPUS_APPLICATION_VOIP) 1142 threshold += 8000; 1143 1144 /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/ 1145 /* Hysteresis */ 1146 if (st->prev_mode == MODE_CELT_ONLY) 1147 threshold -= 4000; 1148 else if (st->prev_mode>0) 1149 threshold += 4000; 1150 1151 st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY; 1152 1153 /* When FEC is enabled and there's enough packet loss, use SILK */ 1154 if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) 1155 st->mode = MODE_SILK_ONLY; 1156 /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */ 1157 if (st->silk_mode.useDTX && voice_est > 100) 1158 st->mode = MODE_SILK_ONLY; 1159 #endif 1160 } else { 1161 st->mode = st->user_forced_mode; 1162 } 1163 1164 /* Override the chosen mode to make sure we meet the requested frame size */ 1165 if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100) 1166 st->mode = MODE_CELT_ONLY; 1167 if (st->lfe) 1168 st->mode = MODE_CELT_ONLY; 1169 /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */ 1170 if (max_data_bytes < (frame_rate > 50 ? 
12000 : 8000)*frame_size / (st->Fs * 8)) 1171 st->mode = MODE_CELT_ONLY; 1172 1173 if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 1174 && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) 1175 { 1176 /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ 1177 st->silk_mode.toMono = 1; 1178 st->stream_channels = 2; 1179 } else { 1180 st->silk_mode.toMono = 0; 1181 } 1182 1183 if (st->prev_mode > 0 && 1184 ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || 1185 (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) 1186 { 1187 redundancy = 1; 1188 celt_to_silk = (st->mode != MODE_CELT_ONLY); 1189 if (!celt_to_silk) 1190 { 1191 /* Switch to SILK/hybrid if frame size is 10 ms or more*/ 1192 if (frame_size >= st->Fs/100) 1193 { 1194 st->mode = st->prev_mode; 1195 to_celt = 1; 1196 } else { 1197 redundancy=0; 1198 } 1199 } 1200 } 1201 /* For the first frame at a new SILK bandwidth */ 1202 if (st->silk_bw_switch) 1203 { 1204 redundancy = 1; 1205 celt_to_silk = 1; 1206 st->silk_bw_switch = 0; 1207 prefill=1; 1208 } 1209 1210 if (redundancy) 1211 { 1212 /* Fair share of the max size allowed */ 1213 redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200)); 1214 /* For VBR, target the actual bitrate (subject to the limit above) */ 1215 if (st->use_vbr) 1216 redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600); 1217 } 1218 1219 if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) 1220 { 1221 silk_EncControlStruct dummy; 1222 silk_InitEncoder( silk_enc, st->arch, &dummy); 1223 prefill=1; 1224 } 1225 1226 /* Automatic (rate-dependent) bandwidth selection */ 1227 if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) 1228 { 1229 const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; 1230 opus_int32 bandwidth_thresholds[8]; 1231 int bandwidth = 
OPUS_BANDWIDTH_FULLBAND; 1232 opus_int32 equiv_rate2; 1233 1234 equiv_rate2 = equiv_rate; 1235 if (st->mode != MODE_CELT_ONLY) 1236 { 1237 /* Adjust the threshold +/- 10% depending on complexity */ 1238 equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50; 1239 /* CBR is less efficient by ~1 kb/s */ 1240 if (!st->use_vbr) 1241 equiv_rate2 -= 1000; 1242 } 1243 if (st->channels==2 && st->force_channels!=1) 1244 { 1245 voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; 1246 music_bandwidth_thresholds = stereo_music_bandwidth_thresholds; 1247 } else { 1248 voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds; 1249 music_bandwidth_thresholds = mono_music_bandwidth_thresholds; 1250 } 1251 /* Interpolate bandwidth thresholds depending on voice estimation */ 1252 for (i=0;i<8;i++) 1253 { 1254 bandwidth_thresholds[i] = music_bandwidth_thresholds[i] 1255 + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14); 1256 } 1257 do { 1258 int threshold, hysteresis; 1259 threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; 1260 hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; 1261 if (!st->first) 1262 { 1263 if (st->bandwidth >= bandwidth) 1264 threshold -= hysteresis; 1265 else 1266 threshold += hysteresis; 1267 } 1268 if (equiv_rate2 >= threshold) 1269 break; 1270 } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); 1271 st->bandwidth = bandwidth; 1272 /* Prevents any transition to SWB/FB until the SILK layer has fully 1273 switched to WB mode and turned the variable LP filter off */ 1274 if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1275 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1276 } 1277 1278 if (st->bandwidth>st->max_bandwidth) 1279 st->bandwidth = st->max_bandwidth; 1280 1281 if (st->user_bandwidth != OPUS_AUTO) 1282 st->bandwidth = st->user_bandwidth; 1283 1284 /* This prevents 
us from using hybrid at unsafe CBR/max rates */ 1285 if (st->mode != MODE_CELT_ONLY && max_rate < 15000) 1286 { 1287 st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND); 1288 } 1289 1290 /* Prevents Opus from wasting bits on frequencies that are above 1291 the Nyquist rate of the input signal */ 1292 if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND) 1293 st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1294 if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1295 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1296 if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND) 1297 st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1298 if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND) 1299 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1300 #ifndef DISABLE_FLOAT_API 1301 /* Use detected bandwidth to reduce the encoded bandwidth. */ 1302 if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO) 1303 { 1304 int min_detected_bandwidth; 1305 /* Makes bandwidth detection more conservative just in case the detector 1306 gets it wrong when we could have coded a high bandwidth transparently. 1307 When operating in SILK/hybrid mode, we don't go below wideband to avoid 1308 more complicated switches that require redundancy. 
*/ 1309 if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY) 1310 min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1311 else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY) 1312 min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1313 else if (equiv_rate <= 30000*st->stream_channels) 1314 min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1315 else if (equiv_rate <= 44000*st->stream_channels) 1316 min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1317 else 1318 min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; 1319 1320 st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth); 1321 st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); 1322 } 1323 #endif 1324 celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); 1325 1326 /* CELT mode doesn't support mediumband, use wideband instead */ 1327 if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) 1328 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1329 if (st->lfe) 1330 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1331 1332 /* Can't support higher than wideband for >20 ms frames */ 1333 if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) 1334 { 1335 VARDECL(unsigned char, tmp_data); 1336 int nb_frames; 1337 int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; 1338 VARDECL(OpusRepacketizer, rp); 1339 opus_int32 bytes_per_frame; 1340 opus_int32 repacketize_len; 1341 1342 #ifndef DISABLE_FLOAT_API 1343 if (analysis_read_pos_bak!= -1) 1344 { 1345 st->analysis.read_pos = analysis_read_pos_bak; 1346 st->analysis.read_subframe = analysis_read_subframe_bak; 1347 } 1348 #endif 1349 1350 nb_frames = frame_size > st->Fs/25 ? 
3 : 2; 1351 bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); 1352 1353 ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); 1354 1355 ALLOC(rp, 1, OpusRepacketizer); 1356 opus_repacketizer_init(rp); 1357 1358 bak_mode = st->user_forced_mode; 1359 bak_bandwidth = st->user_bandwidth; 1360 bak_channels = st->force_channels; 1361 1362 st->user_forced_mode = st->mode; 1363 st->user_bandwidth = st->bandwidth; 1364 st->force_channels = st->stream_channels; 1365 bak_to_mono = st->silk_mode.toMono; 1366 1367 if (bak_to_mono) 1368 st->force_channels = 1; 1369 else 1370 st->prev_channels = st->stream_channels; 1371 for (i=0;i<nb_frames;i++) 1372 { 1373 int tmp_len; 1374 st->silk_mode.toMono = 0; 1375 /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ 1376 if (to_celt && i==nb_frames-1) 1377 st->user_forced_mode = MODE_CELT_ONLY; 1378 tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, 1379 tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, 1380 NULL, 0, c1, c2, analysis_channels, downmix); 1381 if (tmp_len<0) 1382 { 1383 RESTORE_STACK; 1384 return OPUS_INTERNAL_ERROR; 1385 } 1386 ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); 1387 if (ret<0) 1388 { 1389 RESTORE_STACK; 1390 return OPUS_INTERNAL_ERROR; 1391 } 1392 } 1393 if (st->use_vbr) 1394 repacketize_len = out_data_bytes; 1395 else 1396 repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes); 1397 ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); 1398 if (ret<0) 1399 { 1400 RESTORE_STACK; 1401 return OPUS_INTERNAL_ERROR; 1402 } 1403 st->user_forced_mode = bak_mode; 1404 st->user_bandwidth = bak_bandwidth; 1405 st->force_channels = bak_channels; 1406 st->silk_mode.toMono = bak_to_mono; 1407 RESTORE_STACK; 1408 return ret; 1409 } 1410 curr_bandwidth = st->bandwidth; 1411 1412 /* Chooses the appropriate mode for speech 1413 *NEVER* switch to/from CELT-only 
mode here as this will invalidate some assumptions */ 1414 if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1415 st->mode = MODE_HYBRID; 1416 if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) 1417 st->mode = MODE_SILK_ONLY; 1418 1419 /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ 1420 bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; 1421 1422 data += 1; 1423 1424 ec_enc_init(&enc, data, max_data_bytes-1); 1425 1426 ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); 1427 for (i=0;i<total_buffer*st->channels;i++) 1428 pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; 1429 1430 if (st->mode == MODE_CELT_ONLY) 1431 hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 1432 else 1433 hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15; 1434 1435 st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15, 1436 hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); 1437 1438 /* convert from log scale to Hertz */ 1439 cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) ); 1440 1441 if (st->application == OPUS_APPLICATION_VOIP) 1442 { 1443 hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); 1444 } else { 1445 dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); 1446 } 1447 1448 1449 1450 /* SILK processing */ 1451 HB_gain = Q15ONE; 1452 if (st->mode != MODE_CELT_ONLY) 1453 { 1454 opus_int32 total_bitRate, celt_rate; 1455 #ifdef FIXED_POINT 1456 const opus_int16 *pcm_silk; 1457 #else 1458 VARDECL(opus_int16, pcm_silk); 1459 ALLOC(pcm_silk, st->channels*frame_size, opus_int16); 1460 #endif 1461 1462 /* Distribute bits between SILK and CELT */ 1463 
total_bitRate = 8 * bytes_target * frame_rate; 1464 if( st->mode == MODE_HYBRID ) { 1465 int HB_gain_ref; 1466 /* Base rate for SILK */ 1467 st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) ); 1468 if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { 1469 /* SILK gets 2/3 of the remaining bits */ 1470 st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3; 1471 } else { /* FULLBAND */ 1472 /* SILK gets 3/5 of the remaining bits */ 1473 st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5; 1474 } 1475 /* Don't let SILK use more than 80% */ 1476 if( st->silk_mode.bitRate > total_bitRate * 4/5 ) { 1477 st->silk_mode.bitRate = total_bitRate * 4/5; 1478 } 1479 if (!st->energy_masking) 1480 { 1481 /* Increasingly attenuate high band when it gets allocated fewer bits */ 1482 celt_rate = total_bitRate - st->silk_mode.bitRate; 1483 HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600; 1484 HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6); 1485 HB_gain = HB_gain < Q15ONE*6/7 ? 
HB_gain + Q15ONE/7 : Q15ONE; 1486 } 1487 } else { 1488 /* SILK gets all bits */ 1489 st->silk_mode.bitRate = total_bitRate; 1490 } 1491 1492 /* Surround masking for SILK */ 1493 if (st->energy_masking && st->use_vbr && !st->lfe) 1494 { 1495 opus_val32 mask_sum=0; 1496 opus_val16 masking_depth; 1497 opus_int32 rate_offset; 1498 int c; 1499 int end = 17; 1500 opus_int16 srate = 16000; 1501 if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) 1502 { 1503 end = 13; 1504 srate = 8000; 1505 } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) 1506 { 1507 end = 15; 1508 srate = 12000; 1509 } 1510 for (c=0;c<st->channels;c++) 1511 { 1512 for(i=0;i<end;i++) 1513 { 1514 opus_val16 mask; 1515 mask = MAX16(MIN16(st->energy_masking[21*c+i], 1516 QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); 1517 if (mask > 0) 1518 mask = HALF16(mask); 1519 mask_sum += mask; 1520 } 1521 } 1522 /* Conservative rate reduction, we cut the masking in half */ 1523 masking_depth = mask_sum / end*st->channels; 1524 masking_depth += QCONST16(.2f, DB_SHIFT); 1525 rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); 1526 rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); 1527 /* Split the rate change between the SILK and CELT part for hybrid. 
*/ 1528 if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND) 1529 st->silk_mode.bitRate += 3*rate_offset/5; 1530 else 1531 st->silk_mode.bitRate += rate_offset; 1532 bytes_target += rate_offset * frame_size / (8 * st->Fs); 1533 } 1534 1535 st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; 1536 st->silk_mode.nChannelsAPI = st->channels; 1537 st->silk_mode.nChannelsInternal = st->stream_channels; 1538 if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 1539 st->silk_mode.desiredInternalSampleRate = 8000; 1540 } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 1541 st->silk_mode.desiredInternalSampleRate = 12000; 1542 } else { 1543 silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); 1544 st->silk_mode.desiredInternalSampleRate = 16000; 1545 } 1546 if( st->mode == MODE_HYBRID ) { 1547 /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ 1548 st->silk_mode.minInternalSampleRate = 16000; 1549 } else { 1550 st->silk_mode.minInternalSampleRate = 8000; 1551 } 1552 1553 if (st->mode == MODE_SILK_ONLY) 1554 { 1555 opus_int32 effective_max_rate = max_rate; 1556 st->silk_mode.maxInternalSampleRate = 16000; 1557 if (frame_rate > 50) 1558 effective_max_rate = effective_max_rate*2/3; 1559 if (effective_max_rate < 13000) 1560 { 1561 st->silk_mode.maxInternalSampleRate = 12000; 1562 st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); 1563 } 1564 if (effective_max_rate < 9600) 1565 { 1566 st->silk_mode.maxInternalSampleRate = 8000; 1567 st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); 1568 } 1569 } else { 1570 st->silk_mode.maxInternalSampleRate = 16000; 1571 } 1572 1573 st->silk_mode.useCBR = !st->use_vbr; 1574 1575 /* Call SILK encoder for the low band */ 1576 nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes); 1577 1578 st->silk_mode.maxBits = nBytes*8; 1579 /* Only allow up to 90% of 
the bits for hybrid mode*/ 1580 if (st->mode == MODE_HYBRID) 1581 st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10; 1582 if (st->silk_mode.useCBR) 1583 { 1584 st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8; 1585 /* Reduce the initial target to make it easier to reach the CBR rate */ 1586 st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000); 1587 } 1588 1589 if (prefill) 1590 { 1591 opus_int32 zero=0; 1592 int prefill_offset; 1593 /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode 1594 a discontinuity. The exact location is what we need to avoid leaving any "gap" 1595 in the audio when mixing with the redundant CELT frame. Here we can afford to 1596 overwrite st->delay_buffer because the only thing that uses it before it gets 1597 rewritten is tmp_prefill[] and even then only the part after the ramp really 1598 gets used (rather than sent to the encoder and discarded) */ 1599 prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); 1600 gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, 1601 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); 1602 for(i=0;i<prefill_offset;i++) 1603 st->delay_buffer[i]=0; 1604 #ifdef FIXED_POINT 1605 pcm_silk = st->delay_buffer; 1606 #else 1607 for (i=0;i<st->encoder_buffer*st->channels;i++) 1608 pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); 1609 #endif 1610 silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); 1611 } 1612 1613 #ifdef FIXED_POINT 1614 pcm_silk = pcm_buf+total_buffer*st->channels; 1615 #else 1616 for (i=0;i<frame_size*st->channels;i++) 1617 pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); 1618 #endif 1619 ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); 1620 if( ret ) { 1621 /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ 1622 /* Handle error */ 1623 
RESTORE_STACK; 1624 return OPUS_INTERNAL_ERROR; 1625 } 1626 if (nBytes==0) 1627 { 1628 st->rangeFinal = 0; 1629 data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 1630 RESTORE_STACK; 1631 return 1; 1632 } 1633 /* Extract SILK internal bandwidth for signaling in first byte */ 1634 if( st->mode == MODE_SILK_ONLY ) { 1635 if( st->silk_mode.internalSampleRate == 8000 ) { 1636 curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1637 } else if( st->silk_mode.internalSampleRate == 12000 ) { 1638 curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1639 } else if( st->silk_mode.internalSampleRate == 16000 ) { 1640 curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1641 } 1642 } else { 1643 silk_assert( st->silk_mode.internalSampleRate == 16000 ); 1644 } 1645 1646 st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; 1647 /* FIXME: How do we allocate the redundancy for CBR? */ 1648 if (st->silk_mode.opusCanSwitch) 1649 { 1650 redundancy = 1; 1651 celt_to_silk = 0; 1652 st->silk_bw_switch = 1; 1653 } 1654 } 1655 1656 /* CELT processing */ 1657 { 1658 int endband=21; 1659 1660 switch(curr_bandwidth) 1661 { 1662 case OPUS_BANDWIDTH_NARROWBAND: 1663 endband = 13; 1664 break; 1665 case OPUS_BANDWIDTH_MEDIUMBAND: 1666 case OPUS_BANDWIDTH_WIDEBAND: 1667 endband = 17; 1668 break; 1669 case OPUS_BANDWIDTH_SUPERWIDEBAND: 1670 endband = 19; 1671 break; 1672 case OPUS_BANDWIDTH_FULLBAND: 1673 endband = 21; 1674 break; 1675 } 1676 celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); 1677 celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); 1678 } 1679 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); 1680 if (st->mode != MODE_SILK_ONLY) 1681 { 1682 opus_val32 celt_pred=2; 1683 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 1684 /* We may still decide to disable prediction later */ 1685 if (st->silk_mode.reducedDependency) 1686 celt_pred = 0; 1687 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred)); 1688 1689 if (st->mode == MODE_HYBRID) 
1690 { 1691 int len; 1692 1693 len = (ec_tell(&enc)+7)>>3; 1694 if (redundancy) 1695 len += st->mode == MODE_HYBRID ? 3 : 1; 1696 if( st->use_vbr ) { 1697 nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); 1698 } else { 1699 /* check if SILK used up too much */ 1700 nb_compr_bytes = len > bytes_target ? len : bytes_target; 1701 } 1702 } else { 1703 if (st->use_vbr) 1704 { 1705 opus_int32 bonus=0; 1706 #ifndef DISABLE_FLOAT_API 1707 if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) 1708 { 1709 bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); 1710 if (analysis_info.valid) 1711 bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); 1712 } 1713 #endif 1714 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); 1715 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); 1716 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); 1717 nb_compr_bytes = max_data_bytes-1-redundancy_bytes; 1718 } else { 1719 nb_compr_bytes = bytes_target; 1720 } 1721 } 1722 1723 } else { 1724 nb_compr_bytes = 0; 1725 } 1726 1727 ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); 1728 if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) 1729 { 1730 for (i=0;i<st->channels*st->Fs/400;i++) 1731 tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; 1732 } 1733 1734 for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++) 1735 st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; 1736 for (;i<st->encoder_buffer*st->channels;i++) 1737 st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; 1738 1739 /* gain_fade() and stereo_fade() need to be after the buffer copying 1740 because we don't want any of this to affect the SILK part */ 1741 if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { 1742 gain_fade(pcm_buf, pcm_buf, 1743 st->prev_HB_gain, 
HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); 1744 } 1745 st->prev_HB_gain = HB_gain; 1746 if (st->mode != MODE_HYBRID || st->stream_channels==1) 1747 st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000)); 1748 if( !st->energy_masking && st->channels == 2 ) { 1749 /* Apply stereo width reduction (at low bitrates) */ 1750 if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { 1751 opus_val16 g1, g2; 1752 g1 = st->hybrid_stereo_width_Q14; 1753 g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); 1754 #ifdef FIXED_POINT 1755 g1 = g1==16384 ? Q15ONE : SHL16(g1,1); 1756 g2 = g2==16384 ? Q15ONE : SHL16(g2,1); 1757 #else 1758 g1 *= (1.f/16384); 1759 g2 *= (1.f/16384); 1760 #endif 1761 stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, 1762 frame_size, st->channels, celt_mode->window, st->Fs); 1763 st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; 1764 } 1765 } 1766 1767 if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) 1768 { 1769 /* For SILK mode, the redundancy is inferred from the length */ 1770 if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes)) 1771 ec_enc_bit_logp(&enc, redundancy, 12); 1772 if (redundancy) 1773 { 1774 int max_redundancy; 1775 ec_enc_bit_logp(&enc, celt_to_silk, 1); 1776 if (st->mode == MODE_HYBRID) 1777 max_redundancy = (max_data_bytes-1)-nb_compr_bytes; 1778 else 1779 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); 1780 /* Target the same bit-rate for redundancy as for the rest, 1781 up to a max of 257 bytes */ 1782 redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600); 1783 redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); 1784 if (st->mode == MODE_HYBRID) 1785 ec_enc_uint(&enc, redundancy_bytes-2, 256); 1786 } 1787 } else { 1788 redundancy = 0; 1789 } 1790 1791 if (!redundancy) 1792 { 1793 st->silk_bw_switch = 0; 1794 
redundancy_bytes = 0; 1795 } 1796 if (st->mode != MODE_CELT_ONLY)start_band=17; 1797 1798 if (st->mode == MODE_SILK_ONLY) 1799 { 1800 ret = (ec_tell(&enc)+7)>>3; 1801 ec_enc_done(&enc); 1802 nb_compr_bytes = ret; 1803 } else { 1804 nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes); 1805 ec_enc_shrink(&enc, nb_compr_bytes); 1806 } 1807 1808 #ifndef DISABLE_FLOAT_API 1809 if (redundancy || st->mode != MODE_SILK_ONLY) 1810 celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); 1811 #endif 1812 1813 /* 5 ms redundant frame for CELT->SILK */ 1814 if (redundancy && celt_to_silk) 1815 { 1816 int err; 1817 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); 1818 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 1819 err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); 1820 if (err < 0) 1821 { 1822 RESTORE_STACK; 1823 return OPUS_INTERNAL_ERROR; 1824 } 1825 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); 1826 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 1827 } 1828 1829 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band)); 1830 1831 if (st->mode != MODE_SILK_ONLY) 1832 { 1833 if (st->mode != st->prev_mode && st->prev_mode > 0) 1834 { 1835 unsigned char dummy[2]; 1836 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 1837 1838 /* Prefilling */ 1839 celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); 1840 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); 1841 } 1842 /* If false, we already busted the budget and we'll end up with a "PLC packet" */ 1843 if (ec_tell(&enc) <= 8*nb_compr_bytes) 1844 { 1845 ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); 1846 if (ret < 0) 1847 { 1848 RESTORE_STACK; 1849 return OPUS_INTERNAL_ERROR; 1850 } 1851 } 1852 } 1853 1854 /* 5 ms redundant frame for SILK->CELT */ 1855 if (redundancy && !celt_to_silk) 1856 { 1857 int err; 1858 unsigned char dummy[2]; 1859 int N2, N4; 1860 N2 = 
st->Fs/200; 1861 N4 = st->Fs/400; 1862 1863 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 1864 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); 1865 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); 1866 1867 /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ 1868 celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); 1869 1870 err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); 1871 if (err < 0) 1872 { 1873 RESTORE_STACK; 1874 return OPUS_INTERNAL_ERROR; 1875 } 1876 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); 1877 } 1878 1879 1880 1881 /* Signalling the mode in the first byte */ 1882 data--; 1883 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 1884 1885 st->rangeFinal = enc.rng ^ redundant_rng; 1886 1887 if (to_celt) 1888 st->prev_mode = MODE_CELT_ONLY; 1889 else 1890 st->prev_mode = st->mode; 1891 st->prev_channels = st->stream_channels; 1892 st->prev_framesize = frame_size; 1893 1894 st->first = 0; 1895 1896 /* In the unlikely case that the SILK encoder busted its target, tell 1897 the decoder to call the PLC */ 1898 if (ec_tell(&enc) > (max_data_bytes-1)*8) 1899 { 1900 if (max_data_bytes < 2) 1901 { 1902 RESTORE_STACK; 1903 return OPUS_BUFFER_TOO_SMALL; 1904 } 1905 data[1] = 0; 1906 ret = 1; 1907 st->rangeFinal = 0; 1908 } else if (st->mode==MODE_SILK_ONLY&&!redundancy) 1909 { 1910 /*When in LPC only mode it's perfectly 1911 reasonable to strip off trailing zero bytes as 1912 the required range decoder behavior is to 1913 fill these in. 
This can't be done when the MDCT 1914 modes are used because the decoder needs to know 1915 the actual length for allocation purposes.*/ 1916 while(ret>2&&data[ret]==0)ret--; 1917 } 1918 /* Count ToC and redundancy */ 1919 ret += 1+redundancy_bytes; 1920 if (!st->use_vbr) 1921 { 1922 if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK) 1923 1924 { 1925 RESTORE_STACK; 1926 return OPUS_INTERNAL_ERROR; 1927 } 1928 ret = max_data_bytes; 1929 } 1930 RESTORE_STACK; 1931 return ret; 1932 } 1933 1934 #ifdef FIXED_POINT 1935 1936 #ifndef DISABLE_FLOAT_API 1937 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, 1938 unsigned char *data, opus_int32 max_data_bytes) 1939 { 1940 int i, ret; 1941 int frame_size; 1942 int delay_compensation; 1943 VARDECL(opus_int16, in); 1944 ALLOC_STACK; 1945 1946 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1947 delay_compensation = 0; 1948 else 1949 delay_compensation = st->delay_compensation; 1950 frame_size = compute_frame_size(pcm, analysis_frame_size, 1951 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 1952 delay_compensation, downmix_float, st->analysis.subframe_mem); 1953 1954 ALLOC(in, frame_size*st->channels, opus_int16); 1955 1956 for (i=0;i<frame_size*st->channels;i++) 1957 in[i] = FLOAT2INT16(pcm[i]); 1958 ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); 1959 RESTORE_STACK; 1960 return ret; 1961 } 1962 #endif 1963 1964 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, 1965 unsigned char *data, opus_int32 out_data_bytes) 1966 { 1967 int frame_size; 1968 int delay_compensation; 1969 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1970 delay_compensation = 0; 1971 else 1972 delay_compensation = st->delay_compensation; 1973 frame_size = compute_frame_size(pcm, analysis_frame_size, 1974 st->variable_duration, st->channels, st->Fs, 
st->bitrate_bps, 1975 delay_compensation, downmix_int 1976 #ifndef DISABLE_FLOAT_API 1977 , st->analysis.subframe_mem 1978 #endif 1979 ); 1980 return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); 1981 } 1982 1983 #else 1984 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, 1985 unsigned char *data, opus_int32 max_data_bytes) 1986 { 1987 int i, ret; 1988 int frame_size; 1989 int delay_compensation; 1990 VARDECL(float, in); 1991 ALLOC_STACK; 1992 1993 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1994 delay_compensation = 0; 1995 else 1996 delay_compensation = st->delay_compensation; 1997 frame_size = compute_frame_size(pcm, analysis_frame_size, 1998 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 1999 delay_compensation, downmix_int, st->analysis.subframe_mem); 2000 2001 ALLOC(in, frame_size*st->channels, float); 2002 2003 for (i=0;i<frame_size*st->channels;i++) 2004 in[i] = (1.0f/32768)*pcm[i]; 2005 ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); 2006 RESTORE_STACK; 2007 return ret; 2008 } 2009 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, 2010 unsigned char *data, opus_int32 out_data_bytes) 2011 { 2012 int frame_size; 2013 int delay_compensation; 2014 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2015 delay_compensation = 0; 2016 else 2017 delay_compensation = st->delay_compensation; 2018 frame_size = compute_frame_size(pcm, analysis_frame_size, 2019 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 2020 delay_compensation, downmix_float, st->analysis.subframe_mem); 2021 return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, 2022 pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); 2023 } 2024 #endif 2025 2026 2027 int 
opus_encoder_ctl(OpusEncoder *st, int request, ...)
{
    /* Variadic control entry point for the encoder.
       `request` selects one case of the switch below.  Each case reads at
       most one argument from the va_list: an opus_int32 for most setters, a
       pointer for getters (and for OPUS_SET_ENERGY_MASK_REQUEST), nothing for
       OPUS_RESET_STATE.
       Returns OPUS_OK on success, OPUS_BAD_ARG when a value is rejected or a
       getter receives a null pointer, OPUS_UNIMPLEMENTED for a request that
       has no case here, or the result of celt_encoder_ctl() for the requests
       that hand it back (LFE, energy mask, CELT mode). */
    int ret;
    CELTEncoder *celt_enc;
    va_list ap;

    ret = OPUS_OK;
    va_start(ap, request);

    /* The CELT encoder state lives celt_enc_offset bytes into the same
       allocation as the OpusEncoder itself. */
    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);

    switch (request)
    {
        case OPUS_SET_APPLICATION_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* Only the three known applications are accepted. */
            if (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO
                 && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
            {
               goto bad_arg;
            }
            /* While st->first is zero the application may be re-set to its
               current value but not changed. */
            if (!st->first && st->application != value)
            {
               goto bad_arg;
            }
            st->application = value;
        }
        break;
        case OPUS_GET_APPLICATION_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->application;
        }
        break;
        case OPUS_SET_BITRATE_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* OPUS_AUTO and OPUS_BITRATE_MAX are stored as-is; any other
               value must be positive and is clamped to
               [500, 300000*channels]. */
            if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX)
            {
                if (value <= 0)
                    goto bad_arg;
                else if (value <= 500)
                    value = 500;
                else if (value > (opus_int32)300000*st->channels)
                    value = (opus_int32)300000*st->channels;
            }
            st->user_bitrate_bps = value;
        }
        break;
        case OPUS_GET_BITRATE_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            /* Reports the bitrate derived from the user setting for the
               previous frame size, not the raw stored setting. */
            *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276);
        }
        break;
        case OPUS_SET_FORCE_CHANNELS_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* Either OPUS_AUTO or a count in 1..st->channels. */
            if((value<1 || value>st->channels) && value != OPUS_AUTO)
            {
               goto bad_arg;
            }
            st->force_channels = value;
        }
        break;
        case OPUS_GET_FORCE_CHANNELS_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->force_channels;
        }
        break;
        case OPUS_SET_MAX_BANDWIDTH_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND)
            {
               goto bad_arg;
            }
            st->max_bandwidth = value;
            /* Mirror the limit into the SILK maximum internal sample rate:
               8 kHz for narrowband, 12 kHz for mediumband, 16 kHz otherwise. */
            if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
                st->silk_mode.maxInternalSampleRate = 8000;
            } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
                st->silk_mode.maxInternalSampleRate = 12000;
            } else {
                st->silk_mode.maxInternalSampleRate = 16000;
            }
        }
        break;
        case OPUS_GET_MAX_BANDWIDTH_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->max_bandwidth;
        }
        break;
        case OPUS_SET_BANDWIDTH_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* Same range as the max-bandwidth setter, plus OPUS_AUTO. */
            if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO)
            {
               goto bad_arg;
            }
            st->user_bandwidth = value;
            /* Same SILK sample-rate mapping as above; OPUS_AUTO falls into
               the 16 kHz branch. */
            if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
                st->silk_mode.maxInternalSampleRate = 8000;
            } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
                st->silk_mode.maxInternalSampleRate = 12000;
            } else {
                st->silk_mode.maxInternalSampleRate = 16000;
            }
        }
        break;
        case OPUS_GET_BANDWIDTH_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            /* Returns st->bandwidth, not the st->user_bandwidth written by
               the setter. */
            *value = st->bandwidth;
        }
        break;
        case OPUS_SET_DTX_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if(value<0 || value>1)
            {
               goto bad_arg;
            }
            st->silk_mode.useDTX = value;
        }
        break;
        case OPUS_GET_DTX_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->silk_mode.useDTX;
        }
        break;
        case OPUS_SET_COMPLEXITY_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if(value<0 || value>10)
            {
               goto bad_arg;
            }
            /* Applied to both the SILK control struct and the CELT encoder;
               the CELT ctl result is not checked. */
            st->silk_mode.complexity = value;
            celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value));
        }
        break;
        case OPUS_GET_COMPLEXITY_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->silk_mode.complexity;
        }
        break;
        case OPUS_SET_INBAND_FEC_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if(value<0 || value>1)
            {
               goto bad_arg;
            }
            st->silk_mode.useInBandFEC = value;
        }
        break;
        case OPUS_GET_INBAND_FEC_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->silk_mode.useInBandFEC;
        }
        break;
        case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if (value < 0 || value > 100)
            {
               goto bad_arg;
            }
            /* Applied to both the SILK control struct and the CELT encoder;
               the CELT ctl result is not checked. */
            st->silk_mode.packetLossPercentage = value;
            celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value));
        }
        break;
        case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->silk_mode.packetLossPercentage;
        }
        break;
        case OPUS_SET_VBR_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if(value<0 || value>1)
            {
               goto bad_arg;
            }
            /* The SILK flag is the inverse of the VBR flag. */
            st->use_vbr = value;
            st->silk_mode.useCBR = 1-value;
        }
        break;
        case OPUS_GET_VBR_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->use_vbr;
        }
        break;
        case OPUS_SET_VOICE_RATIO_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* Accepts -1 as well as 0..100. */
            if (value<-1 || value>100)
            {
               goto bad_arg;
            }
            st->voice_ratio = value;
        }
        break;
        case OPUS_GET_VOICE_RATIO_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->voice_ratio;
        }
        break;
        case OPUS_SET_VBR_CONSTRAINT_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if(value<0 || value>1)
            {
               goto bad_arg;
            }
            st->vbr_constraint = value;
        }
        break;
        case OPUS_GET_VBR_CONSTRAINT_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->vbr_constraint;
        }
        break;
        case OPUS_SET_SIGNAL_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC)
            {
               goto bad_arg;
            }
            st->signal_type = value;
        }
        break;
        case OPUS_GET_SIGNAL_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->signal_type;
        }
        break;
        case OPUS_GET_LOOKAHEAD_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            /* Fs/400 samples, plus the delay compensation unless the
               restricted low-delay application is in use. */
            *value = st->Fs/400;
            if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
                *value += st->delay_compensation;
        }
        break;
        case OPUS_GET_SAMPLE_RATE_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->Fs;
        }
        break;
        case OPUS_GET_FINAL_RANGE_REQUEST:
        {
            /* Unlike the other getters this one writes an unsigned value. */
            opus_uint32 *value = va_arg(ap, opus_uint32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->rangeFinal;
        }
        break;
        case OPUS_SET_LSB_DEPTH_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            if (value<8 || value>24)
            {
               goto bad_arg;
            }
            st->lsb_depth=value;
        }
        break;
        case OPUS_GET_LSB_DEPTH_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->lsb_depth;
        }
        break;
        case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* Only the enumerated OPUS_FRAMESIZE_* selectors are accepted. */
            if (value != OPUS_FRAMESIZE_ARG    && value != OPUS_FRAMESIZE_2_5_MS &&
                value != OPUS_FRAMESIZE_5_MS   && value != OPUS_FRAMESIZE_10_MS  &&
                value != OPUS_FRAMESIZE_20_MS  && value != OPUS_FRAMESIZE_40_MS  &&
                value != OPUS_FRAMESIZE_60_MS  && value != OPUS_FRAMESIZE_VARIABLE)
            {
               goto bad_arg;
            }
            /* Stored locally and forwarded to CELT; the CELT ctl result is
               not checked. */
            st->variable_duration = value;
            celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value));
        }
        break;
        case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
        {
            opus_int32 *value = va_arg(ap, opus_int32*);
            if (!value)
            {
               goto bad_arg;
            }
            *value = st->variable_duration;
        }
        break;
        case OPUS_SET_PREDICTION_DISABLED_REQUEST:
        {
           opus_int32 value = va_arg(ap, opus_int32);
           if (value > 1 || value < 0)
           {
              goto bad_arg;
           }
           st->silk_mode.reducedDependency = value;
        }
        break;
        case OPUS_GET_PREDICTION_DISABLED_REQUEST:
        {
           opus_int32 *value = va_arg(ap, opus_int32*);
           if (!value)
           {
              goto bad_arg;
           }
           *value = st->silk_mode.reducedDependency;
        }
        break;
        case OPUS_RESET_STATE:
        {
           void *silk_enc;
           /* Only present because silk_InitEncoder() takes a control struct;
              it is not read afterwards. */
           silk_EncControlStruct dummy;
           char *reset_start;
           int arch;

           silk_enc = (char*)st+st->silk_enc_offset;

           /* The wipe below covers everything from OPUS_ENCODER_RESET_START
              to the end of the struct.  st->arch is declared inside that
              region, so without saving it here it would be zeroed and the
              zero would be passed to silk_InitEncoder() and kept for later
              use. */
           arch = st->arch;
           reset_start = (char*)&st->OPUS_ENCODER_RESET_START;
           OPUS_CLEAR(reset_start,
                 sizeof(OpusEncoder)-
                 (reset_start - (char*)st));
           st->arch = arch;

           celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
           silk_InitEncoder( silk_enc, st->arch, &dummy );

           /* Re-establish the non-zero starting values of the cleared
              fields. */
           st->stream_channels = st->channels;
           st->hybrid_stereo_width_Q14 = 1 << 14;
           st->prev_HB_gain = Q15ONE;
           st->first = 1;
           st->mode = MODE_HYBRID;
           st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
           st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
        }
        break;
        case OPUS_SET_FORCE_MODE_REQUEST:
        {
            opus_int32 value = va_arg(ap, opus_int32);
            /* Either OPUS_AUTO or a mode in MODE_SILK_ONLY..MODE_CELT_ONLY. */
            if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO)
            {
               goto bad_arg;
            }
            st->user_forced_mode = value;
        }
        break;
        case OPUS_SET_LFE_REQUEST:
        {
            /* The value is not range-checked here; the CELT ctl result is
               what the caller gets back. */
            opus_int32 value = va_arg(ap, opus_int32);
            st->lfe = value;
            ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
        }
        break;
        case OPUS_SET_ENERGY_MASK_REQUEST:
        {
            /* The pointer itself is stored (no copy is made) and also handed
               to CELT; a null pointer is not rejected here. */
            opus_val16 *value = va_arg(ap, opus_val16*);
            st->energy_masking = value;
            ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
        }
        break;

        case CELT_GET_MODE_REQUEST:
        {
           const CELTMode ** value = va_arg(ap, const CELTMode**);
           if (!value)
           {
              goto bad_arg;
           }
           ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
        }
        break;
        default:
            ret = OPUS_UNIMPLEMENTED;
            break;
    }
    va_end(ap);
    return ret;
    /* Shared failure exit: every rejected argument ends up here so that
       va_end() is always paired with the va_start() above. */
bad_arg:
    va_end(ap);
    return OPUS_BAD_ARG;
}

/* Releases an encoder by passing the pointer straight to opus_free(). */
void opus_encoder_destroy(OpusEncoder *st)
{
    opus_free(st);
}