1 /* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited 2 Written by Jean-Marc Valin and Koen Vos */ 3 /* 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 - Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 - Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #include <stdarg.h> 33 #include "celt.h" 34 #include "entenc.h" 35 #include "modes.h" 36 #include "API.h" 37 #include "stack_alloc.h" 38 #include "float_cast.h" 39 #include "opus.h" 40 #include "arch.h" 41 #include "opus_private.h" 42 #include "os_support.h" 43 #include "cpu_support.h" 44 #include "analysis.h" 45 #include "mathops.h" 46 #include "tuning_parameters.h" 47 #ifdef FIXED_POINT 48 #include "fixed/structs_FIX.h" 49 #else 50 #include "float/structs_FLP.h" 51 #endif 52 53 #define MAX_ENCODER_BUFFER 480 54 55 typedef struct { 56 opus_val32 XX, XY, YY; 57 opus_val16 smoothed_width; 58 opus_val16 max_follower; 59 } StereoWidthState; 60 61 struct OpusEncoder { 62 int celt_enc_offset; 63 int silk_enc_offset; 64 silk_EncControlStruct silk_mode; 65 int application; 66 int channels; 67 int delay_compensation; 68 int force_channels; 69 int signal_type; 70 int user_bandwidth; 71 int max_bandwidth; 72 int user_forced_mode; 73 int voice_ratio; 74 opus_int32 Fs; 75 int use_vbr; 76 int vbr_constraint; 77 int variable_duration; 78 opus_int32 bitrate_bps; 79 opus_int32 user_bitrate_bps; 80 int lsb_depth; 81 int encoder_buffer; 82 int lfe; 83 84 #define OPUS_ENCODER_RESET_START stream_channels 85 int stream_channels; 86 opus_int16 hybrid_stereo_width_Q14; 87 opus_int32 variable_HP_smth2_Q15; 88 opus_val16 prev_HB_gain; 89 opus_val32 hp_mem[4]; 90 int mode; 91 int prev_mode; 92 int prev_channels; 93 int prev_framesize; 94 int bandwidth; 95 int silk_bw_switch; 96 /* Sampling rate (at the API level) */ 97 int first; 98 opus_val16 * energy_masking; 99 StereoWidthState width_mem; 100 opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; 101 #ifndef DISABLE_FLOAT_API 102 TonalityAnalysisState analysis; 103 int detected_bandwidth; 104 int analysis_offset; 105 #endif 106 opus_uint32 rangeFinal; 107 int arch; 108 }; 109 110 /* Transition tables for the voice and music. First column is the 111 middle (memoriless) threshold. The second column is the hysteresis 112 (difference with the middle) */ 113 static const opus_int32 mono_voice_bandwidth_thresholds[8] = { 114 11000, 1000, /* NB<->MB */ 115 14000, 1000, /* MB<->WB */ 116 17000, 1000, /* WB<->SWB */ 117 20000, 1000, /* SWB<->FB */ 118 }; 119 static const opus_int32 mono_music_bandwidth_thresholds[8] = { 120 14000, 1000, /* MB not allowed */ 121 18000, 2000, /* MB<->WB */ 122 24000, 2000, /* WB<->SWB */ 123 33000, 2000, /* SWB<->FB */ 124 }; 125 static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { 126 11000, 1000, /* NB<->MB */ 127 14000, 1000, /* MB<->WB */ 128 21000, 2000, /* WB<->SWB */ 129 32000, 2000, /* SWB<->FB */ 130 }; 131 static const opus_int32 stereo_music_bandwidth_thresholds[8] = { 132 14000, 1000, /* MB not allowed */ 133 18000, 2000, /* MB<->WB */ 134 24000, 2000, /* WB<->SWB */ 135 48000, 2000, /* SWB<->FB */ 136 }; 137 /* Threshold bit-rates for switching between mono and stereo */ 138 static const opus_int32 stereo_voice_threshold = 31000; 139 static const opus_int32 stereo_music_threshold = 31000; 140 141 /* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ 142 static const opus_int32 mode_thresholds[2][2] = { 143 /* voice */ /* music */ 144 { 64000, 20000}, /* mono */ 145 { 36000, 20000}, /* stereo */ 146 }; 147 148 int opus_encoder_get_size(int channels) 149 { 150 int silkEncSizeBytes, celtEncSizeBytes; 151 int ret; 152 if (channels<1 || channels > 2) 153 return 0; 154 ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); 155 if (ret) 156 return 0; 157 silkEncSizeBytes = align(silkEncSizeBytes); 158 celtEncSizeBytes = celt_encoder_get_size(channels); 159 return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes; 160 } 161 162 int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application) 163 { 164 void *silk_enc; 165 CELTEncoder *celt_enc; 166 int err; 167 int ret, silkEncSizeBytes; 168 169 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| 170 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO 171 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) 172 return OPUS_BAD_ARG; 173 174 OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); 175 /* Create SILK encoder */ 176 ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); 177 if (ret) 178 return OPUS_BAD_ARG; 179 silkEncSizeBytes = align(silkEncSizeBytes); 180 st->silk_enc_offset = align(sizeof(OpusEncoder)); 181 st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; 182 silk_enc = (char*)st+st->silk_enc_offset; 183 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 184 185 st->stream_channels = st->channels = channels; 186 187 st->Fs = Fs; 188 189 st->arch = opus_select_arch(); 190 191 ret = silk_InitEncoder( silk_enc, &st->silk_mode ); 192 if(ret)return OPUS_INTERNAL_ERROR; 193 194 /* default SILK parameters */ 195 st->silk_mode.nChannelsAPI = channels; 196 st->silk_mode.nChannelsInternal = channels; 197 st->silk_mode.API_sampleRate = st->Fs; 198 st->silk_mode.maxInternalSampleRate = 16000; 199 st->silk_mode.minInternalSampleRate = 8000; 200 st->silk_mode.desiredInternalSampleRate = 16000; 201 st->silk_mode.payloadSize_ms = 20; 202 st->silk_mode.bitRate = 25000; 203 st->silk_mode.packetLossPercentage = 0; 204 st->silk_mode.complexity = 9; 205 st->silk_mode.useInBandFEC = 0; 206 st->silk_mode.useDTX = 0; 207 st->silk_mode.useCBR = 0; 208 209 /* Create CELT encoder */ 210 /* Initialize CELT encoder */ 211 err = celt_encoder_init(celt_enc, Fs, channels); 212 if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; 213 214 celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); 215 celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); 216 217 st->use_vbr = 1; 218 /* Makes constrained VBR the default (safer for real-time use) */ 219 st->vbr_constraint = 1; 220 st->user_bitrate_bps = OPUS_AUTO; 221 st->bitrate_bps = 3000+Fs*channels; 222 st->application = application; 223 st->signal_type = OPUS_AUTO; 224 st->user_bandwidth = OPUS_AUTO; 225 st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND; 226 st->force_channels = OPUS_AUTO; 227 st->user_forced_mode = OPUS_AUTO; 228 st->voice_ratio = -1; 229 st->encoder_buffer = st->Fs/100; 230 st->lsb_depth = 24; 231 st->variable_duration = OPUS_FRAMESIZE_ARG; 232 233 /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 234 + 1.5 ms for SILK resamplers and stereo prediction) */ 235 st->delay_compensation = st->Fs/250; 236 237 st->hybrid_stereo_width_Q14 = 1 << 14; 238 st->prev_HB_gain = Q15ONE; 239 st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 240 st->first = 1; 241 st->mode = MODE_HYBRID; 242 st->bandwidth = OPUS_BANDWIDTH_FULLBAND; 243 244 return OPUS_OK; 245 } 246 247 static int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len) 248 { 249 if (len == new_len) 250 return 0; 251 if (len > new_len) 252 return 1; 253 254 if ((data[0]&0x3)==0) 255 { 256 int i; 257 int padding, nb_255s; 258 259 padding = new_len - len; 260 if (padding >= 2) 261 { 262 nb_255s = (padding-2)/255; 263 264 for (i=len-1;i>=1;i--) 265 data[i+nb_255s+2] = data[i]; 266 data[0] |= 0x3; 267 data[1] = 0x41; 268 for (i=0;i<nb_255s;i++) 269 data[i+2] = 255; 270 data[nb_255s+2] = padding-255*nb_255s-2; 271 for (i=len+3+nb_255s;i<new_len;i++) 272 data[i] = 0; 273 } else { 274 for (i=len-1;i>=1;i--) 275 data[i+1] = data[i]; 276 data[0] |= 0x3; 277 data[1] = 1; 278 } 279 return 0; 280 } else { 281 return 1; 282 } 283 } 284 285 static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels) 286 { 287 int period; 288 unsigned char toc; 289 period = 0; 290 while (framerate < 400) 291 { 292 framerate <<= 1; 293 period++; 294 } 295 if (mode == MODE_SILK_ONLY) 296 { 297 toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5; 298 toc |= (period-2)<<3; 299 } else if (mode == MODE_CELT_ONLY) 300 { 301 int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND; 302 if (tmp < 0) 303 tmp = 0; 304 toc = 0x80; 305 toc |= tmp << 5; 306 toc |= period<<3; 307 } else /* Hybrid */ 308 { 309 toc = 0x60; 310 toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4; 311 toc |= (period-2)<<3; 312 } 313 toc |= (channels==2)<<2; 314 return toc; 315 } 316 317 #ifndef FIXED_POINT 318 static void silk_biquad_float( 319 const opus_val16 *in, /* I: Input signal */ 320 const opus_int32 *B_Q28, /* I: MA coefficients [3] */ 321 const opus_int32 *A_Q28, /* I: AR coefficients [2] */ 322 opus_val32 *S, /* I/O: State vector [2] */ 323 opus_val16 *out, /* O: Output signal */ 324 const opus_int32 len, /* I: Signal length (must be even) */ 325 int stride 326 ) 327 { 328 /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ 329 opus_int k; 330 opus_val32 vout; 331 opus_val32 inval; 332 opus_val32 A[2], B[3]; 333 334 A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28))); 335 A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28))); 336 B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28))); 337 B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28))); 338 B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28))); 339 340 /* Negate A_Q28 values and split in two parts */ 341 342 for( k = 0; k < len; k++ ) { 343 /* S[ 0 ], S[ 1 ]: Q12 */ 344 inval = in[ k*stride ]; 345 vout = S[ 0 ] + B[0]*inval; 346 347 S[ 0 ] = S[1] - vout*A[0] + B[1]*inval; 348 349 S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL; 350 351 /* Scale back to Q0 and saturate */ 352 out[ k*stride ] = vout; 353 } 354 } 355 #endif 356 357 static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 358 { 359 opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; 360 opus_int32 Fc_Q19, r_Q28, r_Q22; 361 362 silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); 363 Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); 364 silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 ); 365 366 r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); 367 368 /* b = r * [ 1; -2; 1 ]; */ 369 /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ 370 B_Q28[ 0 ] = r_Q28; 371 B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 ); 372 B_Q28[ 2 ] = r_Q28; 373 374 /* -r * ( 2 - Fc * Fc ); */ 375 r_Q22 = silk_RSHIFT( r_Q28, 6 ); 376 A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) ); 377 A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); 378 379 #ifdef FIXED_POINT 380 silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels ); 381 if( channels == 2 ) { 382 silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); 383 } 384 #else 385 silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); 386 if( channels == 2 ) { 387 silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); 388 } 389 #endif 390 } 391 392 #ifdef FIXED_POINT 393 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 394 { 395 int c, i; 396 int shift; 397 398 /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ 399 shift=celt_ilog2(Fs/(cutoff_Hz*3)); 400 for (c=0;c<channels;c++) 401 { 402 for (i=0;i<len;i++) 403 { 404 opus_val32 x, tmp, y; 405 x = SHL32(EXTEND32(in[channels*i+c]), 15); 406 /* First stage */ 407 tmp = x-hp_mem[2*c]; 408 hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); 409 /* Second stage */ 410 y = tmp - hp_mem[2*c+1]; 411 hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); 412 out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); 413 } 414 } 415 } 416 417 #else 418 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 419 { 420 int c, i; 421 float coef; 422 423 coef = 4.0f*cutoff_Hz/Fs; 424 for (c=0;c<channels;c++) 425 { 426 for (i=0;i<len;i++) 427 { 428 opus_val32 x, tmp, y; 429 x = in[channels*i+c]; 430 /* First stage */ 431 tmp = x-hp_mem[2*c]; 432 hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL; 433 /* Second stage */ 434 y = tmp - hp_mem[2*c+1]; 435 hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL; 436 out[channels*i+c] = y; 437 } 438 } 439 } 440 #endif 441 442 static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, 443 int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) 444 { 445 int i; 446 int overlap; 447 int inc; 448 inc = 48000/Fs; 449 overlap=overlap48/inc; 450 g1 = Q15ONE-g1; 451 g2 = Q15ONE-g2; 452 for (i=0;i<overlap;i++) 453 { 454 opus_val32 diff; 455 opus_val16 g, w; 456 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 457 g = SHR32(MAC16_16(MULT16_16(w,g2), 458 Q15ONE-w, g1), 15); 459 diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); 460 diff = MULT16_16_Q15(g, diff); 461 out[i*channels] = out[i*channels] - diff; 462 out[i*channels+1] = out[i*channels+1] + diff; 463 } 464 for (;i<frame_size;i++) 465 { 466 opus_val32 diff; 467 diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); 468 diff = MULT16_16_Q15(g2, diff); 469 out[i*channels] = out[i*channels] - diff; 470 out[i*channels+1] = out[i*channels+1] + diff; 471 } 472 } 473 474 static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, 475 int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) 476 { 477 int i; 478 int inc; 479 int overlap; 480 int c; 481 inc = 48000/Fs; 482 overlap=overlap48/inc; 483 if (channels==1) 484 { 485 for (i=0;i<overlap;i++) 486 { 487 opus_val16 g, w; 488 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 489 g = SHR32(MAC16_16(MULT16_16(w,g2), 490 Q15ONE-w, g1), 15); 491 out[i] = MULT16_16_Q15(g, in[i]); 492 } 493 } else { 494 for (i=0;i<overlap;i++) 495 { 496 opus_val16 g, w; 497 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 498 g = SHR32(MAC16_16(MULT16_16(w,g2), 499 Q15ONE-w, g1), 15); 500 out[i*2] = MULT16_16_Q15(g, in[i*2]); 501 out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]); 502 } 503 } 504 c=0;do { 505 for (i=overlap;i<frame_size;i++) 506 { 507 out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]); 508 } 509 } 510 while (++c<channels); 511 } 512 513 OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error) 514 { 515 int ret; 516 OpusEncoder *st; 517 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| 518 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO 519 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) 520 { 521 if (error) 522 *error = OPUS_BAD_ARG; 523 return NULL; 524 } 525 st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels)); 526 if (st == NULL) 527 { 528 if (error) 529 *error = OPUS_ALLOC_FAIL; 530 return NULL; 531 } 532 ret = opus_encoder_init(st, Fs, channels, application); 533 if (error) 534 *error = ret; 535 if (ret != OPUS_OK) 536 { 537 opus_free(st); 538 st = NULL; 539 } 540 return st; 541 } 542 543 static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes) 544 { 545 if(!frame_size)frame_size=st->Fs/400; 546 if (st->user_bitrate_bps==OPUS_AUTO) 547 return 60*st->Fs/frame_size + st->Fs*st->channels; 548 else if (st->user_bitrate_bps==OPUS_BITRATE_MAX) 549 return max_data_bytes*8*st->Fs/frame_size; 550 else 551 return st->user_bitrate_bps; 552 } 553 554 #ifndef DISABLE_FLOAT_API 555 /* Don't use more than 60 ms for the frame size analysis */ 556 #define MAX_DYNAMIC_FRAMESIZE 24 557 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */ 558 static float transient_boost(const float *E, const float *E_1, int LM, int maxM) 559 { 560 int i; 561 int M; 562 float sumE=0, sumE_1=0; 563 float metric; 564 565 M = IMIN(maxM, (1<<LM)+1); 566 for (i=0;i<M;i++) 567 { 568 sumE += E[i]; 569 sumE_1 += E_1[i]; 570 } 571 metric = sumE*sumE_1/(M*M); 572 /*if (LM==3) 573 printf("%f\n", metric);*/ 574 /*return metric>10 ? 1 : 0;*/ 575 /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ 576 return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); 577 } 578 579 /* Viterbi decoding trying to find the best frame size combination using look-ahead 580 581 State numbering: 582 0: unused 583 1: 2.5 ms 584 2: 5 ms (#1) 585 3: 5 ms (#2) 586 4: 10 ms (#1) 587 5: 10 ms (#2) 588 6: 10 ms (#3) 589 7: 10 ms (#4) 590 8: 20 ms (#1) 591 9: 20 ms (#2) 592 10: 20 ms (#3) 593 11: 20 ms (#4) 594 12: 20 ms (#5) 595 13: 20 ms (#6) 596 14: 20 ms (#7) 597 15: 20 ms (#8) 598 */ 599 static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) 600 { 601 int i; 602 float cost[MAX_DYNAMIC_FRAMESIZE][16]; 603 int states[MAX_DYNAMIC_FRAMESIZE][16]; 604 float best_cost; 605 int best_state; 606 float factor; 607 /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ 608 if (rate<80) 609 factor=0; 610 else if (rate>160) 611 factor=1; 612 else 613 factor = (rate-80.f)/80.f; 614 /* Makes variable framesize less aggressive at lower bitrates, but I can't 615 find any valid theoretical justification for this (other than it seems 616 to help) */ 617 for (i=0;i<16;i++) 618 { 619 /* Impossible state */ 620 states[0][i] = -1; 621 cost[0][i] = 1e10; 622 } 623 for (i=0;i<4;i++) 624 { 625 cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1)); 626 states[0][1<<i] = i; 627 } 628 for (i=1;i<N;i++) 629 { 630 int j; 631 632 /* Follow continuations */ 633 for (j=2;j<16;j++) 634 { 635 cost[i][j] = cost[i-1][j-1]; 636 states[i][j] = j-1; 637 } 638 639 /* New frames */ 640 for(j=0;j<4;j++) 641 { 642 int k; 643 float min_cost; 644 float curr_cost; 645 states[i][1<<j] = 1; 646 min_cost = cost[i-1][1]; 647 for(k=1;k<4;k++) 648 { 649 float tmp = cost[i-1][(1<<(k+1))-1]; 650 if (tmp < min_cost) 651 { 652 states[i][1<<j] = (1<<(k+1))-1; 653 min_cost = tmp; 654 } 655 } 656 curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1)); 657 cost[i][1<<j] = min_cost; 658 /* If part of the frame is outside the analysis window, only count part of the cost */ 659 if (N-i < (1<<j)) 660 cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j); 661 else 662 cost[i][1<<j] += curr_cost; 663 } 664 } 665 666 best_state=1; 667 best_cost = cost[N-1][1]; 668 /* Find best end state (doesn't force a frame to end at N-1) */ 669 for (i=2;i<16;i++) 670 { 671 if (cost[N-1][i]<best_cost) 672 { 673 best_cost = cost[N-1][i]; 674 best_state = i; 675 } 676 } 677 678 /* Follow transitions back */ 679 for (i=N-1;i>=0;i--) 680 { 681 /*printf("%d ", best_state);*/ 682 best_state = states[i][best_state]; 683 } 684 /*printf("%d\n", best_state);*/ 685 return best_state; 686 } 687 688 int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, 689 int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, 690 downmix_func downmix) 691 { 692 int N; 693 int i; 694 float e[MAX_DYNAMIC_FRAMESIZE+4]; 695 float e_1[MAX_DYNAMIC_FRAMESIZE+3]; 696 float memx; 697 int bestLM=0; 698 int subframe; 699 int pos; 700 VARDECL(opus_val32, sub); 701 702 subframe = Fs/400; 703 ALLOC(sub, subframe, opus_val32); 704 e[0]=mem[0]; 705 e_1[0]=1.f/(EPSILON+mem[0]); 706 if (buffering) 707 { 708 /* Consider the CELT delay when not in restricted-lowdelay */ 709 /* We assume the buffering is between 2.5 and 5 ms */ 710 int offset = 2*subframe - buffering; 711 celt_assert(offset>=0 && offset <= subframe); 712 x += C*offset; 713 len -= offset; 714 e[1]=mem[1]; 715 e_1[1]=1.f/(EPSILON+mem[1]); 716 e[2]=mem[2]; 717 e_1[2]=1.f/(EPSILON+mem[2]); 718 pos = 3; 719 } else { 720 pos=1; 721 } 722 N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); 723 memx = x[0]; 724 for (i=0;i<N;i++) 725 { 726 float tmp; 727 float tmpx; 728 int j; 729 tmp=EPSILON; 730 731 downmix(x, sub, subframe, i*subframe, 0, -2, C); 732 if (i==0) 733 memx = sub[0]; 734 for (j=0;j<subframe;j++) 735 { 736 tmpx = sub[j]; 737 tmp += (tmpx-memx)*(tmpx-memx); 738 memx = tmpx; 739 } 740 e[i+pos] = tmp; 741 e_1[i+pos] = 1.f/tmp; 742 } 743 /* Hack to get 20 ms working with APPLICATION_AUDIO 744 The real problem is that the corresponding memory needs to use 1.5 ms 745 from this frame and 1 ms from the next frame */ 746 e[i+pos] = e[i+pos-1]; 747 if (buffering) 748 N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2); 749 bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400); 750 mem[0] = e[1<<bestLM]; 751 if (buffering) 752 { 753 mem[1] = e[(1<<bestLM)+1]; 754 mem[2] = e[(1<<bestLM)+2]; 755 } 756 return bestLM; 757 } 758 759 #endif 760 761 #ifndef DISABLE_FLOAT_API 762 #ifdef FIXED_POINT 763 #define PCM2VAL(x) FLOAT2INT16(x) 764 #else 765 #define PCM2VAL(x) SCALEIN(x) 766 #endif 767 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) 768 { 769 const float *x; 770 opus_val32 scale; 771 int j; 772 x = (const float *)_x; 773 for (j=0;j<subframe;j++) 774 sub[j] = PCM2VAL(x[(j+offset)*C+c1]); 775 if (c2>-1) 776 { 777 for (j=0;j<subframe;j++) 778 sub[j] += PCM2VAL(x[(j+offset)*C+c2]); 779 } else if (c2==-2) 780 { 781 int c; 782 for (c=1;c<C;c++) 783 { 784 for (j=0;j<subframe;j++) 785 sub[j] += PCM2VAL(x[(j+offset)*C+c]); 786 } 787 } 788 #ifdef FIXED_POINT 789 scale = (1<<SIG_SHIFT); 790 #else 791 scale = 1.f; 792 #endif 793 if (C==-2) 794 scale /= C; 795 else 796 scale /= 2; 797 for (j=0;j<subframe;j++) 798 sub[j] *= scale; 799 } 800 #endif 801 802 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) 803 { 804 const opus_int16 *x; 805 opus_val32 scale; 806 int j; 807 x = (const opus_int16 *)_x; 808 for (j=0;j<subframe;j++) 809 sub[j] = x[(j+offset)*C+c1]; 810 if (c2>-1) 811 { 812 for (j=0;j<subframe;j++) 813 sub[j] += x[(j+offset)*C+c2]; 814 } else if (c2==-2) 815 { 816 int c; 817 for (c=1;c<C;c++) 818 { 819 for (j=0;j<subframe;j++) 820 sub[j] += x[(j+offset)*C+c]; 821 } 822 } 823 #ifdef FIXED_POINT 824 scale = (1<<SIG_SHIFT); 825 #else 826 scale = 1.f/32768; 827 #endif 828 if (C==-2) 829 scale /= C; 830 else 831 scale /= 2; 832 for (j=0;j<subframe;j++) 833 sub[j] *= scale; 834 } 835 836 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) 837 { 838 int new_size; 839 if (frame_size<Fs/400) 840 return -1; 841 if (variable_duration == OPUS_FRAMESIZE_ARG) 842 new_size = frame_size; 843 else if (variable_duration == OPUS_FRAMESIZE_VARIABLE) 844 new_size = Fs/50; 845 else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) 846 new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); 847 else 848 return -1; 849 if (new_size>frame_size) 850 return -1; 851 if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && 852 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) 853 return -1; 854 return new_size; 855 } 856 857 opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, 858 int variable_duration, int C, opus_int32 Fs, int bitrate_bps, 859 int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem) 860 { 861 #ifndef DISABLE_FLOAT_API 862 if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) 863 { 864 int LM = 3; 865 LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, 866 0, subframe_mem, delay_compensation, downmix); 867 while ((Fs/400<<LM)>frame_size) 868 LM--; 869 frame_size = (Fs/400<<LM); 870 } else 871 #endif 872 { 873 frame_size = frame_size_select(frame_size, variable_duration, Fs); 874 } 875 if (frame_size<0) 876 return -1; 877 return frame_size; 878 } 879 880 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) 881 { 882 opus_val16 corr; 883 opus_val16 ldiff; 884 opus_val16 width; 885 opus_val32 xx, xy, yy; 886 opus_val16 sqrt_xx, sqrt_yy; 887 opus_val16 qrrt_xx, qrrt_yy; 888 int frame_rate; 889 int i; 890 opus_val16 short_alpha; 891 892 frame_rate = Fs/frame_size; 893 short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate); 894 xx=xy=yy=0; 895 for (i=0;i<frame_size;i+=4) 896 { 897 opus_val32 pxx=0; 898 opus_val32 pxy=0; 899 opus_val32 pyy=0; 900 opus_val16 x, y; 901 x = pcm[2*i]; 902 y = pcm[2*i+1]; 903 pxx = SHR32(MULT16_16(x,x),2); 904 pxy = SHR32(MULT16_16(x,y),2); 905 pyy = SHR32(MULT16_16(y,y),2); 906 x = pcm[2*i+2]; 907 y = pcm[2*i+3]; 908 pxx += SHR32(MULT16_16(x,x),2); 909 pxy += SHR32(MULT16_16(x,y),2); 910 pyy += SHR32(MULT16_16(y,y),2); 911 x = pcm[2*i+4]; 912 y = pcm[2*i+5]; 913 pxx += SHR32(MULT16_16(x,x),2); 914 pxy += SHR32(MULT16_16(x,y),2); 915 pyy += SHR32(MULT16_16(y,y),2); 916 x = pcm[2*i+6]; 917 y = pcm[2*i+7]; 918 pxx += SHR32(MULT16_16(x,x),2); 919 pxy += SHR32(MULT16_16(x,y),2); 920 pyy += SHR32(MULT16_16(y,y),2); 921 922 xx += SHR32(pxx, 10); 923 xy += SHR32(pxy, 10); 924 yy += SHR32(pyy, 10); 925 } 926 mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX); 927 mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); 928 mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); 929 mem->XX = MAX32(0, mem->XX); 930 mem->XY = MAX32(0, mem->XY); 931 mem->YY = MAX32(0, mem->YY); 932 if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18)) 933 { 934 sqrt_xx = celt_sqrt(mem->XX); 935 sqrt_yy = celt_sqrt(mem->YY); 936 qrrt_xx = celt_sqrt(sqrt_xx); 937 qrrt_yy = celt_sqrt(sqrt_yy); 938 /* Inter-channel correlation */ 939 mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy); 940 corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16); 941 /* Approximate loudness difference */ 942 ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy); 943 width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff); 944 /* Smoothing over one second */ 945 mem->smoothed_width += (width-mem->smoothed_width)/frame_rate; 946 /* Peak follower */ 947 mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width); 948 } else { 949 width = 0; 950 corr=Q15ONE; 951 ldiff=0; 952 } 953 /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/ 954 return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower)); 955 } 956 957 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, 958 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, 959 const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix) 960 { 961 void *silk_enc; 962 CELTEncoder *celt_enc; 963 int i; 964 int ret=0; 965 opus_int32 nBytes; 966 ec_enc enc; 967 int bytes_target; 968 int prefill=0; 969 int start_band = 0; 970 int redundancy = 0; 971 int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */ 972 int celt_to_silk = 0; 973 VARDECL(opus_val16, pcm_buf); 974 int nb_compr_bytes; 975 int to_celt = 0; 976 opus_uint32 redundant_rng = 0; 977 int cutoff_Hz, hp_freq_smth1; 978 int voice_est; /* Probability of voice in Q7 */ 979 opus_int32 equiv_rate; 980 int delay_compensation; 981 int frame_rate; 982 opus_int32 max_rate; /* Max bitrate we're allowed to use */ 983 int curr_bandwidth; 984 opus_val16 HB_gain; 985 opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ 986 int total_buffer; 987 opus_val16 stereo_width; 988 const CELTMode *celt_mode; 989 AnalysisInfo analysis_info; 990 int analysis_read_pos_bak=-1; 991 int analysis_read_subframe_bak=-1; 992 VARDECL(opus_val16, tmp_prefill); 993 994 ALLOC_STACK; 995 996 max_data_bytes = IMIN(1276, out_data_bytes); 997 998 st->rangeFinal = 0; 999 if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && 1000 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) 1001 || (400*frame_size < st->Fs) 1002 || max_data_bytes<=0 1003 ) 1004 { 1005 RESTORE_STACK; 1006 return OPUS_BAD_ARG; 1007 } 1008 silk_enc = (char*)st+st->silk_enc_offset; 1009 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 1010 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1011 delay_compensation = 0; 1012 else 1013 delay_compensation = st->delay_compensation; 1014 1015 lsb_depth = IMIN(lsb_depth, st->lsb_depth); 1016 1017 analysis_info.valid = 0; 1018 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); 1019 #ifndef DISABLE_FLOAT_API 1020 #ifdef FIXED_POINT 1021 if (st->silk_mode.complexity >= 10 && st->Fs==48000) 1022 #else 1023 if (st->silk_mode.complexity >= 7 && st->Fs==48000) 1024 #endif 1025 { 1026 analysis_read_pos_bak = st->analysis.read_pos; 1027 analysis_read_subframe_bak = st->analysis.read_subframe; 1028 run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, 1029 c1, c2, analysis_channels, st->Fs, 1030 lsb_depth, downmix, &analysis_info); 1031 } 1032 #endif 1033 1034 st->voice_ratio = -1; 1035 1036 #ifndef DISABLE_FLOAT_API 1037 st->detected_bandwidth = 0; 1038 if (analysis_info.valid) 1039 { 1040 int analysis_bandwidth; 1041 if (st->signal_type == OPUS_AUTO) 1042 st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); 1043 1044 analysis_bandwidth = analysis_info.bandwidth; 1045 if (analysis_bandwidth<=12) 1046 st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1047 else if (analysis_bandwidth<=14) 1048 st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1049 else if (analysis_bandwidth<=16) 1050 st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1051 else if (analysis_bandwidth<=18) 1052 st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1053 else 1054 st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; 1055 } 1056 #endif 1057 1058 if (st->channels==2 && st->force_channels!=1) 1059 stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem); 1060 else 1061 stereo_width = 0; 1062 total_buffer = delay_compensation; 1063 st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); 1064 1065 frame_rate = st->Fs/frame_size; 1066 if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 1067 || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) 1068 { 1069 /*If the space is too low to do something useful, emit 'PLC' frames.*/ 1070 int tocmode = st->mode; 1071 int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; 1072 if (tocmode==0) 1073 tocmode = MODE_SILK_ONLY; 1074 if (frame_rate>100) 1075 tocmode = MODE_CELT_ONLY; 1076 if (frame_rate < 50) 1077 tocmode = MODE_SILK_ONLY; 1078 if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) 1079 bw=OPUS_BANDWIDTH_WIDEBAND; 1080 else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) 1081 bw=OPUS_BANDWIDTH_NARROWBAND; 1082 else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) 1083 bw=OPUS_BANDWIDTH_SUPERWIDEBAND; 1084 data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); 1085 RESTORE_STACK; 1086 return 1; 1087 } 1088 if (!st->use_vbr) 1089 { 1090 int cbrBytes; 1091 cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes); 1092 st->bitrate_bps = cbrBytes * (8*frame_rate); 1093 max_data_bytes = cbrBytes; 1094 } 1095 max_rate = frame_rate*max_data_bytes*8; 1096 1097 /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ 1098 equiv_rate = st->bitrate_bps - 60*(st->Fs/frame_size - 50); 1099 1100 if (st->signal_type == OPUS_SIGNAL_VOICE) 1101 voice_est = 127; 1102 else if (st->signal_type == OPUS_SIGNAL_MUSIC) 1103 voice_est = 0; 1104 else if (st->voice_ratio >= 0) 1105 { 1106 voice_est = st->voice_ratio*327>>8; 1107 /* For AUDIO, never be more than 90% confident of having speech */ 1108 if (st->application == OPUS_APPLICATION_AUDIO) 1109 voice_est = IMIN(voice_est, 115); 1110 } else if (st->application == OPUS_APPLICATION_VOIP) 1111 voice_est = 115; 1112 else 1113 voice_est = 48; 1114 1115 if (st->force_channels!=OPUS_AUTO && st->channels == 2) 1116 { 1117 st->stream_channels = st->force_channels; 1118 } else { 1119 #ifdef FUZZING 1120 /* Random mono/stereo decision */ 1121 if (st->channels == 2 && (rand()&0x1F)==0) 1122 st->stream_channels = 3-st->stream_channels; 1123 #else 1124 /* Rate-dependent mono-stereo decision */ 1125 if (st->channels == 2) 1126 { 1127 opus_int32 stereo_threshold; 1128 stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14); 1129 if (st->stream_channels == 2) 1130 stereo_threshold -= 1000; 1131 else 1132 stereo_threshold += 1000; 1133 st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1; 1134 } else { 1135 st->stream_channels = st->channels; 1136 } 1137 #endif 1138 } 1139 1140 /* Mode selection depending on application and signal type */ 1141 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1142 { 1143 st->mode = MODE_CELT_ONLY; 1144 } else if (st->user_forced_mode == OPUS_AUTO) 1145 { 1146 #ifdef FUZZING 1147 /* Random mode switching */ 1148 if ((rand()&0xF)==0) 1149 { 1150 if ((rand()&0x1)==0) 1151 st->mode = MODE_CELT_ONLY; 1152 else 1153 st->mode = MODE_SILK_ONLY; 1154 } else { 1155 if (st->prev_mode==MODE_CELT_ONLY) 1156 st->mode = MODE_CELT_ONLY; 1157 else 1158 st->mode = MODE_SILK_ONLY; 1159 } 1160 #else 1161 opus_int32 mode_voice, mode_music; 1162 opus_int32 threshold; 1163 1164 /* Interpolate based on stereo width */ 1165 mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0]) 1166 + MULT16_32_Q15(stereo_width,mode_thresholds[1][0])); 1167 mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) 1168 + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); 1169 /* Interpolate based on speech/music probability */ 1170 threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); 1171 /* Bias towards SILK for VoIP because of some useful features */ 1172 if (st->application == OPUS_APPLICATION_VOIP) 1173 threshold += 8000; 1174 1175 /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/ 1176 /* Hysteresis */ 1177 if (st->prev_mode == MODE_CELT_ONLY) 1178 threshold -= 4000; 1179 else if (st->prev_mode>0) 1180 threshold += 4000; 1181 1182 st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY; 1183 1184 /* When FEC is enabled and there's enough packet loss, use SILK */ 1185 if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) 1186 st->mode = MODE_SILK_ONLY; 1187 /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */ 1188 if (st->silk_mode.useDTX && voice_est > 100) 1189 st->mode = MODE_SILK_ONLY; 1190 #endif 1191 } else { 1192 st->mode = st->user_forced_mode; 1193 } 1194 1195 /* Override the chosen mode to make sure we meet the requested frame size */ 1196 if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100) 1197 st->mode = MODE_CELT_ONLY; 1198 1199 if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 1200 && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) 1201 { 1202 /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ 1203 st->silk_mode.toMono = 1; 1204 st->stream_channels = 2; 1205 } else { 1206 st->silk_mode.toMono = 0; 1207 } 1208 1209 if (st->prev_mode > 0 && 1210 ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || 1211 (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) 1212 { 1213 redundancy = 1; 1214 celt_to_silk = (st->mode != MODE_CELT_ONLY); 1215 if (!celt_to_silk) 1216 { 1217 /* Switch to SILK/hybrid if frame size is 10 ms or more*/ 1218 if (frame_size >= st->Fs/100) 1219 { 1220 st->mode = st->prev_mode; 1221 to_celt = 1; 1222 } else { 1223 redundancy=0; 1224 } 1225 } 1226 } 1227 /* For the first frame at a new SILK bandwidth */ 1228 if (st->silk_bw_switch) 1229 { 1230 redundancy = 1; 1231 celt_to_silk = 1; 1232 st->silk_bw_switch = 0; 1233 prefill=1; 1234 } 1235 1236 if (redundancy) 1237 { 1238 /* Fair share of the max size allowed */ 1239 redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200)); 1240 /* For VBR, target the actual bitrate (subject to the limit above) */ 1241 if (st->use_vbr) 1242 redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600); 1243 } 1244 1245 if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) 1246 { 1247 silk_EncControlStruct dummy; 1248 silk_InitEncoder( silk_enc, &dummy); 1249 prefill=1; 1250 } 1251 1252 /* Automatic (rate-dependent) bandwidth selection */ 1253 if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) 1254 { 1255 const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; 1256 opus_int32 bandwidth_thresholds[8]; 1257 int bandwidth = OPUS_BANDWIDTH_FULLBAND; 1258 opus_int32 equiv_rate2; 1259 1260 equiv_rate2 = equiv_rate; 1261 if (st->mode != MODE_CELT_ONLY) 1262 { 1263 /* Adjust the threshold +/- 10% depending on complexity */ 1264 equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50; 1265 /* CBR is less efficient by ~1 kb/s */ 1266 if (!st->use_vbr) 1267 equiv_rate2 -= 1000; 1268 } 1269 if (st->channels==2 && st->force_channels!=1) 1270 { 1271 voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; 1272 music_bandwidth_thresholds = stereo_music_bandwidth_thresholds; 1273 } else { 1274 voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds; 1275 music_bandwidth_thresholds = mono_music_bandwidth_thresholds; 1276 } 1277 /* Interpolate bandwidth thresholds depending on voice estimation */ 1278 for (i=0;i<8;i++) 1279 { 1280 bandwidth_thresholds[i] = music_bandwidth_thresholds[i] 1281 + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14); 1282 } 1283 do { 1284 int threshold, hysteresis; 1285 threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; 1286 hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; 1287 if (!st->first) 1288 { 1289 if (st->bandwidth >= bandwidth) 1290 threshold -= hysteresis; 1291 else 1292 threshold += hysteresis; 1293 } 1294 if (equiv_rate2 >= threshold) 1295 break; 1296 } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); 1297 st->bandwidth = bandwidth; 1298 /* Prevents any transition to SWB/FB until the SILK layer has fully 1299 switched to WB mode and turned the variable LP filter off */ 1300 if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1301 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1302 } 1303 1304 if (st->bandwidth>st->max_bandwidth) 1305 st->bandwidth = st->max_bandwidth; 1306 1307 if (st->user_bandwidth != OPUS_AUTO) 1308 st->bandwidth = st->user_bandwidth; 1309 1310 /* This prevents us from using hybrid at unsafe CBR/max rates */ 1311 if (st->mode != MODE_CELT_ONLY && max_rate < 15000) 1312 { 1313 st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND); 1314 } 1315 1316 /* Prevents Opus from wasting bits on frequencies that are above 1317 the Nyquist rate of the input signal */ 1318 if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND) 1319 st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1320 if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1321 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1322 if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND) 1323 st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1324 if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND) 1325 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1326 #ifndef FIXED_POINT 1327 /* Use detected bandwidth to reduce the encoded bandwidth. */ 1328 if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO) 1329 { 1330 int min_detected_bandwidth; 1331 /* Makes bandwidth detection more conservative just in case the detector 1332 gets it wrong when we could have coded a high bandwidth transparently. 1333 When operating in SILK/hybrid mode, we don't go below wideband to avoid 1334 more complicated switches that require redundancy. */ 1335 if (st->bitrate_bps <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY) 1336 min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1337 else if (st->bitrate_bps <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY) 1338 min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1339 else if (st->bitrate_bps <= 30000*st->stream_channels) 1340 min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1341 else if (st->bitrate_bps <= 44000*st->stream_channels) 1342 min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1343 else 1344 min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; 1345 1346 st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth); 1347 st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); 1348 } 1349 #endif 1350 celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); 1351 1352 /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */ 1353 if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8)) 1354 st->mode = MODE_CELT_ONLY; 1355 1356 /* CELT mode doesn't support mediumband, use wideband instead */ 1357 if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) 1358 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1359 if (st->lfe) 1360 { 1361 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1362 st->mode = MODE_CELT_ONLY; 1363 } 1364 1365 /* Can't support higher than wideband for >20 ms frames */ 1366 if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) 1367 { 1368 VARDECL(unsigned char, tmp_data); 1369 int nb_frames; 1370 int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; 1371 VARDECL(OpusRepacketizer, rp); 1372 opus_int32 bytes_per_frame; 1373 1374 if (analysis_read_pos_bak!= -1) 1375 { 1376 st->analysis.read_pos = analysis_read_pos_bak; 1377 st->analysis.read_subframe = analysis_read_subframe_bak; 1378 } 1379 1380 nb_frames = frame_size > st->Fs/25 ? 3 : 2; 1381 bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); 1382 1383 ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); 1384 1385 ALLOC(rp, 1, OpusRepacketizer); 1386 opus_repacketizer_init(rp); 1387 1388 bak_mode = st->user_forced_mode; 1389 bak_bandwidth = st->user_bandwidth; 1390 bak_channels = st->force_channels; 1391 1392 st->user_forced_mode = st->mode; 1393 st->user_bandwidth = st->bandwidth; 1394 st->force_channels = st->stream_channels; 1395 bak_to_mono = st->silk_mode.toMono; 1396 1397 if (bak_to_mono) 1398 st->force_channels = 1; 1399 else 1400 st->prev_channels = st->stream_channels; 1401 for (i=0;i<nb_frames;i++) 1402 { 1403 int tmp_len; 1404 st->silk_mode.toMono = 0; 1405 /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ 1406 if (to_celt && i==nb_frames-1) 1407 st->user_forced_mode = MODE_CELT_ONLY; 1408 tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, 1409 tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, 1410 NULL, 0, c1, c2, analysis_channels, downmix); 1411 if (tmp_len<0) 1412 { 1413 RESTORE_STACK; 1414 return OPUS_INTERNAL_ERROR; 1415 } 1416 ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); 1417 if (ret<0) 1418 { 1419 RESTORE_STACK; 1420 return OPUS_INTERNAL_ERROR; 1421 } 1422 } 1423 ret = opus_repacketizer_out(rp, data, out_data_bytes); 1424 if (ret<0) 1425 { 1426 RESTORE_STACK; 1427 return OPUS_INTERNAL_ERROR; 1428 } 1429 st->user_forced_mode = bak_mode; 1430 st->user_bandwidth = bak_bandwidth; 1431 st->force_channels = bak_channels; 1432 st->silk_mode.toMono = bak_to_mono; 1433 RESTORE_STACK; 1434 return ret; 1435 } 1436 curr_bandwidth = st->bandwidth; 1437 1438 /* Chooses the appropriate mode for speech 1439 *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ 1440 if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1441 st->mode = MODE_HYBRID; 1442 if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) 1443 st->mode = MODE_SILK_ONLY; 1444 1445 /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ 1446 bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; 1447 1448 data += 1; 1449 1450 ec_enc_init(&enc, data, max_data_bytes-1); 1451 1452 ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); 1453 for (i=0;i<total_buffer*st->channels;i++) 1454 pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; 1455 1456 if (st->mode == MODE_CELT_ONLY) 1457 hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 1458 else 1459 hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15; 1460 1461 st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15, 1462 hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); 1463 1464 /* convert from log scale to Hertz */ 1465 cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) ); 1466 1467 if (st->application == OPUS_APPLICATION_VOIP) 1468 { 1469 hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); 1470 } else { 1471 dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); 1472 } 1473 1474 1475 1476 /* SILK processing */ 1477 HB_gain = Q15ONE; 1478 if (st->mode != MODE_CELT_ONLY) 1479 { 1480 opus_int32 total_bitRate, celt_rate; 1481 #ifdef FIXED_POINT 1482 const opus_int16 *pcm_silk; 1483 #else 1484 VARDECL(opus_int16, pcm_silk); 1485 ALLOC(pcm_silk, st->channels*frame_size, opus_int16); 1486 #endif 1487 1488 /* Distribute bits between SILK and CELT */ 1489 total_bitRate = 8 * bytes_target * frame_rate; 1490 if( st->mode == MODE_HYBRID ) { 1491 int HB_gain_ref; 1492 /* Base rate for SILK */ 1493 st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) ); 1494 if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { 1495 /* SILK gets 2/3 of the remaining bits */ 1496 st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3; 1497 } else { /* FULLBAND */ 1498 /* SILK gets 3/5 of the remaining bits */ 1499 st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5; 1500 } 1501 /* Don't let SILK use more than 80% */ 1502 if( st->silk_mode.bitRate > total_bitRate * 4/5 ) { 1503 st->silk_mode.bitRate = total_bitRate * 4/5; 1504 } 1505 /* Increasingly attenuate high band when it gets allocated fewer bits */ 1506 celt_rate = total_bitRate - st->silk_mode.bitRate; 1507 HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600; 1508 HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6); 1509 HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE; 1510 } else { 1511 /* SILK gets all bits */ 1512 st->silk_mode.bitRate = total_bitRate; 1513 } 1514 1515 /* Surround masking for SILK */ 1516 if (st->energy_masking && st->use_vbr && !st->lfe) 1517 { 1518 opus_val32 mask_sum=0; 1519 opus_val16 masking_depth; 1520 opus_int32 rate_offset; 1521 int c; 1522 int end = 17; 1523 opus_int16 srate = 16000; 1524 if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) 1525 { 1526 end = 13; 1527 srate = 8000; 1528 } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) 1529 { 1530 end = 15; 1531 srate = 12000; 1532 } 1533 for (c=0;c<st->channels;c++) 1534 { 1535 for(i=0;i<end;i++) 1536 { 1537 opus_val16 mask; 1538 mask = MAX16(MIN16(st->energy_masking[21*c+i], 1539 QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); 1540 if (mask > 0) 1541 mask = HALF16(mask); 1542 mask_sum += mask; 1543 } 1544 } 1545 /* Conservative rate reduction, we cut the masking in half */ 1546 masking_depth = HALF16(mask_sum / end*st->channels); 1547 rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); 1548 rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); 1549 rate_offset += QCONST16(.4f, DB_SHIFT); 1550 st->silk_mode.bitRate += rate_offset; 1551 bytes_target += rate_offset * frame_size / (8 * st->Fs); 1552 } 1553 1554 st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; 1555 st->silk_mode.nChannelsAPI = st->channels; 1556 st->silk_mode.nChannelsInternal = st->stream_channels; 1557 if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 1558 st->silk_mode.desiredInternalSampleRate = 8000; 1559 } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 1560 st->silk_mode.desiredInternalSampleRate = 12000; 1561 } else { 1562 silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); 1563 st->silk_mode.desiredInternalSampleRate = 16000; 1564 } 1565 if( st->mode == MODE_HYBRID ) { 1566 /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ 1567 st->silk_mode.minInternalSampleRate = 16000; 1568 } else { 1569 st->silk_mode.minInternalSampleRate = 8000; 1570 } 1571 1572 if (st->mode == MODE_SILK_ONLY) 1573 { 1574 opus_int32 effective_max_rate = max_rate; 1575 st->silk_mode.maxInternalSampleRate = 16000; 1576 if (frame_rate > 50) 1577 effective_max_rate = effective_max_rate*2/3; 1578 if (effective_max_rate < 13000) 1579 { 1580 st->silk_mode.maxInternalSampleRate = 12000; 1581 st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); 1582 } 1583 if (effective_max_rate < 9600) 1584 { 1585 st->silk_mode.maxInternalSampleRate = 8000; 1586 st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); 1587 } 1588 } else { 1589 st->silk_mode.maxInternalSampleRate = 16000; 1590 } 1591 1592 st->silk_mode.useCBR = !st->use_vbr; 1593 1594 /* Call SILK encoder for the low band */ 1595 nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes); 1596 1597 st->silk_mode.maxBits = nBytes*8; 1598 /* Only allow up to 90% of the bits for hybrid mode*/ 1599 if (st->mode == MODE_HYBRID) 1600 st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10; 1601 if (st->silk_mode.useCBR) 1602 { 1603 st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8; 1604 /* Reduce the initial target to make it easier to reach the CBR rate */ 1605 st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000); 1606 } 1607 1608 if (prefill) 1609 { 1610 opus_int32 zero=0; 1611 int prefill_offset; 1612 /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode 1613 a discontinuity. The exact location is what we need to avoid leaving any "gap" 1614 in the audio when mixing with the redundant CELT frame. Here we can afford to 1615 overwrite st->delay_buffer because the only thing that uses it before it gets 1616 rewritten is tmp_prefill[] and even then only the part after the ramp really 1617 gets used (rather than sent to the encoder and discarded) */ 1618 prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); 1619 gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, 1620 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); 1621 for(i=0;i<prefill_offset;i++) 1622 st->delay_buffer[i]=0; 1623 #ifdef FIXED_POINT 1624 pcm_silk = st->delay_buffer; 1625 #else 1626 for (i=0;i<st->encoder_buffer*st->channels;i++) 1627 pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); 1628 #endif 1629 silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); 1630 } 1631 1632 #ifdef FIXED_POINT 1633 pcm_silk = pcm_buf+total_buffer*st->channels; 1634 #else 1635 for (i=0;i<frame_size*st->channels;i++) 1636 pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); 1637 #endif 1638 ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); 1639 if( ret ) { 1640 /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ 1641 /* Handle error */ 1642 RESTORE_STACK; 1643 return OPUS_INTERNAL_ERROR; 1644 } 1645 if (nBytes==0) 1646 { 1647 st->rangeFinal = 0; 1648 data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 1649 RESTORE_STACK; 1650 return 1; 1651 } 1652 /* Extract SILK internal bandwidth for signaling in first byte */ 1653 if( st->mode == MODE_SILK_ONLY ) { 1654 if( st->silk_mode.internalSampleRate == 8000 ) { 1655 curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1656 } else if( st->silk_mode.internalSampleRate == 12000 ) { 1657 curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1658 } else if( st->silk_mode.internalSampleRate == 16000 ) { 1659 curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1660 } 1661 } else { 1662 silk_assert( st->silk_mode.internalSampleRate == 16000 ); 1663 } 1664 1665 st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; 1666 /* FIXME: How do we allocate the redundancy for CBR? */ 1667 if (st->silk_mode.opusCanSwitch) 1668 { 1669 redundancy = 1; 1670 celt_to_silk = 0; 1671 st->silk_bw_switch = 1; 1672 } 1673 } 1674 1675 /* CELT processing */ 1676 { 1677 int endband=21; 1678 1679 switch(curr_bandwidth) 1680 { 1681 case OPUS_BANDWIDTH_NARROWBAND: 1682 endband = 13; 1683 break; 1684 case OPUS_BANDWIDTH_MEDIUMBAND: 1685 case OPUS_BANDWIDTH_WIDEBAND: 1686 endband = 17; 1687 break; 1688 case OPUS_BANDWIDTH_SUPERWIDEBAND: 1689 endband = 19; 1690 break; 1691 case OPUS_BANDWIDTH_FULLBAND: 1692 endband = 21; 1693 break; 1694 } 1695 celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); 1696 celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); 1697 } 1698 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); 1699 if (st->mode != MODE_SILK_ONLY) 1700 { 1701 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 1702 /* Allow prediction unless we decide to disable it later */ 1703 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(2)); 1704 1705 if (st->mode == MODE_HYBRID) 1706 { 1707 int len; 1708 1709 len = (ec_tell(&enc)+7)>>3; 1710 if (redundancy) 1711 len += st->mode == MODE_HYBRID ? 3 : 1; 1712 if( st->use_vbr ) { 1713 nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); 1714 } else { 1715 /* check if SILK used up too much */ 1716 nb_compr_bytes = len > bytes_target ? len : bytes_target; 1717 } 1718 } else { 1719 if (st->use_vbr) 1720 { 1721 opus_int32 bonus=0; 1722 #ifndef DISABLE_FLOAT_API 1723 if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) 1724 { 1725 bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); 1726 if (analysis_info.valid) 1727 bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); 1728 } 1729 #endif 1730 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); 1731 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); 1732 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); 1733 nb_compr_bytes = max_data_bytes-1-redundancy_bytes; 1734 } else { 1735 nb_compr_bytes = bytes_target; 1736 } 1737 } 1738 1739 } else { 1740 nb_compr_bytes = 0; 1741 } 1742 1743 ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); 1744 if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) 1745 { 1746 for (i=0;i<st->channels*st->Fs/400;i++) 1747 tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; 1748 } 1749 1750 for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++) 1751 st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; 1752 for (;i<st->encoder_buffer*st->channels;i++) 1753 st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; 1754 1755 /* gain_fade() and stereo_fade() need to be after the buffer copying 1756 because we don't want any of this to affect the SILK part */ 1757 if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { 1758 gain_fade(pcm_buf, pcm_buf, 1759 st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); 1760 } 1761 st->prev_HB_gain = HB_gain; 1762 if (st->mode != MODE_HYBRID || st->stream_channels==1) 1763 st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),IMAX(0,st->bitrate_bps-32000)); 1764 if( !st->energy_masking && st->channels == 2 ) { 1765 /* Apply stereo width reduction (at low bitrates) */ 1766 if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { 1767 opus_val16 g1, g2; 1768 g1 = st->hybrid_stereo_width_Q14; 1769 g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); 1770 #ifdef FIXED_POINT 1771 g1 = g1==16384 ? Q15ONE : SHL16(g1,1); 1772 g2 = g2==16384 ? Q15ONE : SHL16(g2,1); 1773 #else 1774 g1 *= (1.f/16384); 1775 g2 *= (1.f/16384); 1776 #endif 1777 stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, 1778 frame_size, st->channels, celt_mode->window, st->Fs); 1779 st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; 1780 } 1781 } 1782 1783 if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) 1784 { 1785 /* For SILK mode, the redundancy is inferred from the length */ 1786 if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes)) 1787 ec_enc_bit_logp(&enc, redundancy, 12); 1788 if (redundancy) 1789 { 1790 int max_redundancy; 1791 ec_enc_bit_logp(&enc, celt_to_silk, 1); 1792 if (st->mode == MODE_HYBRID) 1793 max_redundancy = (max_data_bytes-1)-nb_compr_bytes-1; 1794 else 1795 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); 1796 /* Target the same bit-rate for redundancy as for the rest, 1797 up to a max of 257 bytes */ 1798 redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600); 1799 redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); 1800 if (st->mode == MODE_HYBRID) 1801 ec_enc_uint(&enc, redundancy_bytes-2, 256); 1802 } 1803 } else { 1804 redundancy = 0; 1805 } 1806 1807 if (!redundancy) 1808 { 1809 st->silk_bw_switch = 0; 1810 redundancy_bytes = 0; 1811 } 1812 if (st->mode != MODE_CELT_ONLY)start_band=17; 1813 1814 if (st->mode == MODE_SILK_ONLY) 1815 { 1816 ret = (ec_tell(&enc)+7)>>3; 1817 ec_enc_done(&enc); 1818 nb_compr_bytes = ret; 1819 } else { 1820 nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes); 1821 ec_enc_shrink(&enc, nb_compr_bytes); 1822 } 1823 1824 #ifndef DISABLE_FLOAT_API 1825 if (redundancy || st->mode != MODE_SILK_ONLY) 1826 celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); 1827 #endif 1828 1829 /* 5 ms redundant frame for CELT->SILK */ 1830 if (redundancy && celt_to_silk) 1831 { 1832 int err; 1833 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); 1834 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 1835 err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); 1836 if (err < 0) 1837 { 1838 RESTORE_STACK; 1839 return OPUS_INTERNAL_ERROR; 1840 } 1841 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); 1842 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 1843 } 1844 1845 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band)); 1846 1847 if (st->mode != MODE_SILK_ONLY) 1848 { 1849 if (st->mode != st->prev_mode && st->prev_mode > 0) 1850 { 1851 unsigned char dummy[2]; 1852 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 1853 1854 /* Prefilling */ 1855 celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); 1856 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); 1857 } 1858 /* If false, we already busted the budget and we'll end up with a "PLC packet" */ 1859 if (ec_tell(&enc) <= 8*nb_compr_bytes) 1860 { 1861 ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); 1862 if (ret < 0) 1863 { 1864 RESTORE_STACK; 1865 return OPUS_INTERNAL_ERROR; 1866 } 1867 } 1868 } 1869 1870 /* 5 ms redundant frame for SILK->CELT */ 1871 if (redundancy && !celt_to_silk) 1872 { 1873 int err; 1874 unsigned char dummy[2]; 1875 int N2, N4; 1876 N2 = st->Fs/200; 1877 N4 = st->Fs/400; 1878 1879 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 1880 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); 1881 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); 1882 1883 /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ 1884 celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); 1885 1886 err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); 1887 if (err < 0) 1888 { 1889 RESTORE_STACK; 1890 return OPUS_INTERNAL_ERROR; 1891 } 1892 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); 1893 } 1894 1895 1896 1897 /* Signalling the mode in the first byte */ 1898 data--; 1899 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 1900 1901 st->rangeFinal = enc.rng ^ redundant_rng; 1902 1903 if (to_celt) 1904 st->prev_mode = MODE_CELT_ONLY; 1905 else 1906 st->prev_mode = st->mode; 1907 st->prev_channels = st->stream_channels; 1908 st->prev_framesize = frame_size; 1909 1910 st->first = 0; 1911 1912 /* In the unlikely case that the SILK encoder busted its target, tell 1913 the decoder to call the PLC */ 1914 if (ec_tell(&enc) > (max_data_bytes-1)*8) 1915 { 1916 if (max_data_bytes < 2) 1917 { 1918 RESTORE_STACK; 1919 return OPUS_BUFFER_TOO_SMALL; 1920 } 1921 data[1] = 0; 1922 ret = 1; 1923 st->rangeFinal = 0; 1924 } else if (st->mode==MODE_SILK_ONLY&&!redundancy) 1925 { 1926 /*When in LPC only mode it's perfectly 1927 reasonable to strip off trailing zero bytes as 1928 the required range decoder behavior is to 1929 fill these in. This can't be done when the MDCT 1930 modes are used because the decoder needs to know 1931 the actual length for allocation purposes.*/ 1932 while(ret>2&&data[ret]==0)ret--; 1933 } 1934 /* Count ToC and redundancy */ 1935 ret += 1+redundancy_bytes; 1936 if (!st->use_vbr && ret >= 3) 1937 { 1938 if (pad_frame(data, ret, max_data_bytes)) 1939 { 1940 RESTORE_STACK; 1941 return OPUS_INTERNAL_ERROR; 1942 } 1943 ret = max_data_bytes; 1944 } 1945 RESTORE_STACK; 1946 return ret; 1947 } 1948 1949 #ifdef FIXED_POINT 1950 1951 #ifndef DISABLE_FLOAT_API 1952 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, 1953 unsigned char *data, opus_int32 max_data_bytes) 1954 { 1955 int i, ret; 1956 int frame_size; 1957 int delay_compensation; 1958 VARDECL(opus_int16, in); 1959 ALLOC_STACK; 1960 1961 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1962 delay_compensation = 0; 1963 else 1964 delay_compensation = st->delay_compensation; 1965 frame_size = compute_frame_size(pcm, analysis_frame_size, 1966 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 1967 delay_compensation, downmix_float, st->analysis.subframe_mem); 1968 1969 ALLOC(in, frame_size*st->channels, opus_int16); 1970 1971 for (i=0;i<frame_size*st->channels;i++) 1972 in[i] = FLOAT2INT16(pcm[i]); 1973 ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); 1974 RESTORE_STACK; 1975 return ret; 1976 } 1977 #endif 1978 1979 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, 1980 unsigned char *data, opus_int32 out_data_bytes) 1981 { 1982 int frame_size; 1983 int delay_compensation; 1984 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1985 delay_compensation = 0; 1986 else 1987 delay_compensation = st->delay_compensation; 1988 frame_size = compute_frame_size(pcm, analysis_frame_size, 1989 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 1990 delay_compensation, downmix_float, st->analysis.subframe_mem); 1991 return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); 1992 } 1993 1994 #else 1995 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, 1996 unsigned char *data, opus_int32 max_data_bytes) 1997 { 1998 int i, ret; 1999 int frame_size; 2000 int delay_compensation; 2001 VARDECL(float, in); 2002 ALLOC_STACK; 2003 2004 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2005 delay_compensation = 0; 2006 else 2007 delay_compensation = st->delay_compensation; 2008 frame_size = compute_frame_size(pcm, analysis_frame_size, 2009 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 2010 delay_compensation, downmix_float, st->analysis.subframe_mem); 2011 2012 ALLOC(in, frame_size*st->channels, float); 2013 2014 for (i=0;i<frame_size*st->channels;i++) 2015 in[i] = (1.0f/32768)*pcm[i]; 2016 ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); 2017 RESTORE_STACK; 2018 return ret; 2019 } 2020 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, 2021 unsigned char *data, opus_int32 out_data_bytes) 2022 { 2023 int frame_size; 2024 int delay_compensation; 2025 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2026 delay_compensation = 0; 2027 else 2028 delay_compensation = st->delay_compensation; 2029 frame_size = compute_frame_size(pcm, analysis_frame_size, 2030 st->variable_duration, st->channels, st->Fs, st->bitrate_bps, 2031 delay_compensation, downmix_float, st->analysis.subframe_mem); 2032 return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, 2033 pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); 2034 } 2035 #endif 2036 2037 2038 int opus_encoder_ctl(OpusEncoder *st, int request, ...) 2039 { 2040 int ret; 2041 CELTEncoder *celt_enc; 2042 va_list ap; 2043 2044 ret = OPUS_OK; 2045 va_start(ap, request); 2046 2047 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 2048 2049 switch (request) 2050 { 2051 case OPUS_SET_APPLICATION_REQUEST: 2052 { 2053 opus_int32 value = va_arg(ap, opus_int32); 2054 if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO 2055 && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2056 || (!st->first && st->application != value)) 2057 { 2058 ret = OPUS_BAD_ARG; 2059 break; 2060 } 2061 st->application = value; 2062 } 2063 break; 2064 case OPUS_GET_APPLICATION_REQUEST: 2065 { 2066 opus_int32 *value = va_arg(ap, opus_int32*); 2067 if (!value) 2068 { 2069 goto bad_arg; 2070 } 2071 *value = st->application; 2072 } 2073 break; 2074 case OPUS_SET_BITRATE_REQUEST: 2075 { 2076 opus_int32 value = va_arg(ap, opus_int32); 2077 if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX) 2078 { 2079 if (value <= 0) 2080 goto bad_arg; 2081 else if (value <= 500) 2082 value = 500; 2083 else if (value > (opus_int32)300000*st->channels) 2084 value = (opus_int32)300000*st->channels; 2085 } 2086 st->user_bitrate_bps = value; 2087 } 2088 break; 2089 case OPUS_GET_BITRATE_REQUEST: 2090 { 2091 opus_int32 *value = va_arg(ap, opus_int32*); 2092 if (!value) 2093 { 2094 goto bad_arg; 2095 } 2096 *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276); 2097 } 2098 break; 2099 case OPUS_SET_FORCE_CHANNELS_REQUEST: 2100 { 2101 opus_int32 value = va_arg(ap, opus_int32); 2102 if((value<1 || value>st->channels) && value != OPUS_AUTO) 2103 { 2104 goto bad_arg; 2105 } 2106 st->force_channels = value; 2107 } 2108 break; 2109 case OPUS_GET_FORCE_CHANNELS_REQUEST: 2110 { 2111 opus_int32 *value = va_arg(ap, opus_int32*); 2112 if (!value) 2113 { 2114 goto bad_arg; 2115 } 2116 *value = st->force_channels; 2117 } 2118 break; 2119 case OPUS_SET_MAX_BANDWIDTH_REQUEST: 2120 { 2121 opus_int32 value = va_arg(ap, opus_int32); 2122 if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) 2123 { 2124 goto bad_arg; 2125 } 2126 st->max_bandwidth = value; 2127 if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 2128 st->silk_mode.maxInternalSampleRate = 8000; 2129 } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 2130 st->silk_mode.maxInternalSampleRate = 12000; 2131 } else { 2132 st->silk_mode.maxInternalSampleRate = 16000; 2133 } 2134 } 2135 break; 2136 case OPUS_GET_MAX_BANDWIDTH_REQUEST: 2137 { 2138 opus_int32 *value = va_arg(ap, opus_int32*); 2139 if (!value) 2140 { 2141 goto bad_arg; 2142 } 2143 *value = st->max_bandwidth; 2144 } 2145 break; 2146 case OPUS_SET_BANDWIDTH_REQUEST: 2147 { 2148 opus_int32 value = va_arg(ap, opus_int32); 2149 if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO) 2150 { 2151 goto bad_arg; 2152 } 2153 st->user_bandwidth = value; 2154 if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 2155 st->silk_mode.maxInternalSampleRate = 8000; 2156 } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 2157 st->silk_mode.maxInternalSampleRate = 12000; 2158 } else { 2159 st->silk_mode.maxInternalSampleRate = 16000; 2160 } 2161 } 2162 break; 2163 case OPUS_GET_BANDWIDTH_REQUEST: 2164 { 2165 opus_int32 *value = va_arg(ap, opus_int32*); 2166 if (!value) 2167 { 2168 goto bad_arg; 2169 } 2170 *value = st->bandwidth; 2171 } 2172 break; 2173 case OPUS_SET_DTX_REQUEST: 2174 { 2175 opus_int32 value = va_arg(ap, opus_int32); 2176 if(value<0 || value>1) 2177 { 2178 goto bad_arg; 2179 } 2180 st->silk_mode.useDTX = value; 2181 } 2182 break; 2183 case OPUS_GET_DTX_REQUEST: 2184 { 2185 opus_int32 *value = va_arg(ap, opus_int32*); 2186 if (!value) 2187 { 2188 goto bad_arg; 2189 } 2190 *value = st->silk_mode.useDTX; 2191 } 2192 break; 2193 case OPUS_SET_COMPLEXITY_REQUEST: 2194 { 2195 opus_int32 value = va_arg(ap, opus_int32); 2196 if(value<0 || value>10) 2197 { 2198 goto bad_arg; 2199 } 2200 st->silk_mode.complexity = value; 2201 celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value)); 2202 } 2203 break; 2204 case OPUS_GET_COMPLEXITY_REQUEST: 2205 { 2206 opus_int32 *value = va_arg(ap, opus_int32*); 2207 if (!value) 2208 { 2209 goto bad_arg; 2210 } 2211 *value = st->silk_mode.complexity; 2212 } 2213 break; 2214 case OPUS_SET_INBAND_FEC_REQUEST: 2215 { 2216 opus_int32 value = va_arg(ap, opus_int32); 2217 if(value<0 || value>1) 2218 { 2219 goto bad_arg; 2220 } 2221 st->silk_mode.useInBandFEC = value; 2222 } 2223 break; 2224 case OPUS_GET_INBAND_FEC_REQUEST: 2225 { 2226 opus_int32 *value = va_arg(ap, opus_int32*); 2227 if (!value) 2228 { 2229 goto bad_arg; 2230 } 2231 *value = st->silk_mode.useInBandFEC; 2232 } 2233 break; 2234 case OPUS_SET_PACKET_LOSS_PERC_REQUEST: 2235 { 2236 opus_int32 value = va_arg(ap, opus_int32); 2237 if (value < 0 || value > 100) 2238 { 2239 goto bad_arg; 2240 } 2241 st->silk_mode.packetLossPercentage = value; 2242 celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value)); 2243 } 2244 break; 2245 case OPUS_GET_PACKET_LOSS_PERC_REQUEST: 2246 { 2247 opus_int32 *value = va_arg(ap, opus_int32*); 2248 if (!value) 2249 { 2250 goto bad_arg; 2251 } 2252 *value = st->silk_mode.packetLossPercentage; 2253 } 2254 break; 2255 case OPUS_SET_VBR_REQUEST: 2256 { 2257 opus_int32 value = va_arg(ap, opus_int32); 2258 if(value<0 || value>1) 2259 { 2260 goto bad_arg; 2261 } 2262 st->use_vbr = value; 2263 st->silk_mode.useCBR = 1-value; 2264 } 2265 break; 2266 case OPUS_GET_VBR_REQUEST: 2267 { 2268 opus_int32 *value = va_arg(ap, opus_int32*); 2269 if (!value) 2270 { 2271 goto bad_arg; 2272 } 2273 *value = st->use_vbr; 2274 } 2275 break; 2276 case OPUS_SET_VOICE_RATIO_REQUEST: 2277 { 2278 opus_int32 value = va_arg(ap, opus_int32); 2279 if (value<-1 || value>100) 2280 { 2281 goto bad_arg; 2282 } 2283 st->voice_ratio = value; 2284 } 2285 break; 2286 case OPUS_GET_VOICE_RATIO_REQUEST: 2287 { 2288 opus_int32 *value = va_arg(ap, opus_int32*); 2289 if (!value) 2290 { 2291 goto bad_arg; 2292 } 2293 *value = st->voice_ratio; 2294 } 2295 break; 2296 case OPUS_SET_VBR_CONSTRAINT_REQUEST: 2297 { 2298 opus_int32 value = va_arg(ap, opus_int32); 2299 if(value<0 || value>1) 2300 { 2301 goto bad_arg; 2302 } 2303 st->vbr_constraint = value; 2304 } 2305 break; 2306 case OPUS_GET_VBR_CONSTRAINT_REQUEST: 2307 { 2308 opus_int32 *value = va_arg(ap, opus_int32*); 2309 if (!value) 2310 { 2311 goto bad_arg; 2312 } 2313 *value = st->vbr_constraint; 2314 } 2315 break; 2316 case OPUS_SET_SIGNAL_REQUEST: 2317 { 2318 opus_int32 value = va_arg(ap, opus_int32); 2319 if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC) 2320 { 2321 goto bad_arg; 2322 } 2323 st->signal_type = value; 2324 } 2325 break; 2326 case OPUS_GET_SIGNAL_REQUEST: 2327 { 2328 opus_int32 *value = va_arg(ap, opus_int32*); 2329 if (!value) 2330 { 2331 goto bad_arg; 2332 } 2333 *value = st->signal_type; 2334 } 2335 break; 2336 case OPUS_GET_LOOKAHEAD_REQUEST: 2337 { 2338 opus_int32 *value = va_arg(ap, opus_int32*); 2339 if (!value) 2340 { 2341 goto bad_arg; 2342 } 2343 *value = st->Fs/400; 2344 if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2345 *value += st->delay_compensation; 2346 } 2347 break; 2348 case OPUS_GET_SAMPLE_RATE_REQUEST: 2349 { 2350 opus_int32 *value = va_arg(ap, opus_int32*); 2351 if (!value) 2352 { 2353 goto bad_arg; 2354 } 2355 *value = st->Fs; 2356 } 2357 break; 2358 case OPUS_GET_FINAL_RANGE_REQUEST: 2359 { 2360 opus_uint32 *value = va_arg(ap, opus_uint32*); 2361 if (!value) 2362 { 2363 goto bad_arg; 2364 } 2365 *value = st->rangeFinal; 2366 } 2367 break; 2368 case OPUS_SET_LSB_DEPTH_REQUEST: 2369 { 2370 opus_int32 value = va_arg(ap, opus_int32); 2371 if (value<8 || value>24) 2372 { 2373 goto bad_arg; 2374 } 2375 st->lsb_depth=value; 2376 } 2377 break; 2378 case OPUS_GET_LSB_DEPTH_REQUEST: 2379 { 2380 opus_int32 *value = va_arg(ap, opus_int32*); 2381 if (!value) 2382 { 2383 goto bad_arg; 2384 } 2385 *value = st->lsb_depth; 2386 } 2387 break; 2388 case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: 2389 { 2390 opus_int32 value = va_arg(ap, opus_int32); 2391 if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && 2392 value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && 2393 value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && 2394 value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE) 2395 { 2396 goto bad_arg; 2397 } 2398 st->variable_duration = value; 2399 celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); 2400 } 2401 break; 2402 case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: 2403 { 2404 opus_int32 *value = va_arg(ap, opus_int32*); 2405 if (!value) 2406 { 2407 goto bad_arg; 2408 } 2409 *value = st->variable_duration; 2410 } 2411 break; 2412 case OPUS_RESET_STATE: 2413 { 2414 void *silk_enc; 2415 silk_EncControlStruct dummy; 2416 silk_enc = (char*)st+st->silk_enc_offset; 2417 2418 OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START, 2419 sizeof(OpusEncoder)- 2420 ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st)); 2421 2422 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 2423 silk_InitEncoder( silk_enc, &dummy ); 2424 st->stream_channels = st->channels; 2425 st->hybrid_stereo_width_Q14 = 1 << 14; 2426 st->prev_HB_gain = Q15ONE; 2427 st->first = 1; 2428 st->mode = MODE_HYBRID; 2429 st->bandwidth = OPUS_BANDWIDTH_FULLBAND; 2430 st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 2431 } 2432 break; 2433 case OPUS_SET_FORCE_MODE_REQUEST: 2434 { 2435 opus_int32 value = va_arg(ap, opus_int32); 2436 if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO) 2437 { 2438 goto bad_arg; 2439 } 2440 st->user_forced_mode = value; 2441 } 2442 break; 2443 case OPUS_SET_LFE_REQUEST: 2444 { 2445 opus_int32 value = va_arg(ap, opus_int32); 2446 st->lfe = value; 2447 ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); 2448 } 2449 break; 2450 case OPUS_SET_ENERGY_MASK_REQUEST: 2451 { 2452 opus_val16 *value = va_arg(ap, opus_val16*); 2453 st->energy_masking = value; 2454 ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); 2455 } 2456 break; 2457 2458 case CELT_GET_MODE_REQUEST: 2459 { 2460 const CELTMode ** value = va_arg(ap, const CELTMode**); 2461 if (!value) 2462 { 2463 goto bad_arg; 2464 } 2465 ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value)); 2466 } 2467 break; 2468 default: 2469 /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/ 2470 ret = OPUS_UNIMPLEMENTED; 2471 break; 2472 } 2473 va_end(ap); 2474 return ret; 2475 bad_arg: 2476 va_end(ap); 2477 return OPUS_BAD_ARG; 2478 } 2479 2480 void opus_encoder_destroy(OpusEncoder *st) 2481 { 2482 opus_free(st); 2483 } 2484