1 /* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited 2 Written by Jean-Marc Valin and Koen Vos */ 3 /* 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 - Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 - Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #include <stdarg.h> 33 #include "celt.h" 34 #include "entenc.h" 35 #include "modes.h" 36 #include "API.h" 37 #include "stack_alloc.h" 38 #include "float_cast.h" 39 #include "opus.h" 40 #include "arch.h" 41 #include "pitch.h" 42 #include "opus_private.h" 43 #include "os_support.h" 44 #include "cpu_support.h" 45 #include "analysis.h" 46 #include "mathops.h" 47 #include "tuning_parameters.h" 48 #ifdef FIXED_POINT 49 #include "fixed/structs_FIX.h" 50 #else 51 #include "float/structs_FLP.h" 52 #endif 53 54 #define MAX_ENCODER_BUFFER 480 55 56 #ifndef DISABLE_FLOAT_API 57 #define PSEUDO_SNR_THRESHOLD 316.23f /* 10^(25/10) */ 58 #endif 59 60 typedef struct { 61 opus_val32 XX, XY, YY; 62 opus_val16 smoothed_width; 63 opus_val16 max_follower; 64 } StereoWidthState; 65 66 struct OpusEncoder { 67 int celt_enc_offset; 68 int silk_enc_offset; 69 silk_EncControlStruct silk_mode; 70 int application; 71 int channels; 72 int delay_compensation; 73 int force_channels; 74 int signal_type; 75 int user_bandwidth; 76 int max_bandwidth; 77 int user_forced_mode; 78 int voice_ratio; 79 opus_int32 Fs; 80 int use_vbr; 81 int vbr_constraint; 82 int variable_duration; 83 opus_int32 bitrate_bps; 84 opus_int32 user_bitrate_bps; 85 int lsb_depth; 86 int encoder_buffer; 87 int lfe; 88 int arch; 89 int use_dtx; /* general DTX for both SILK and CELT */ 90 #ifndef DISABLE_FLOAT_API 91 TonalityAnalysisState analysis; 92 #endif 93 94 #define OPUS_ENCODER_RESET_START stream_channels 95 int stream_channels; 96 opus_int16 hybrid_stereo_width_Q14; 97 opus_int32 variable_HP_smth2_Q15; 98 opus_val16 prev_HB_gain; 99 opus_val32 hp_mem[4]; 100 int mode; 101 int prev_mode; 102 int prev_channels; 103 int prev_framesize; 104 int bandwidth; 105 /* Bandwidth determined automatically from the rate (before any other adjustment) */ 106 int auto_bandwidth; 107 int silk_bw_switch; 108 /* Sampling rate (at the API level) */ 109 int first; 110 opus_val16 * energy_masking; 111 StereoWidthState width_mem; 112 opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; 113 #ifndef DISABLE_FLOAT_API 114 int detected_bandwidth; 115 int nb_no_activity_frames; 116 opus_val32 peak_signal_energy; 117 #endif 118 int nonfinal_frame; /* current frame is not the final in a packet */ 119 opus_uint32 rangeFinal; 120 }; 121 122 /* Transition tables for the voice and music. First column is the 123 middle (memoriless) threshold. The second column is the hysteresis 124 (difference with the middle) */ 125 static const opus_int32 mono_voice_bandwidth_thresholds[8] = { 126 10000, 1000, /* NB<->MB */ 127 11000, 1000, /* MB<->WB */ 128 13500, 1000, /* WB<->SWB */ 129 14000, 2000, /* SWB<->FB */ 130 }; 131 static const opus_int32 mono_music_bandwidth_thresholds[8] = { 132 10000, 1000, /* NB<->MB */ 133 11000, 1000, /* MB<->WB */ 134 13500, 1000, /* WB<->SWB */ 135 14000, 2000, /* SWB<->FB */ 136 }; 137 static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { 138 10000, 1000, /* NB<->MB */ 139 11000, 1000, /* MB<->WB */ 140 13500, 1000, /* WB<->SWB */ 141 14000, 2000, /* SWB<->FB */ 142 }; 143 static const opus_int32 stereo_music_bandwidth_thresholds[8] = { 144 10000, 1000, /* NB<->MB */ 145 11000, 1000, /* MB<->WB */ 146 13500, 1000, /* WB<->SWB */ 147 14000, 2000, /* SWB<->FB */ 148 }; 149 /* Threshold bit-rates for switching between mono and stereo */ 150 static const opus_int32 stereo_voice_threshold = 24000; 151 static const opus_int32 stereo_music_threshold = 24000; 152 153 /* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ 154 static const opus_int32 mode_thresholds[2][2] = { 155 /* voice */ /* music */ 156 { 64000, 16000}, /* mono */ 157 { 36000, 16000}, /* stereo */ 158 }; 159 160 static const opus_int32 fec_thresholds[] = { 161 12000, 1000, /* NB */ 162 14000, 1000, /* MB */ 163 16000, 1000, /* WB */ 164 20000, 1000, /* SWB */ 165 22000, 1000, /* FB */ 166 }; 167 168 int opus_encoder_get_size(int channels) 169 { 170 int silkEncSizeBytes, celtEncSizeBytes; 171 int ret; 172 if (channels<1 || channels > 2) 173 return 0; 174 ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); 175 if (ret) 176 return 0; 177 silkEncSizeBytes = align(silkEncSizeBytes); 178 celtEncSizeBytes = celt_encoder_get_size(channels); 179 return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes; 180 } 181 182 int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application) 183 { 184 void *silk_enc; 185 CELTEncoder *celt_enc; 186 int err; 187 int ret, silkEncSizeBytes; 188 189 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| 190 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO 191 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) 192 return OPUS_BAD_ARG; 193 194 OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); 195 /* Create SILK encoder */ 196 ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); 197 if (ret) 198 return OPUS_BAD_ARG; 199 silkEncSizeBytes = align(silkEncSizeBytes); 200 st->silk_enc_offset = align(sizeof(OpusEncoder)); 201 st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; 202 silk_enc = (char*)st+st->silk_enc_offset; 203 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 204 205 st->stream_channels = st->channels = channels; 206 207 st->Fs = Fs; 208 209 st->arch = opus_select_arch(); 210 211 ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode ); 212 if(ret)return OPUS_INTERNAL_ERROR; 213 214 /* default SILK parameters */ 215 st->silk_mode.nChannelsAPI = channels; 216 st->silk_mode.nChannelsInternal = channels; 217 st->silk_mode.API_sampleRate = st->Fs; 218 st->silk_mode.maxInternalSampleRate = 16000; 219 st->silk_mode.minInternalSampleRate = 8000; 220 st->silk_mode.desiredInternalSampleRate = 16000; 221 st->silk_mode.payloadSize_ms = 20; 222 st->silk_mode.bitRate = 25000; 223 st->silk_mode.packetLossPercentage = 0; 224 st->silk_mode.complexity = 9; 225 st->silk_mode.useInBandFEC = 0; 226 st->silk_mode.useDTX = 0; 227 st->silk_mode.useCBR = 0; 228 st->silk_mode.reducedDependency = 0; 229 230 /* Create CELT encoder */ 231 /* Initialize CELT encoder */ 232 err = celt_encoder_init(celt_enc, Fs, channels, st->arch); 233 if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; 234 235 celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); 236 celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); 237 238 st->use_vbr = 1; 239 /* Makes constrained VBR the default (safer for real-time use) */ 240 st->vbr_constraint = 1; 241 st->user_bitrate_bps = OPUS_AUTO; 242 st->bitrate_bps = 3000+Fs*channels; 243 st->application = application; 244 st->signal_type = OPUS_AUTO; 245 st->user_bandwidth = OPUS_AUTO; 246 st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND; 247 st->force_channels = OPUS_AUTO; 248 st->user_forced_mode = OPUS_AUTO; 249 st->voice_ratio = -1; 250 st->encoder_buffer = st->Fs/100; 251 st->lsb_depth = 24; 252 st->variable_duration = OPUS_FRAMESIZE_ARG; 253 254 /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 255 + 1.5 ms for SILK resamplers and stereo prediction) */ 256 st->delay_compensation = st->Fs/250; 257 258 st->hybrid_stereo_width_Q14 = 1 << 14; 259 st->prev_HB_gain = Q15ONE; 260 st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 261 st->first = 1; 262 st->mode = MODE_HYBRID; 263 st->bandwidth = OPUS_BANDWIDTH_FULLBAND; 264 265 #ifndef DISABLE_FLOAT_API 266 tonality_analysis_init(&st->analysis, st->Fs); 267 st->analysis.application = st->application; 268 #endif 269 270 return OPUS_OK; 271 } 272 273 static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels) 274 { 275 int period; 276 unsigned char toc; 277 period = 0; 278 while (framerate < 400) 279 { 280 framerate <<= 1; 281 period++; 282 } 283 if (mode == MODE_SILK_ONLY) 284 { 285 toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5; 286 toc |= (period-2)<<3; 287 } else if (mode == MODE_CELT_ONLY) 288 { 289 int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND; 290 if (tmp < 0) 291 tmp = 0; 292 toc = 0x80; 293 toc |= tmp << 5; 294 toc |= period<<3; 295 } else /* Hybrid */ 296 { 297 toc = 0x60; 298 toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4; 299 toc |= (period-2)<<3; 300 } 301 toc |= (channels==2)<<2; 302 return toc; 303 } 304 305 #ifndef FIXED_POINT 306 static void silk_biquad_float( 307 const opus_val16 *in, /* I: Input signal */ 308 const opus_int32 *B_Q28, /* I: MA coefficients [3] */ 309 const opus_int32 *A_Q28, /* I: AR coefficients [2] */ 310 opus_val32 *S, /* I/O: State vector [2] */ 311 opus_val16 *out, /* O: Output signal */ 312 const opus_int32 len, /* I: Signal length (must be even) */ 313 int stride 314 ) 315 { 316 /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ 317 opus_int k; 318 opus_val32 vout; 319 opus_val32 inval; 320 opus_val32 A[2], B[3]; 321 322 A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28))); 323 A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28))); 324 B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28))); 325 B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28))); 326 B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28))); 327 328 /* Negate A_Q28 values and split in two parts */ 329 330 for( k = 0; k < len; k++ ) { 331 /* S[ 0 ], S[ 1 ]: Q12 */ 332 inval = in[ k*stride ]; 333 vout = S[ 0 ] + B[0]*inval; 334 335 S[ 0 ] = S[1] - vout*A[0] + B[1]*inval; 336 337 S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL; 338 339 /* Scale back to Q0 and saturate */ 340 out[ k*stride ] = vout; 341 } 342 } 343 #endif 344 345 static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs, int arch) 346 { 347 opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; 348 opus_int32 Fc_Q19, r_Q28, r_Q22; 349 (void)arch; 350 351 silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); 352 Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); 353 silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 ); 354 355 r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); 356 357 /* b = r * [ 1; -2; 1 ]; */ 358 /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ 359 B_Q28[ 0 ] = r_Q28; 360 B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 ); 361 B_Q28[ 2 ] = r_Q28; 362 363 /* -r * ( 2 - Fc * Fc ); */ 364 r_Q22 = silk_RSHIFT( r_Q28, 6 ); 365 A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) ); 366 A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); 367 368 #ifdef FIXED_POINT 369 if( channels == 1 ) { 370 silk_biquad_alt_stride1( in, B_Q28, A_Q28, hp_mem, out, len ); 371 } else { 372 silk_biquad_alt_stride2( in, B_Q28, A_Q28, hp_mem, out, len, arch ); 373 } 374 #else 375 silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); 376 if( channels == 2 ) { 377 silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); 378 } 379 #endif 380 } 381 382 #ifdef FIXED_POINT 383 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 384 { 385 int c, i; 386 int shift; 387 388 /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ 389 shift=celt_ilog2(Fs/(cutoff_Hz*3)); 390 for (c=0;c<channels;c++) 391 { 392 for (i=0;i<len;i++) 393 { 394 opus_val32 x, tmp, y; 395 x = SHL32(EXTEND32(in[channels*i+c]), 14); 396 /* First stage */ 397 tmp = x-hp_mem[2*c]; 398 hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); 399 /* Second stage */ 400 y = tmp - hp_mem[2*c+1]; 401 hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); 402 out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 14), 32767)); 403 } 404 } 405 } 406 407 #else 408 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) 409 { 410 int i; 411 float coef, coef2; 412 coef = 4.0f*cutoff_Hz/Fs; 413 coef2 = 1-coef; 414 if (channels==2) 415 { 416 float m0, m1, m2, m3; 417 m0 = hp_mem[0]; 418 m1 = hp_mem[1]; 419 m2 = hp_mem[2]; 420 m3 = hp_mem[3]; 421 for (i=0;i<len;i++) 422 { 423 opus_val32 x0, x1, tmp0, tmp1, out0, out1; 424 x0 = in[2*i+0]; 425 x1 = in[2*i+1]; 426 /* First stage */ 427 tmp0 = x0-m0; 428 tmp1 = x1-m2; 429 m0 = coef*x0 + VERY_SMALL + coef2*m0; 430 m2 = coef*x1 + VERY_SMALL + coef2*m2; 431 /* Second stage */ 432 out0 = tmp0 - m1; 433 out1 = tmp1 - m3; 434 m1 = coef*tmp0 + VERY_SMALL + coef2*m1; 435 m3 = coef*tmp1 + VERY_SMALL + coef2*m3; 436 out[2*i+0] = out0; 437 out[2*i+1] = out1; 438 } 439 hp_mem[0] = m0; 440 hp_mem[1] = m1; 441 hp_mem[2] = m2; 442 hp_mem[3] = m3; 443 } else { 444 float m0, m1; 445 m0 = hp_mem[0]; 446 m1 = hp_mem[1]; 447 for (i=0;i<len;i++) 448 { 449 opus_val32 x, tmp, y; 450 x = in[i]; 451 /* First stage */ 452 tmp = x-m0; 453 m0 = coef*x + VERY_SMALL + coef2*m0; 454 /* Second stage */ 455 y = tmp - m1; 456 m1 = coef*tmp + VERY_SMALL + coef2*m1; 457 out[i] = y; 458 } 459 hp_mem[0] = m0; 460 hp_mem[1] = m1; 461 } 462 } 463 #endif 464 465 static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, 466 int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) 467 { 468 int i; 469 int overlap; 470 int inc; 471 inc = 48000/Fs; 472 overlap=overlap48/inc; 473 g1 = Q15ONE-g1; 474 g2 = Q15ONE-g2; 475 for (i=0;i<overlap;i++) 476 { 477 opus_val32 diff; 478 opus_val16 g, w; 479 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 480 g = SHR32(MAC16_16(MULT16_16(w,g2), 481 Q15ONE-w, g1), 15); 482 diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); 483 diff = MULT16_16_Q15(g, diff); 484 out[i*channels] = out[i*channels] - diff; 485 out[i*channels+1] = out[i*channels+1] + diff; 486 } 487 for (;i<frame_size;i++) 488 { 489 opus_val32 diff; 490 diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); 491 diff = MULT16_16_Q15(g2, diff); 492 out[i*channels] = out[i*channels] - diff; 493 out[i*channels+1] = out[i*channels+1] + diff; 494 } 495 } 496 497 static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, 498 int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) 499 { 500 int i; 501 int inc; 502 int overlap; 503 int c; 504 inc = 48000/Fs; 505 overlap=overlap48/inc; 506 if (channels==1) 507 { 508 for (i=0;i<overlap;i++) 509 { 510 opus_val16 g, w; 511 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 512 g = SHR32(MAC16_16(MULT16_16(w,g2), 513 Q15ONE-w, g1), 15); 514 out[i] = MULT16_16_Q15(g, in[i]); 515 } 516 } else { 517 for (i=0;i<overlap;i++) 518 { 519 opus_val16 g, w; 520 w = MULT16_16_Q15(window[i*inc], window[i*inc]); 521 g = SHR32(MAC16_16(MULT16_16(w,g2), 522 Q15ONE-w, g1), 15); 523 out[i*2] = MULT16_16_Q15(g, in[i*2]); 524 out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]); 525 } 526 } 527 c=0;do { 528 for (i=overlap;i<frame_size;i++) 529 { 530 out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]); 531 } 532 } 533 while (++c<channels); 534 } 535 536 OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error) 537 { 538 int ret; 539 OpusEncoder *st; 540 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| 541 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO 542 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) 543 { 544 if (error) 545 *error = OPUS_BAD_ARG; 546 return NULL; 547 } 548 st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels)); 549 if (st == NULL) 550 { 551 if (error) 552 *error = OPUS_ALLOC_FAIL; 553 return NULL; 554 } 555 ret = opus_encoder_init(st, Fs, channels, application); 556 if (error) 557 *error = ret; 558 if (ret != OPUS_OK) 559 { 560 opus_free(st); 561 st = NULL; 562 } 563 return st; 564 } 565 566 static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes) 567 { 568 if(!frame_size)frame_size=st->Fs/400; 569 if (st->user_bitrate_bps==OPUS_AUTO) 570 return 60*st->Fs/frame_size + st->Fs*st->channels; 571 else if (st->user_bitrate_bps==OPUS_BITRATE_MAX) 572 return max_data_bytes*8*st->Fs/frame_size; 573 else 574 return st->user_bitrate_bps; 575 } 576 577 #ifndef DISABLE_FLOAT_API 578 #ifdef FIXED_POINT 579 #define PCM2VAL(x) FLOAT2INT16(x) 580 #else 581 #define PCM2VAL(x) SCALEIN(x) 582 #endif 583 584 void downmix_float(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C) 585 { 586 const float *x; 587 int j; 588 589 x = (const float *)_x; 590 for (j=0;j<subframe;j++) 591 y[j] = PCM2VAL(x[(j+offset)*C+c1]); 592 if (c2>-1) 593 { 594 for (j=0;j<subframe;j++) 595 y[j] += PCM2VAL(x[(j+offset)*C+c2]); 596 } else if (c2==-2) 597 { 598 int c; 599 for (c=1;c<C;c++) 600 { 601 for (j=0;j<subframe;j++) 602 y[j] += PCM2VAL(x[(j+offset)*C+c]); 603 } 604 } 605 } 606 #endif 607 608 void downmix_int(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C) 609 { 610 const opus_int16 *x; 611 int j; 612 613 x = (const opus_int16 *)_x; 614 for (j=0;j<subframe;j++) 615 y[j] = x[(j+offset)*C+c1]; 616 if (c2>-1) 617 { 618 for (j=0;j<subframe;j++) 619 y[j] += x[(j+offset)*C+c2]; 620 } else if (c2==-2) 621 { 622 int c; 623 for (c=1;c<C;c++) 624 { 625 for (j=0;j<subframe;j++) 626 y[j] += x[(j+offset)*C+c]; 627 } 628 } 629 } 630 631 opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) 632 { 633 int new_size; 634 if (frame_size<Fs/400) 635 return -1; 636 if (variable_duration == OPUS_FRAMESIZE_ARG) 637 new_size = frame_size; 638 else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_120_MS) 639 { 640 if (variable_duration <= OPUS_FRAMESIZE_40_MS) 641 new_size = (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS); 642 else 643 new_size = (variable_duration-OPUS_FRAMESIZE_2_5_MS-2)*Fs/50; 644 } 645 else 646 return -1; 647 if (new_size>frame_size) 648 return -1; 649 if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && 650 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs && 651 50*new_size!=4*Fs && 50*new_size!=5*Fs && 50*new_size!=6*Fs) 652 return -1; 653 return new_size; 654 } 655 656 opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) 657 { 658 opus_val32 xx, xy, yy; 659 opus_val16 sqrt_xx, sqrt_yy; 660 opus_val16 qrrt_xx, qrrt_yy; 661 int frame_rate; 662 int i; 663 opus_val16 short_alpha; 664 665 frame_rate = Fs/frame_size; 666 short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate); 667 xx=xy=yy=0; 668 /* Unroll by 4. The frame size is always a multiple of 4 *except* for 669 2.5 ms frames at 12 kHz. Since this setting is very rare (and very 670 stupid), we just discard the last two samples. */ 671 for (i=0;i<frame_size-3;i+=4) 672 { 673 opus_val32 pxx=0; 674 opus_val32 pxy=0; 675 opus_val32 pyy=0; 676 opus_val16 x, y; 677 x = pcm[2*i]; 678 y = pcm[2*i+1]; 679 pxx = SHR32(MULT16_16(x,x),2); 680 pxy = SHR32(MULT16_16(x,y),2); 681 pyy = SHR32(MULT16_16(y,y),2); 682 x = pcm[2*i+2]; 683 y = pcm[2*i+3]; 684 pxx += SHR32(MULT16_16(x,x),2); 685 pxy += SHR32(MULT16_16(x,y),2); 686 pyy += SHR32(MULT16_16(y,y),2); 687 x = pcm[2*i+4]; 688 y = pcm[2*i+5]; 689 pxx += SHR32(MULT16_16(x,x),2); 690 pxy += SHR32(MULT16_16(x,y),2); 691 pyy += SHR32(MULT16_16(y,y),2); 692 x = pcm[2*i+6]; 693 y = pcm[2*i+7]; 694 pxx += SHR32(MULT16_16(x,x),2); 695 pxy += SHR32(MULT16_16(x,y),2); 696 pyy += SHR32(MULT16_16(y,y),2); 697 698 xx += SHR32(pxx, 10); 699 xy += SHR32(pxy, 10); 700 yy += SHR32(pyy, 10); 701 } 702 mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX); 703 mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); 704 mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); 705 mem->XX = MAX32(0, mem->XX); 706 mem->XY = MAX32(0, mem->XY); 707 mem->YY = MAX32(0, mem->YY); 708 if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18)) 709 { 710 opus_val16 corr; 711 opus_val16 ldiff; 712 opus_val16 width; 713 sqrt_xx = celt_sqrt(mem->XX); 714 sqrt_yy = celt_sqrt(mem->YY); 715 qrrt_xx = celt_sqrt(sqrt_xx); 716 qrrt_yy = celt_sqrt(sqrt_yy); 717 /* Inter-channel correlation */ 718 mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy); 719 corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16); 720 /* Approximate loudness difference */ 721 ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy); 722 width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff); 723 /* Smoothing over one second */ 724 mem->smoothed_width += (width-mem->smoothed_width)/frame_rate; 725 /* Peak follower */ 726 mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width); 727 } 728 /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/ 729 return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower))); 730 } 731 732 static int decide_fec(int useInBandFEC, int PacketLoss_perc, int last_fec, int mode, int *bandwidth, opus_int32 rate) 733 { 734 int orig_bandwidth; 735 if (!useInBandFEC || PacketLoss_perc == 0 || mode == MODE_CELT_ONLY) 736 return 0; 737 orig_bandwidth = *bandwidth; 738 for (;;) 739 { 740 opus_int32 hysteresis; 741 opus_int32 LBRR_rate_thres_bps; 742 /* Compute threshold for using FEC at the current bandwidth setting */ 743 LBRR_rate_thres_bps = fec_thresholds[2*(*bandwidth - OPUS_BANDWIDTH_NARROWBAND)]; 744 hysteresis = fec_thresholds[2*(*bandwidth - OPUS_BANDWIDTH_NARROWBAND) + 1]; 745 if (last_fec == 1) LBRR_rate_thres_bps -= hysteresis; 746 if (last_fec == 0) LBRR_rate_thres_bps += hysteresis; 747 LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 748 125 - silk_min( PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); 749 /* If loss <= 5%, we look at whether we have enough rate to enable FEC. 750 If loss > 5%, we decrease the bandwidth until we can enable FEC. */ 751 if (rate > LBRR_rate_thres_bps) 752 return 1; 753 else if (PacketLoss_perc <= 5) 754 return 0; 755 else if (*bandwidth > OPUS_BANDWIDTH_NARROWBAND) 756 (*bandwidth)--; 757 else 758 break; 759 } 760 /* Couldn't find any bandwidth to enable FEC, keep original bandwidth. */ 761 *bandwidth = orig_bandwidth; 762 return 0; 763 } 764 765 static int compute_silk_rate_for_hybrid(int rate, int bandwidth, int frame20ms, int vbr, int fec) { 766 int entry; 767 int i; 768 int N; 769 int silk_rate; 770 static int rate_table[][5] = { 771 /* |total| |-------- SILK------------| 772 |-- No FEC -| |--- FEC ---| 773 10ms 20ms 10ms 20ms */ 774 { 0, 0, 0, 0, 0}, 775 {12000, 10000, 10000, 11000, 11000}, 776 {16000, 13500, 13500, 15000, 15000}, 777 {20000, 16000, 16000, 18000, 18000}, 778 {24000, 18000, 18000, 21000, 21000}, 779 {32000, 22000, 22000, 28000, 28000}, 780 {64000, 38000, 38000, 50000, 50000} 781 }; 782 entry = 1 + frame20ms + 2*fec; 783 N = sizeof(rate_table)/sizeof(rate_table[0]); 784 for (i=1;i<N;i++) 785 { 786 if (rate_table[i][0] > rate) break; 787 } 788 if (i == N) 789 { 790 silk_rate = rate_table[i-1][entry]; 791 /* For now, just give 50% of the extra bits to SILK. */ 792 silk_rate += (rate-rate_table[i-1][0])/2; 793 } else { 794 opus_int32 lo, hi, x0, x1; 795 lo = rate_table[i-1][entry]; 796 hi = rate_table[i][entry]; 797 x0 = rate_table[i-1][0]; 798 x1 = rate_table[i][0]; 799 silk_rate = (lo*(x1-rate) + hi*(rate-x0))/(x1-x0); 800 } 801 if (!vbr) 802 { 803 /* Tiny boost to SILK for CBR. We should probably tune this better. */ 804 silk_rate += 100; 805 } 806 if (bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND) 807 silk_rate += 300; 808 return silk_rate; 809 } 810 811 /* Returns the equivalent bitrate corresponding to 20 ms frames, 812 complexity 10 VBR operation. */ 813 static opus_int32 compute_equiv_rate(opus_int32 bitrate, int channels, 814 int frame_rate, int vbr, int mode, int complexity, int loss) 815 { 816 opus_int32 equiv; 817 equiv = bitrate; 818 /* Take into account overhead from smaller frames. */ 819 equiv -= (40*channels+20)*(frame_rate - 50); 820 /* CBR is about a 8% penalty for both SILK and CELT. */ 821 if (!vbr) 822 equiv -= equiv/12; 823 /* Complexity makes about 10% difference (from 0 to 10) in general. */ 824 equiv = equiv * (90+complexity)/100; 825 if (mode == MODE_SILK_ONLY || mode == MODE_HYBRID) 826 { 827 /* SILK complexity 0-1 uses the non-delayed-decision NSQ, which 828 costs about 20%. */ 829 if (complexity<2) 830 equiv = equiv*4/5; 831 equiv -= equiv*loss/(6*loss + 10); 832 } else if (mode == MODE_CELT_ONLY) { 833 /* CELT complexity 0-4 doesn't have the pitch filter, which costs 834 about 10%. */ 835 if (complexity<5) 836 equiv = equiv*9/10; 837 } else { 838 /* Mode not known yet */ 839 /* Half the SILK loss*/ 840 equiv -= equiv*loss/(12*loss + 20); 841 } 842 return equiv; 843 } 844 845 #ifndef DISABLE_FLOAT_API 846 847 static int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth) 848 { 849 int silence = 0; 850 opus_val32 sample_max = 0; 851 #ifdef MLP_TRAINING 852 return 0; 853 #endif 854 sample_max = celt_maxabs16(pcm, frame_size*channels); 855 856 #ifdef FIXED_POINT 857 silence = (sample_max == 0); 858 (void)lsb_depth; 859 #else 860 silence = (sample_max <= (opus_val16) 1 / (1 << lsb_depth)); 861 #endif 862 863 return silence; 864 } 865 866 #ifdef FIXED_POINT 867 static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch) 868 { 869 int i; 870 opus_val32 sample_max; 871 int max_shift; 872 int shift; 873 opus_val32 energy = 0; 874 int len = frame_size*channels; 875 (void)arch; 876 /* Max amplitude in the signal */ 877 sample_max = celt_maxabs16(pcm, len); 878 879 /* Compute the right shift required in the MAC to avoid an overflow */ 880 max_shift = celt_ilog2(len); 881 shift = IMAX(0, (celt_ilog2(sample_max) << 1) + max_shift - 28); 882 883 /* Compute the energy */ 884 for (i=0; i<len; i++) 885 energy += SHR32(MULT16_16(pcm[i], pcm[i]), shift); 886 887 /* Normalize energy by the frame size and left-shift back to the original position */ 888 energy /= len; 889 energy = SHL32(energy, shift); 890 891 return energy; 892 } 893 #else 894 static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch) 895 { 896 int len = frame_size*channels; 897 return celt_inner_prod(pcm, pcm, len, arch)/len; 898 } 899 #endif 900 901 /* Decides if DTX should be turned on (=1) or off (=0) */ 902 static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */ 903 int *nb_no_activity_frames, /* number of consecutive frames with no activity */ 904 opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */ 905 const opus_val16 *pcm, /* input pcm signal */ 906 int frame_size, /* frame size */ 907 int channels, 908 int is_silence, /* only digital silence detected in this frame */ 909 int arch 910 ) 911 { 912 int is_noise; 913 opus_val32 noise_energy; 914 int is_sufficiently_quiet; 915 916 if (!is_silence) 917 { 918 is_noise = activity_probability < DTX_ACTIVITY_THRESHOLD; 919 if (is_noise) 920 { 921 noise_energy = compute_frame_energy(pcm, frame_size, channels, arch); 922 is_sufficiently_quiet = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy); 923 } 924 } 925 926 if (is_silence || (is_noise && is_sufficiently_quiet)) 927 { 928 /* The number of consecutive DTX frames should be within the allowed bounds */ 929 (*nb_no_activity_frames)++; 930 931 if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX) 932 { 933 if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)) 934 /* Valid frame for DTX! */ 935 return 1; 936 else 937 (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX; 938 } 939 } else 940 (*nb_no_activity_frames) = 0; 941 942 return 0; 943 } 944 945 #endif 946 947 static opus_int32 encode_multiframe_packet(OpusEncoder *st, 948 const opus_val16 *pcm, 949 int nb_frames, 950 int frame_size, 951 unsigned char *data, 952 opus_int32 out_data_bytes, 953 int to_celt, 954 int lsb_depth, 955 int float_api) 956 { 957 int i; 958 int ret = 0; 959 VARDECL(unsigned char, tmp_data); 960 int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; 961 VARDECL(OpusRepacketizer, rp); 962 int max_header_bytes; 963 opus_int32 bytes_per_frame; 964 opus_int32 cbr_bytes; 965 opus_int32 repacketize_len; 966 int tmp_len; 967 ALLOC_STACK; 968 969 /* Worst cases: 970 * 2 frames: Code 2 with different compressed sizes 971 * >2 frames: Code 3 VBR */ 972 max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2); 973 974 if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX) 975 repacketize_len = out_data_bytes; 976 else { 977 cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames)); 978 repacketize_len = IMIN(cbr_bytes, out_data_bytes); 979 } 980 bytes_per_frame = IMIN(1276, 1+(repacketize_len-max_header_bytes)/nb_frames); 981 982 ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); 983 ALLOC(rp, 1, OpusRepacketizer); 984 opus_repacketizer_init(rp); 985 986 bak_mode = st->user_forced_mode; 987 bak_bandwidth = st->user_bandwidth; 988 bak_channels = st->force_channels; 989 990 st->user_forced_mode = st->mode; 991 st->user_bandwidth = st->bandwidth; 992 st->force_channels = st->stream_channels; 993 994 bak_to_mono = st->silk_mode.toMono; 995 if (bak_to_mono) 996 st->force_channels = 1; 997 else 998 st->prev_channels = st->stream_channels; 999 1000 for (i=0;i<nb_frames;i++) 1001 { 1002 st->silk_mode.toMono = 0; 1003 st->nonfinal_frame = i<(nb_frames-1); 1004 1005 /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ 1006 if (to_celt && i==nb_frames-1) 1007 st->user_forced_mode = MODE_CELT_ONLY; 1008 1009 tmp_len = opus_encode_native(st, pcm+i*(st->channels*frame_size), frame_size, 1010 tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, NULL, 0, 0, 0, 0, 1011 NULL, float_api); 1012 1013 if (tmp_len<0) 1014 { 1015 RESTORE_STACK; 1016 return OPUS_INTERNAL_ERROR; 1017 } 1018 1019 ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); 1020 1021 if (ret<0) 1022 { 1023 RESTORE_STACK; 1024 return OPUS_INTERNAL_ERROR; 1025 } 1026 } 1027 1028 ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); 1029 1030 if (ret<0) 1031 { 1032 RESTORE_STACK; 1033 return OPUS_INTERNAL_ERROR; 1034 } 1035 1036 /* Discard configs that were forced locally for the purpose of repacketization */ 1037 st->user_forced_mode = bak_mode; 1038 st->user_bandwidth = bak_bandwidth; 1039 st->force_channels = bak_channels; 1040 st->silk_mode.toMono = bak_to_mono; 1041 1042 RESTORE_STACK; 1043 return ret; 1044 } 1045 1046 static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrate_bps, int frame_rate, int channels) 1047 { 1048 int redundancy_bytes_cap; 1049 int redundancy_bytes; 1050 opus_int32 redundancy_rate; 1051 int base_bits; 1052 opus_int32 available_bits; 1053 base_bits = (40*channels+20); 1054 1055 /* Equivalent rate for 5 ms frames. */ 1056 redundancy_rate = bitrate_bps + base_bits*(200 - frame_rate); 1057 /* For VBR, further increase the bitrate if we can afford it. It's pretty short 1058 and we'll avoid artefacts. */ 1059 redundancy_rate = 3*redundancy_rate/2; 1060 redundancy_bytes = redundancy_rate/1600; 1061 1062 /* Compute the max rate we can use given CBR or VBR with cap. */ 1063 available_bits = max_data_bytes*8 - 2*base_bits; 1064 redundancy_bytes_cap = (available_bits*240/(240+48000/frame_rate) + base_bits)/8; 1065 redundancy_bytes = IMIN(redundancy_bytes, redundancy_bytes_cap); 1066 /* It we can't get enough bits for redundancy to be worth it, rely on the decoder PLC. */ 1067 if (redundancy_bytes > 4 + 8*channels) 1068 redundancy_bytes = IMIN(257, redundancy_bytes); 1069 else 1070 redundancy_bytes = 0; 1071 return redundancy_bytes; 1072 } 1073 1074 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, 1075 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, 1076 const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, 1077 int analysis_channels, downmix_func downmix, int float_api) 1078 { 1079 void *silk_enc; 1080 CELTEncoder *celt_enc; 1081 int i; 1082 int ret=0; 1083 opus_int32 nBytes; 1084 ec_enc enc; 1085 int bytes_target; 1086 int prefill=0; 1087 int start_band = 0; 1088 int redundancy = 0; 1089 int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */ 1090 int celt_to_silk = 0; 1091 VARDECL(opus_val16, pcm_buf); 1092 int nb_compr_bytes; 1093 int to_celt = 0; 1094 opus_uint32 redundant_rng = 0; 1095 int cutoff_Hz, hp_freq_smth1; 1096 int voice_est; /* Probability of voice in Q7 */ 1097 opus_int32 equiv_rate; 1098 int delay_compensation; 1099 int frame_rate; 1100 opus_int32 max_rate; /* Max bitrate we're allowed to use */ 1101 int curr_bandwidth; 1102 opus_val16 HB_gain; 1103 opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ 1104 int total_buffer; 1105 opus_val16 stereo_width; 1106 const CELTMode *celt_mode; 1107 #ifndef DISABLE_FLOAT_API 1108 AnalysisInfo analysis_info; 1109 int analysis_read_pos_bak=-1; 1110 int analysis_read_subframe_bak=-1; 1111 int is_silence = 0; 1112 #endif 1113 VARDECL(opus_val16, tmp_prefill); 1114 1115 ALLOC_STACK; 1116 1117 max_data_bytes = IMIN(1276, out_data_bytes); 1118 1119 st->rangeFinal = 0; 1120 if (frame_size <= 0 || max_data_bytes <= 0) 1121 { 1122 RESTORE_STACK; 1123 return OPUS_BAD_ARG; 1124 } 1125 1126 /* Cannot encode 100 ms in 1 byte */ 1127 if (max_data_bytes==1 && st->Fs==(frame_size*10)) 1128 { 1129 RESTORE_STACK; 1130 return OPUS_BUFFER_TOO_SMALL; 1131 } 1132 1133 silk_enc = (char*)st+st->silk_enc_offset; 1134 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 1135 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1136 delay_compensation = 0; 1137 else 1138 delay_compensation = st->delay_compensation; 1139 1140 lsb_depth = IMIN(lsb_depth, st->lsb_depth); 1141 1142 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); 1143 #ifndef DISABLE_FLOAT_API 1144 analysis_info.valid = 0; 1145 #ifdef FIXED_POINT 1146 if (st->silk_mode.complexity >= 10 && st->Fs>=16000) 1147 #else 1148 if (st->silk_mode.complexity >= 7 && st->Fs>=16000) 1149 #endif 1150 { 1151 if (is_digital_silence(pcm, frame_size, st->channels, lsb_depth)) 1152 { 1153 is_silence = 1; 1154 } else { 1155 analysis_read_pos_bak = st->analysis.read_pos; 1156 analysis_read_subframe_bak = st->analysis.read_subframe; 1157 run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, 1158 c1, c2, analysis_channels, st->Fs, 1159 lsb_depth, downmix, &analysis_info); 1160 } 1161 1162 /* Track the peak signal energy */ 1163 if (!is_silence && analysis_info.activity_probability > DTX_ACTIVITY_THRESHOLD) 1164 st->peak_signal_energy = MAX32(MULT16_32_Q15(QCONST16(0.999f, 15), st->peak_signal_energy), 1165 compute_frame_energy(pcm, frame_size, st->channels, st->arch)); 1166 } 1167 #else 1168 (void)analysis_pcm; 1169 (void)analysis_size; 1170 (void)c1; 1171 (void)c2; 1172 (void)analysis_channels; 1173 (void)downmix; 1174 #endif 1175 1176 #ifndef DISABLE_FLOAT_API 1177 /* Reset voice_ratio if this frame is not silent or if analysis is disabled. 1178 * Otherwise, preserve voice_ratio from the last non-silent frame */ 1179 if (!is_silence) 1180 st->voice_ratio = -1; 1181 1182 st->detected_bandwidth = 0; 1183 if (analysis_info.valid) 1184 { 1185 int analysis_bandwidth; 1186 if (st->signal_type == OPUS_AUTO) 1187 st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); 1188 1189 analysis_bandwidth = analysis_info.bandwidth; 1190 if (analysis_bandwidth<=12) 1191 st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1192 else if (analysis_bandwidth<=14) 1193 st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1194 else if (analysis_bandwidth<=16) 1195 st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1196 else if (analysis_bandwidth<=18) 1197 st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1198 else 1199 st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; 1200 } 1201 #else 1202 st->voice_ratio = -1; 1203 #endif 1204 1205 if (st->channels==2 && st->force_channels!=1) 1206 stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem); 1207 else 1208 stereo_width = 0; 1209 total_buffer = delay_compensation; 1210 st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); 1211 1212 frame_rate = st->Fs/frame_size; 1213 if (!st->use_vbr) 1214 { 1215 int cbrBytes; 1216 /* Multiply by 12 to make sure the division is exact. */ 1217 int frame_rate12 = 12*st->Fs/frame_size; 1218 /* We need to make sure that "int" values always fit in 16 bits. */ 1219 cbrBytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes); 1220 st->bitrate_bps = cbrBytes*(opus_int32)frame_rate12*8/12; 1221 /* Make sure we provide at least one byte to avoid failing. */ 1222 max_data_bytes = IMAX(1, cbrBytes); 1223 } 1224 if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 1225 || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) 1226 { 1227 /*If the space is too low to do something useful, emit 'PLC' frames.*/ 1228 int tocmode = st->mode; 1229 int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; 1230 int packet_code = 0; 1231 int num_multiframes = 0; 1232 1233 if (tocmode==0) 1234 tocmode = MODE_SILK_ONLY; 1235 if (frame_rate>100) 1236 tocmode = MODE_CELT_ONLY; 1237 /* 40 ms -> 2 x 20 ms if in CELT_ONLY or HYBRID mode */ 1238 if (frame_rate==25 && tocmode!=MODE_SILK_ONLY) 1239 { 1240 frame_rate = 50; 1241 packet_code = 1; 1242 } 1243 1244 /* >= 60 ms frames */ 1245 if (frame_rate<=16) 1246 { 1247 /* 1 x 60 ms, 2 x 40 ms, 2 x 60 ms */ 1248 if (out_data_bytes==1 || (tocmode==MODE_SILK_ONLY && frame_rate!=10)) 1249 { 1250 tocmode = MODE_SILK_ONLY; 1251 1252 packet_code = frame_rate <= 12; 1253 frame_rate = frame_rate == 12 ? 25 : 16; 1254 } 1255 else 1256 { 1257 num_multiframes = 50/frame_rate; 1258 frame_rate = 50; 1259 packet_code = 3; 1260 } 1261 } 1262 1263 if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) 1264 bw=OPUS_BANDWIDTH_WIDEBAND; 1265 else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) 1266 bw=OPUS_BANDWIDTH_NARROWBAND; 1267 else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) 1268 bw=OPUS_BANDWIDTH_SUPERWIDEBAND; 1269 1270 data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); 1271 data[0] |= packet_code; 1272 1273 ret = packet_code <= 1 ? 1 : 2; 1274 1275 max_data_bytes = IMAX(max_data_bytes, ret); 1276 1277 if (packet_code==3) 1278 data[1] = num_multiframes; 1279 1280 if (!st->use_vbr) 1281 { 1282 ret = opus_packet_pad(data, ret, max_data_bytes); 1283 if (ret == OPUS_OK) 1284 ret = max_data_bytes; 1285 else 1286 ret = OPUS_INTERNAL_ERROR; 1287 } 1288 RESTORE_STACK; 1289 return ret; 1290 } 1291 max_rate = frame_rate*max_data_bytes*8; 1292 1293 /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ 1294 equiv_rate = compute_equiv_rate(st->bitrate_bps, st->channels, st->Fs/frame_size, 1295 st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); 1296 1297 if (st->signal_type == OPUS_SIGNAL_VOICE) 1298 voice_est = 127; 1299 else if (st->signal_type == OPUS_SIGNAL_MUSIC) 1300 voice_est = 0; 1301 else if (st->voice_ratio >= 0) 1302 { 1303 voice_est = st->voice_ratio*327>>8; 1304 /* For AUDIO, never be more than 90% confident of having speech */ 1305 if (st->application == OPUS_APPLICATION_AUDIO) 1306 voice_est = IMIN(voice_est, 115); 1307 } else if (st->application == OPUS_APPLICATION_VOIP) 1308 voice_est = 115; 1309 else 1310 voice_est = 48; 1311 1312 if (st->force_channels!=OPUS_AUTO && st->channels == 2) 1313 { 1314 st->stream_channels = st->force_channels; 1315 } else { 1316 #ifdef FUZZING 1317 /* Random mono/stereo decision */ 1318 if (st->channels == 2 && (rand()&0x1F)==0) 1319 st->stream_channels = 3-st->stream_channels; 1320 #else 1321 /* Rate-dependent mono-stereo decision */ 1322 if (st->channels == 2) 1323 { 1324 opus_int32 stereo_threshold; 1325 stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14); 1326 if (st->stream_channels == 2) 1327 stereo_threshold -= 1000; 1328 else 1329 stereo_threshold += 1000; 1330 st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1; 1331 } else { 1332 st->stream_channels = st->channels; 1333 } 1334 #endif 1335 } 1336 /* Update equivalent rate for channels decision. */ 1337 equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size, 1338 st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); 1339 1340 /* Mode selection depending on application and signal type */ 1341 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) 1342 { 1343 st->mode = MODE_CELT_ONLY; 1344 } else if (st->user_forced_mode == OPUS_AUTO) 1345 { 1346 #ifdef FUZZING 1347 /* Random mode switching */ 1348 if ((rand()&0xF)==0) 1349 { 1350 if ((rand()&0x1)==0) 1351 st->mode = MODE_CELT_ONLY; 1352 else 1353 st->mode = MODE_SILK_ONLY; 1354 } else { 1355 if (st->prev_mode==MODE_CELT_ONLY) 1356 st->mode = MODE_CELT_ONLY; 1357 else 1358 st->mode = MODE_SILK_ONLY; 1359 } 1360 #else 1361 opus_int32 mode_voice, mode_music; 1362 opus_int32 threshold; 1363 1364 /* Interpolate based on stereo width */ 1365 mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0]) 1366 + MULT16_32_Q15(stereo_width,mode_thresholds[1][0])); 1367 mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) 1368 + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); 1369 /* Interpolate based on speech/music probability */ 1370 threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); 1371 /* Bias towards SILK for VoIP because of some useful features */ 1372 if (st->application == OPUS_APPLICATION_VOIP) 1373 threshold += 8000; 1374 1375 /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/ 1376 /* Hysteresis */ 1377 if (st->prev_mode == MODE_CELT_ONLY) 1378 threshold -= 4000; 1379 else if (st->prev_mode>0) 1380 threshold += 4000; 1381 1382 st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY; 1383 1384 /* When FEC is enabled and there's enough packet loss, use SILK */ 1385 if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) 1386 st->mode = MODE_SILK_ONLY; 1387 /* When encoding voice and DTX is enabled but the generalized DTX cannot be used, 1388 because of complexity and sampling frequency settings, switch to SILK DTX and 1389 set the encoder to SILK mode */ 1390 #ifndef DISABLE_FLOAT_API 1391 st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence); 1392 #else 1393 st->silk_mode.useDTX = st->use_dtx; 1394 #endif 1395 if (st->silk_mode.useDTX && voice_est > 100) 1396 st->mode = MODE_SILK_ONLY; 1397 #endif 1398 1399 /* If max_data_bytes represents less than 6 kb/s, switch to CELT-only mode */ 1400 if (max_data_bytes < (frame_rate > 50 ? 9000 : 6000)*frame_size / (st->Fs * 8)) 1401 st->mode = MODE_CELT_ONLY; 1402 } else { 1403 st->mode = st->user_forced_mode; 1404 } 1405 1406 /* Override the chosen mode to make sure we meet the requested frame size */ 1407 if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100) 1408 st->mode = MODE_CELT_ONLY; 1409 if (st->lfe) 1410 st->mode = MODE_CELT_ONLY; 1411 1412 if (st->prev_mode > 0 && 1413 ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || 1414 (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) 1415 { 1416 redundancy = 1; 1417 celt_to_silk = (st->mode != MODE_CELT_ONLY); 1418 if (!celt_to_silk) 1419 { 1420 /* Switch to SILK/hybrid if frame size is 10 ms or more*/ 1421 if (frame_size >= st->Fs/100) 1422 { 1423 st->mode = st->prev_mode; 1424 to_celt = 1; 1425 } else { 1426 redundancy=0; 1427 } 1428 } 1429 } 1430 1431 /* When encoding multiframes, we can ask for a switch to CELT only in the last frame. This switch 1432 * is processed above as the requested mode shouldn't interrupt stereo->mono transition. */ 1433 if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 1434 && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) 1435 { 1436 /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ 1437 st->silk_mode.toMono = 1; 1438 st->stream_channels = 2; 1439 } else { 1440 st->silk_mode.toMono = 0; 1441 } 1442 1443 /* Update equivalent rate with mode decision. */ 1444 equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size, 1445 st->use_vbr, st->mode, st->silk_mode.complexity, st->silk_mode.packetLossPercentage); 1446 1447 if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) 1448 { 1449 silk_EncControlStruct dummy; 1450 silk_InitEncoder( silk_enc, st->arch, &dummy); 1451 prefill=1; 1452 } 1453 1454 /* Automatic (rate-dependent) bandwidth selection */ 1455 if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) 1456 { 1457 const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; 1458 opus_int32 bandwidth_thresholds[8]; 1459 int bandwidth = OPUS_BANDWIDTH_FULLBAND; 1460 1461 if (st->channels==2 && st->force_channels!=1) 1462 { 1463 voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; 1464 music_bandwidth_thresholds = stereo_music_bandwidth_thresholds; 1465 } else { 1466 voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds; 1467 music_bandwidth_thresholds = mono_music_bandwidth_thresholds; 1468 } 1469 /* Interpolate bandwidth thresholds depending on voice estimation */ 1470 for (i=0;i<8;i++) 1471 { 1472 bandwidth_thresholds[i] = music_bandwidth_thresholds[i] 1473 + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14); 1474 } 1475 do { 1476 int threshold, hysteresis; 1477 threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; 1478 hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; 1479 if (!st->first) 1480 { 1481 if (st->auto_bandwidth >= bandwidth) 1482 threshold -= hysteresis; 1483 else 1484 threshold += hysteresis; 1485 } 1486 if (equiv_rate >= threshold) 1487 break; 1488 } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); 1489 st->bandwidth = st->auto_bandwidth = bandwidth; 1490 /* Prevents any transition to SWB/FB until the SILK layer has fully 1491 switched to WB mode and turned the variable LP filter off */ 1492 if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1493 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1494 } 1495 1496 if (st->bandwidth>st->max_bandwidth) 1497 st->bandwidth = st->max_bandwidth; 1498 1499 if (st->user_bandwidth != OPUS_AUTO) 1500 st->bandwidth = st->user_bandwidth; 1501 1502 /* This prevents us from using hybrid at unsafe CBR/max rates */ 1503 if (st->mode != MODE_CELT_ONLY && max_rate < 15000) 1504 { 1505 st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND); 1506 } 1507 1508 /* Prevents Opus from wasting bits on frequencies that are above 1509 the Nyquist rate of the input signal */ 1510 if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND) 1511 st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1512 if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1513 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1514 if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND) 1515 st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1516 if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND) 1517 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1518 #ifndef DISABLE_FLOAT_API 1519 /* Use detected bandwidth to reduce the encoded bandwidth. */ 1520 if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO) 1521 { 1522 int min_detected_bandwidth; 1523 /* Makes bandwidth detection more conservative just in case the detector 1524 gets it wrong when we could have coded a high bandwidth transparently. 1525 When operating in SILK/hybrid mode, we don't go below wideband to avoid 1526 more complicated switches that require redundancy. */ 1527 if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY) 1528 min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1529 else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY) 1530 min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1531 else if (equiv_rate <= 30000*st->stream_channels) 1532 min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1533 else if (equiv_rate <= 44000*st->stream_channels) 1534 min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 1535 else 1536 min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; 1537 1538 st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth); 1539 st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); 1540 } 1541 #endif 1542 st->silk_mode.LBRR_coded = decide_fec(st->silk_mode.useInBandFEC, st->silk_mode.packetLossPercentage, 1543 st->silk_mode.LBRR_coded, st->mode, &st->bandwidth, equiv_rate); 1544 celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); 1545 1546 /* CELT mode doesn't support mediumband, use wideband instead */ 1547 if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) 1548 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1549 if (st->lfe) 1550 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1551 1552 curr_bandwidth = st->bandwidth; 1553 1554 /* Chooses the appropriate mode for speech 1555 *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ 1556 if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) 1557 st->mode = MODE_HYBRID; 1558 if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) 1559 st->mode = MODE_SILK_ONLY; 1560 1561 /* Can't support higher than >60 ms frames, and >20 ms when in Hybrid or CELT-only modes */ 1562 if ((frame_size > st->Fs/50 && (st->mode != MODE_SILK_ONLY)) || frame_size > 3*st->Fs/50) 1563 { 1564 int enc_frame_size; 1565 int nb_frames; 1566 1567 if (st->mode == MODE_SILK_ONLY) 1568 { 1569 if (frame_size == 2*st->Fs/25) /* 80 ms -> 2x 40 ms */ 1570 enc_frame_size = st->Fs/25; 1571 else if (frame_size == 3*st->Fs/25) /* 120 ms -> 2x 60 ms */ 1572 enc_frame_size = 3*st->Fs/50; 1573 else /* 100 ms -> 5x 20 ms */ 1574 enc_frame_size = st->Fs/50; 1575 } 1576 else 1577 enc_frame_size = st->Fs/50; 1578 1579 nb_frames = frame_size/enc_frame_size; 1580 1581 #ifndef DISABLE_FLOAT_API 1582 if (analysis_read_pos_bak!= -1) 1583 { 1584 st->analysis.read_pos = analysis_read_pos_bak; 1585 st->analysis.read_subframe = analysis_read_subframe_bak; 1586 } 1587 #endif 1588 1589 ret = encode_multiframe_packet(st, pcm, nb_frames, enc_frame_size, data, 1590 out_data_bytes, to_celt, lsb_depth, float_api); 1591 1592 RESTORE_STACK; 1593 return ret; 1594 } 1595 1596 /* For the first frame at a new SILK bandwidth */ 1597 if (st->silk_bw_switch) 1598 { 1599 redundancy = 1; 1600 celt_to_silk = 1; 1601 st->silk_bw_switch = 0; 1602 prefill=1; 1603 } 1604 1605 /* If we decided to go with CELT, make sure redundancy is off, no matter what 1606 we decided earlier. */ 1607 if (st->mode == MODE_CELT_ONLY) 1608 redundancy = 0; 1609 1610 if (redundancy) 1611 { 1612 redundancy_bytes = compute_redundancy_bytes(max_data_bytes, st->bitrate_bps, frame_rate, st->stream_channels); 1613 if (redundancy_bytes == 0) 1614 redundancy = 0; 1615 } 1616 1617 /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ 1618 bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; 1619 1620 data += 1; 1621 1622 ec_enc_init(&enc, data, max_data_bytes-1); 1623 1624 ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); 1625 OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels); 1626 1627 if (st->mode == MODE_CELT_ONLY) 1628 hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 1629 else 1630 hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15; 1631 1632 st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15, 1633 hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); 1634 1635 /* convert from log scale to Hertz */ 1636 cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) ); 1637 1638 if (st->application == OPUS_APPLICATION_VOIP) 1639 { 1640 hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch); 1641 } else { 1642 dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); 1643 } 1644 #ifndef FIXED_POINT 1645 if (float_api) 1646 { 1647 opus_val32 sum; 1648 sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch); 1649 /* This should filter out both NaNs and ridiculous signals that could 1650 cause NaNs further down. */ 1651 if (!(sum < 1e9f) || celt_isnan(sum)) 1652 { 1653 OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels); 1654 st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0; 1655 } 1656 } 1657 #endif 1658 1659 1660 /* SILK processing */ 1661 HB_gain = Q15ONE; 1662 if (st->mode != MODE_CELT_ONLY) 1663 { 1664 opus_int32 total_bitRate, celt_rate; 1665 #ifdef FIXED_POINT 1666 const opus_int16 *pcm_silk; 1667 #else 1668 VARDECL(opus_int16, pcm_silk); 1669 ALLOC(pcm_silk, st->channels*frame_size, opus_int16); 1670 #endif 1671 1672 /* Distribute bits between SILK and CELT */ 1673 total_bitRate = 8 * bytes_target * frame_rate; 1674 if( st->mode == MODE_HYBRID ) { 1675 /* Base rate for SILK */ 1676 st->silk_mode.bitRate = compute_silk_rate_for_hybrid(total_bitRate, 1677 curr_bandwidth, st->Fs == 50 * frame_size, st->use_vbr, st->silk_mode.LBRR_coded); 1678 if (!st->energy_masking) 1679 { 1680 /* Increasingly attenuate high band when it gets allocated fewer bits */ 1681 celt_rate = total_bitRate - st->silk_mode.bitRate; 1682 HB_gain = Q15ONE - SHR32(celt_exp2(-celt_rate * QCONST16(1.f/1024, 10)), 1); 1683 } 1684 } else { 1685 /* SILK gets all bits */ 1686 st->silk_mode.bitRate = total_bitRate; 1687 } 1688 1689 /* Surround masking for SILK */ 1690 if (st->energy_masking && st->use_vbr && !st->lfe) 1691 { 1692 opus_val32 mask_sum=0; 1693 opus_val16 masking_depth; 1694 opus_int32 rate_offset; 1695 int c; 1696 int end = 17; 1697 opus_int16 srate = 16000; 1698 if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) 1699 { 1700 end = 13; 1701 srate = 8000; 1702 } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) 1703 { 1704 end = 15; 1705 srate = 12000; 1706 } 1707 for (c=0;c<st->channels;c++) 1708 { 1709 for(i=0;i<end;i++) 1710 { 1711 opus_val16 mask; 1712 mask = MAX16(MIN16(st->energy_masking[21*c+i], 1713 QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); 1714 if (mask > 0) 1715 mask = HALF16(mask); 1716 mask_sum += mask; 1717 } 1718 } 1719 /* Conservative rate reduction, we cut the masking in half */ 1720 masking_depth = mask_sum / end*st->channels; 1721 masking_depth += QCONST16(.2f, DB_SHIFT); 1722 rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); 1723 rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); 1724 /* Split the rate change between the SILK and CELT part for hybrid. */ 1725 if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND) 1726 st->silk_mode.bitRate += 3*rate_offset/5; 1727 else 1728 st->silk_mode.bitRate += rate_offset; 1729 } 1730 1731 st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; 1732 st->silk_mode.nChannelsAPI = st->channels; 1733 st->silk_mode.nChannelsInternal = st->stream_channels; 1734 if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 1735 st->silk_mode.desiredInternalSampleRate = 8000; 1736 } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 1737 st->silk_mode.desiredInternalSampleRate = 12000; 1738 } else { 1739 silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); 1740 st->silk_mode.desiredInternalSampleRate = 16000; 1741 } 1742 if( st->mode == MODE_HYBRID ) { 1743 /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ 1744 st->silk_mode.minInternalSampleRate = 16000; 1745 } else { 1746 st->silk_mode.minInternalSampleRate = 8000; 1747 } 1748 1749 st->silk_mode.maxInternalSampleRate = 16000; 1750 if (st->mode == MODE_SILK_ONLY) 1751 { 1752 opus_int32 effective_max_rate = max_rate; 1753 if (frame_rate > 50) 1754 effective_max_rate = effective_max_rate*2/3; 1755 if (effective_max_rate < 8000) 1756 { 1757 st->silk_mode.maxInternalSampleRate = 12000; 1758 st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); 1759 } 1760 if (effective_max_rate < 7000) 1761 { 1762 st->silk_mode.maxInternalSampleRate = 8000; 1763 st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); 1764 } 1765 } 1766 1767 st->silk_mode.useCBR = !st->use_vbr; 1768 1769 /* Call SILK encoder for the low band */ 1770 1771 /* Max bits for SILK, counting ToC, redundancy bytes, and optionally redundancy. */ 1772 st->silk_mode.maxBits = (max_data_bytes-1)*8; 1773 if (redundancy && redundancy_bytes >= 2) 1774 { 1775 /* Counting 1 bit for redundancy position and 20 bits for flag+size (only for hybrid). */ 1776 st->silk_mode.maxBits -= redundancy_bytes*8 + 1; 1777 if (st->mode == MODE_HYBRID) 1778 st->silk_mode.maxBits -= 20; 1779 } 1780 if (st->silk_mode.useCBR) 1781 { 1782 if (st->mode == MODE_HYBRID) 1783 { 1784 st->silk_mode.maxBits = IMIN(st->silk_mode.maxBits, st->silk_mode.bitRate * frame_size / st->Fs); 1785 } 1786 } else { 1787 /* Constrained VBR. */ 1788 if (st->mode == MODE_HYBRID) 1789 { 1790 /* Compute SILK bitrate corresponding to the max total bits available */ 1791 opus_int32 maxBitRate = compute_silk_rate_for_hybrid(st->silk_mode.maxBits*st->Fs / frame_size, 1792 curr_bandwidth, st->Fs == 50 * frame_size, st->use_vbr, st->silk_mode.LBRR_coded); 1793 st->silk_mode.maxBits = maxBitRate * frame_size / st->Fs; 1794 } 1795 } 1796 1797 if (prefill) 1798 { 1799 opus_int32 zero=0; 1800 int prefill_offset; 1801 /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode 1802 a discontinuity. The exact location is what we need to avoid leaving any "gap" 1803 in the audio when mixing with the redundant CELT frame. Here we can afford to 1804 overwrite st->delay_buffer because the only thing that uses it before it gets 1805 rewritten is tmp_prefill[] and even then only the part after the ramp really 1806 gets used (rather than sent to the encoder and discarded) */ 1807 prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); 1808 gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, 1809 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); 1810 OPUS_CLEAR(st->delay_buffer, prefill_offset); 1811 #ifdef FIXED_POINT 1812 pcm_silk = st->delay_buffer; 1813 #else 1814 for (i=0;i<st->encoder_buffer*st->channels;i++) 1815 pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); 1816 #endif 1817 silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); 1818 } 1819 1820 #ifdef FIXED_POINT 1821 pcm_silk = pcm_buf+total_buffer*st->channels; 1822 #else 1823 for (i=0;i<frame_size*st->channels;i++) 1824 pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); 1825 #endif 1826 ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); 1827 if( ret ) { 1828 /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ 1829 /* Handle error */ 1830 RESTORE_STACK; 1831 return OPUS_INTERNAL_ERROR; 1832 } 1833 1834 /* Extract SILK internal bandwidth for signaling in first byte */ 1835 if( st->mode == MODE_SILK_ONLY ) { 1836 if( st->silk_mode.internalSampleRate == 8000 ) { 1837 curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 1838 } else if( st->silk_mode.internalSampleRate == 12000 ) { 1839 curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 1840 } else if( st->silk_mode.internalSampleRate == 16000 ) { 1841 curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 1842 } 1843 } else { 1844 silk_assert( st->silk_mode.internalSampleRate == 16000 ); 1845 } 1846 1847 st->silk_mode.opusCanSwitch = st->silk_mode.switchReady && !st->nonfinal_frame; 1848 1849 if (nBytes==0) 1850 { 1851 st->rangeFinal = 0; 1852 data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 1853 RESTORE_STACK; 1854 return 1; 1855 } 1856 1857 /* FIXME: How do we allocate the redundancy for CBR? */ 1858 if (st->silk_mode.opusCanSwitch) 1859 { 1860 redundancy_bytes = compute_redundancy_bytes(max_data_bytes, st->bitrate_bps, frame_rate, st->stream_channels); 1861 redundancy = (redundancy_bytes != 0); 1862 celt_to_silk = 0; 1863 st->silk_bw_switch = 1; 1864 } 1865 } 1866 1867 /* CELT processing */ 1868 { 1869 int endband=21; 1870 1871 switch(curr_bandwidth) 1872 { 1873 case OPUS_BANDWIDTH_NARROWBAND: 1874 endband = 13; 1875 break; 1876 case OPUS_BANDWIDTH_MEDIUMBAND: 1877 case OPUS_BANDWIDTH_WIDEBAND: 1878 endband = 17; 1879 break; 1880 case OPUS_BANDWIDTH_SUPERWIDEBAND: 1881 endband = 19; 1882 break; 1883 case OPUS_BANDWIDTH_FULLBAND: 1884 endband = 21; 1885 break; 1886 } 1887 celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); 1888 celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); 1889 } 1890 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); 1891 if (st->mode != MODE_SILK_ONLY) 1892 { 1893 opus_val32 celt_pred=2; 1894 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 1895 /* We may still decide to disable prediction later */ 1896 if (st->silk_mode.reducedDependency) 1897 celt_pred = 0; 1898 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred)); 1899 1900 if (st->mode == MODE_HYBRID) 1901 { 1902 if( st->use_vbr ) { 1903 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate)); 1904 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0)); 1905 } 1906 } else { 1907 if (st->use_vbr) 1908 { 1909 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); 1910 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); 1911 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps)); 1912 } 1913 } 1914 } 1915 1916 ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); 1917 if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) 1918 { 1919 OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400); 1920 } 1921 1922 if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0) 1923 { 1924 OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer)); 1925 OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)], 1926 &pcm_buf[0], 1927 (frame_size+total_buffer)*st->channels); 1928 } else { 1929 OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels); 1930 } 1931 /* gain_fade() and stereo_fade() need to be after the buffer copying 1932 because we don't want any of this to affect the SILK part */ 1933 if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { 1934 gain_fade(pcm_buf, pcm_buf, 1935 st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); 1936 } 1937 st->prev_HB_gain = HB_gain; 1938 if (st->mode != MODE_HYBRID || st->stream_channels==1) 1939 st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-24000)); 1940 if( !st->energy_masking && st->channels == 2 ) { 1941 /* Apply stereo width reduction (at low bitrates) */ 1942 if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { 1943 opus_val16 g1, g2; 1944 g1 = st->hybrid_stereo_width_Q14; 1945 g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); 1946 #ifdef FIXED_POINT 1947 g1 = g1==16384 ? Q15ONE : SHL16(g1,1); 1948 g2 = g2==16384 ? Q15ONE : SHL16(g2,1); 1949 #else 1950 g1 *= (1.f/16384); 1951 g2 *= (1.f/16384); 1952 #endif 1953 stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, 1954 frame_size, st->channels, celt_mode->window, st->Fs); 1955 st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; 1956 } 1957 } 1958 1959 if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) 1960 { 1961 /* For SILK mode, the redundancy is inferred from the length */ 1962 if (st->mode == MODE_HYBRID) 1963 ec_enc_bit_logp(&enc, redundancy, 12); 1964 if (redundancy) 1965 { 1966 int max_redundancy; 1967 ec_enc_bit_logp(&enc, celt_to_silk, 1); 1968 if (st->mode == MODE_HYBRID) 1969 { 1970 /* Reserve the 8 bits needed for the redundancy length, 1971 and at least a few bits for CELT if possible */ 1972 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+8+3+7)>>3); 1973 } 1974 else 1975 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); 1976 /* Target the same bit-rate for redundancy as for the rest, 1977 up to a max of 257 bytes */ 1978 redundancy_bytes = IMIN(max_redundancy, redundancy_bytes); 1979 redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); 1980 if (st->mode == MODE_HYBRID) 1981 ec_enc_uint(&enc, redundancy_bytes-2, 256); 1982 } 1983 } else { 1984 redundancy = 0; 1985 } 1986 1987 if (!redundancy) 1988 { 1989 st->silk_bw_switch = 0; 1990 redundancy_bytes = 0; 1991 } 1992 if (st->mode != MODE_CELT_ONLY)start_band=17; 1993 1994 if (st->mode == MODE_SILK_ONLY) 1995 { 1996 ret = (ec_tell(&enc)+7)>>3; 1997 ec_enc_done(&enc); 1998 nb_compr_bytes = ret; 1999 } else { 2000 nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes; 2001 ec_enc_shrink(&enc, nb_compr_bytes); 2002 } 2003 2004 #ifndef DISABLE_FLOAT_API 2005 if (redundancy || st->mode != MODE_SILK_ONLY) 2006 celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); 2007 #endif 2008 if (st->mode == MODE_HYBRID) { 2009 SILKInfo info; 2010 info.signalType = st->silk_mode.signalType; 2011 info.offset = st->silk_mode.offset; 2012 celt_encoder_ctl(celt_enc, CELT_SET_SILK_INFO(&info)); 2013 } else { 2014 celt_encoder_ctl(celt_enc, CELT_SET_SILK_INFO((SILKInfo*)NULL)); 2015 } 2016 2017 /* 5 ms redundant frame for CELT->SILK */ 2018 if (redundancy && celt_to_silk) 2019 { 2020 int err; 2021 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); 2022 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 2023 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); 2024 err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); 2025 if (err < 0) 2026 { 2027 RESTORE_STACK; 2028 return OPUS_INTERNAL_ERROR; 2029 } 2030 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); 2031 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 2032 } 2033 2034 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band)); 2035 2036 if (st->mode != MODE_SILK_ONLY) 2037 { 2038 if (st->mode != st->prev_mode && st->prev_mode > 0) 2039 { 2040 unsigned char dummy[2]; 2041 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 2042 2043 /* Prefilling */ 2044 celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); 2045 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); 2046 } 2047 /* If false, we already busted the budget and we'll end up with a "PLC frame" */ 2048 if (ec_tell(&enc) <= 8*nb_compr_bytes) 2049 { 2050 /* Set the bitrate again if it was overridden in the redundancy code above*/ 2051 if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr) 2052 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate)); 2053 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr)); 2054 ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); 2055 if (ret < 0) 2056 { 2057 RESTORE_STACK; 2058 return OPUS_INTERNAL_ERROR; 2059 } 2060 /* Put CELT->SILK redundancy data in the right place. */ 2061 if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr) 2062 { 2063 OPUS_MOVE(data+ret, data+nb_compr_bytes, redundancy_bytes); 2064 nb_compr_bytes = nb_compr_bytes+redundancy_bytes; 2065 } 2066 } 2067 } 2068 2069 /* 5 ms redundant frame for SILK->CELT */ 2070 if (redundancy && !celt_to_silk) 2071 { 2072 int err; 2073 unsigned char dummy[2]; 2074 int N2, N4; 2075 N2 = st->Fs/200; 2076 N4 = st->Fs/400; 2077 2078 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 2079 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); 2080 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); 2081 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); 2082 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); 2083 2084 if (st->mode == MODE_HYBRID) 2085 { 2086 /* Shrink packet to what the encoder actually used. */ 2087 nb_compr_bytes = ret; 2088 ec_enc_shrink(&enc, nb_compr_bytes); 2089 } 2090 /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ 2091 celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); 2092 2093 err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); 2094 if (err < 0) 2095 { 2096 RESTORE_STACK; 2097 return OPUS_INTERNAL_ERROR; 2098 } 2099 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); 2100 } 2101 2102 2103 2104 /* Signalling the mode in the first byte */ 2105 data--; 2106 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 2107 2108 st->rangeFinal = enc.rng ^ redundant_rng; 2109 2110 if (to_celt) 2111 st->prev_mode = MODE_CELT_ONLY; 2112 else 2113 st->prev_mode = st->mode; 2114 st->prev_channels = st->stream_channels; 2115 st->prev_framesize = frame_size; 2116 2117 st->first = 0; 2118 2119 /* DTX decision */ 2120 #ifndef DISABLE_FLOAT_API 2121 if (st->use_dtx && (analysis_info.valid || is_silence)) 2122 { 2123 if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, 2124 st->peak_signal_energy, pcm, frame_size, st->channels, is_silence, st->arch)) 2125 { 2126 st->rangeFinal = 0; 2127 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 2128 RESTORE_STACK; 2129 return 1; 2130 } 2131 } 2132 #endif 2133 2134 /* In the unlikely case that the SILK encoder busted its target, tell 2135 the decoder to call the PLC */ 2136 if (ec_tell(&enc) > (max_data_bytes-1)*8) 2137 { 2138 if (max_data_bytes < 2) 2139 { 2140 RESTORE_STACK; 2141 return OPUS_BUFFER_TOO_SMALL; 2142 } 2143 data[1] = 0; 2144 ret = 1; 2145 st->rangeFinal = 0; 2146 } else if (st->mode==MODE_SILK_ONLY&&!redundancy) 2147 { 2148 /*When in LPC only mode it's perfectly 2149 reasonable to strip off trailing zero bytes as 2150 the required range decoder behavior is to 2151 fill these in. This can't be done when the MDCT 2152 modes are used because the decoder needs to know 2153 the actual length for allocation purposes.*/ 2154 while(ret>2&&data[ret]==0)ret--; 2155 } 2156 /* Count ToC and redundancy */ 2157 ret += 1+redundancy_bytes; 2158 if (!st->use_vbr) 2159 { 2160 if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK) 2161 { 2162 RESTORE_STACK; 2163 return OPUS_INTERNAL_ERROR; 2164 } 2165 ret = max_data_bytes; 2166 } 2167 RESTORE_STACK; 2168 return ret; 2169 } 2170 2171 #ifdef FIXED_POINT 2172 2173 #ifndef DISABLE_FLOAT_API 2174 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, 2175 unsigned char *data, opus_int32 max_data_bytes) 2176 { 2177 int i, ret; 2178 int frame_size; 2179 VARDECL(opus_int16, in); 2180 ALLOC_STACK; 2181 2182 frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); 2183 if (frame_size <= 0) 2184 { 2185 RESTORE_STACK; 2186 return OPUS_BAD_ARG; 2187 } 2188 ALLOC(in, frame_size*st->channels, opus_int16); 2189 2190 for (i=0;i<frame_size*st->channels;i++) 2191 in[i] = FLOAT2INT16(pcm[i]); 2192 ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, 2193 pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); 2194 RESTORE_STACK; 2195 return ret; 2196 } 2197 #endif 2198 2199 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, 2200 unsigned char *data, opus_int32 out_data_bytes) 2201 { 2202 int frame_size; 2203 frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); 2204 return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, 2205 pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); 2206 } 2207 2208 #else 2209 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, 2210 unsigned char *data, opus_int32 max_data_bytes) 2211 { 2212 int i, ret; 2213 int frame_size; 2214 VARDECL(float, in); 2215 ALLOC_STACK; 2216 2217 frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); 2218 if (frame_size <= 0) 2219 { 2220 RESTORE_STACK; 2221 return OPUS_BAD_ARG; 2222 } 2223 ALLOC(in, frame_size*st->channels, float); 2224 2225 for (i=0;i<frame_size*st->channels;i++) 2226 in[i] = (1.0f/32768)*pcm[i]; 2227 ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, 2228 pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0); 2229 RESTORE_STACK; 2230 return ret; 2231 } 2232 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, 2233 unsigned char *data, opus_int32 out_data_bytes) 2234 { 2235 int frame_size; 2236 frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs); 2237 return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, 2238 pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1); 2239 } 2240 #endif 2241 2242 2243 int opus_encoder_ctl(OpusEncoder *st, int request, ...) 2244 { 2245 int ret; 2246 CELTEncoder *celt_enc; 2247 va_list ap; 2248 2249 ret = OPUS_OK; 2250 va_start(ap, request); 2251 2252 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); 2253 2254 switch (request) 2255 { 2256 case OPUS_SET_APPLICATION_REQUEST: 2257 { 2258 opus_int32 value = va_arg(ap, opus_int32); 2259 if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO 2260 && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2261 || (!st->first && st->application != value)) 2262 { 2263 ret = OPUS_BAD_ARG; 2264 break; 2265 } 2266 st->application = value; 2267 #ifndef DISABLE_FLOAT_API 2268 st->analysis.application = value; 2269 #endif 2270 } 2271 break; 2272 case OPUS_GET_APPLICATION_REQUEST: 2273 { 2274 opus_int32 *value = va_arg(ap, opus_int32*); 2275 if (!value) 2276 { 2277 goto bad_arg; 2278 } 2279 *value = st->application; 2280 } 2281 break; 2282 case OPUS_SET_BITRATE_REQUEST: 2283 { 2284 opus_int32 value = va_arg(ap, opus_int32); 2285 if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX) 2286 { 2287 if (value <= 0) 2288 goto bad_arg; 2289 else if (value <= 500) 2290 value = 500; 2291 else if (value > (opus_int32)300000*st->channels) 2292 value = (opus_int32)300000*st->channels; 2293 } 2294 st->user_bitrate_bps = value; 2295 } 2296 break; 2297 case OPUS_GET_BITRATE_REQUEST: 2298 { 2299 opus_int32 *value = va_arg(ap, opus_int32*); 2300 if (!value) 2301 { 2302 goto bad_arg; 2303 } 2304 *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276); 2305 } 2306 break; 2307 case OPUS_SET_FORCE_CHANNELS_REQUEST: 2308 { 2309 opus_int32 value = va_arg(ap, opus_int32); 2310 if((value<1 || value>st->channels) && value != OPUS_AUTO) 2311 { 2312 goto bad_arg; 2313 } 2314 st->force_channels = value; 2315 } 2316 break; 2317 case OPUS_GET_FORCE_CHANNELS_REQUEST: 2318 { 2319 opus_int32 *value = va_arg(ap, opus_int32*); 2320 if (!value) 2321 { 2322 goto bad_arg; 2323 } 2324 *value = st->force_channels; 2325 } 2326 break; 2327 case OPUS_SET_MAX_BANDWIDTH_REQUEST: 2328 { 2329 opus_int32 value = va_arg(ap, opus_int32); 2330 if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) 2331 { 2332 goto bad_arg; 2333 } 2334 st->max_bandwidth = value; 2335 if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 2336 st->silk_mode.maxInternalSampleRate = 8000; 2337 } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 2338 st->silk_mode.maxInternalSampleRate = 12000; 2339 } else { 2340 st->silk_mode.maxInternalSampleRate = 16000; 2341 } 2342 } 2343 break; 2344 case OPUS_GET_MAX_BANDWIDTH_REQUEST: 2345 { 2346 opus_int32 *value = va_arg(ap, opus_int32*); 2347 if (!value) 2348 { 2349 goto bad_arg; 2350 } 2351 *value = st->max_bandwidth; 2352 } 2353 break; 2354 case OPUS_SET_BANDWIDTH_REQUEST: 2355 { 2356 opus_int32 value = va_arg(ap, opus_int32); 2357 if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO) 2358 { 2359 goto bad_arg; 2360 } 2361 st->user_bandwidth = value; 2362 if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { 2363 st->silk_mode.maxInternalSampleRate = 8000; 2364 } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { 2365 st->silk_mode.maxInternalSampleRate = 12000; 2366 } else { 2367 st->silk_mode.maxInternalSampleRate = 16000; 2368 } 2369 } 2370 break; 2371 case OPUS_GET_BANDWIDTH_REQUEST: 2372 { 2373 opus_int32 *value = va_arg(ap, opus_int32*); 2374 if (!value) 2375 { 2376 goto bad_arg; 2377 } 2378 *value = st->bandwidth; 2379 } 2380 break; 2381 case OPUS_SET_DTX_REQUEST: 2382 { 2383 opus_int32 value = va_arg(ap, opus_int32); 2384 if(value<0 || value>1) 2385 { 2386 goto bad_arg; 2387 } 2388 st->use_dtx = value; 2389 } 2390 break; 2391 case OPUS_GET_DTX_REQUEST: 2392 { 2393 opus_int32 *value = va_arg(ap, opus_int32*); 2394 if (!value) 2395 { 2396 goto bad_arg; 2397 } 2398 *value = st->use_dtx; 2399 } 2400 break; 2401 case OPUS_SET_COMPLEXITY_REQUEST: 2402 { 2403 opus_int32 value = va_arg(ap, opus_int32); 2404 if(value<0 || value>10) 2405 { 2406 goto bad_arg; 2407 } 2408 st->silk_mode.complexity = value; 2409 celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value)); 2410 } 2411 break; 2412 case OPUS_GET_COMPLEXITY_REQUEST: 2413 { 2414 opus_int32 *value = va_arg(ap, opus_int32*); 2415 if (!value) 2416 { 2417 goto bad_arg; 2418 } 2419 *value = st->silk_mode.complexity; 2420 } 2421 break; 2422 case OPUS_SET_INBAND_FEC_REQUEST: 2423 { 2424 opus_int32 value = va_arg(ap, opus_int32); 2425 if(value<0 || value>1) 2426 { 2427 goto bad_arg; 2428 } 2429 st->silk_mode.useInBandFEC = value; 2430 } 2431 break; 2432 case OPUS_GET_INBAND_FEC_REQUEST: 2433 { 2434 opus_int32 *value = va_arg(ap, opus_int32*); 2435 if (!value) 2436 { 2437 goto bad_arg; 2438 } 2439 *value = st->silk_mode.useInBandFEC; 2440 } 2441 break; 2442 case OPUS_SET_PACKET_LOSS_PERC_REQUEST: 2443 { 2444 opus_int32 value = va_arg(ap, opus_int32); 2445 if (value < 0 || value > 100) 2446 { 2447 goto bad_arg; 2448 } 2449 st->silk_mode.packetLossPercentage = value; 2450 celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value)); 2451 } 2452 break; 2453 case OPUS_GET_PACKET_LOSS_PERC_REQUEST: 2454 { 2455 opus_int32 *value = va_arg(ap, opus_int32*); 2456 if (!value) 2457 { 2458 goto bad_arg; 2459 } 2460 *value = st->silk_mode.packetLossPercentage; 2461 } 2462 break; 2463 case OPUS_SET_VBR_REQUEST: 2464 { 2465 opus_int32 value = va_arg(ap, opus_int32); 2466 if(value<0 || value>1) 2467 { 2468 goto bad_arg; 2469 } 2470 st->use_vbr = value; 2471 st->silk_mode.useCBR = 1-value; 2472 } 2473 break; 2474 case OPUS_GET_VBR_REQUEST: 2475 { 2476 opus_int32 *value = va_arg(ap, opus_int32*); 2477 if (!value) 2478 { 2479 goto bad_arg; 2480 } 2481 *value = st->use_vbr; 2482 } 2483 break; 2484 case OPUS_SET_VOICE_RATIO_REQUEST: 2485 { 2486 opus_int32 value = va_arg(ap, opus_int32); 2487 if (value<-1 || value>100) 2488 { 2489 goto bad_arg; 2490 } 2491 st->voice_ratio = value; 2492 } 2493 break; 2494 case OPUS_GET_VOICE_RATIO_REQUEST: 2495 { 2496 opus_int32 *value = va_arg(ap, opus_int32*); 2497 if (!value) 2498 { 2499 goto bad_arg; 2500 } 2501 *value = st->voice_ratio; 2502 } 2503 break; 2504 case OPUS_SET_VBR_CONSTRAINT_REQUEST: 2505 { 2506 opus_int32 value = va_arg(ap, opus_int32); 2507 if(value<0 || value>1) 2508 { 2509 goto bad_arg; 2510 } 2511 st->vbr_constraint = value; 2512 } 2513 break; 2514 case OPUS_GET_VBR_CONSTRAINT_REQUEST: 2515 { 2516 opus_int32 *value = va_arg(ap, opus_int32*); 2517 if (!value) 2518 { 2519 goto bad_arg; 2520 } 2521 *value = st->vbr_constraint; 2522 } 2523 break; 2524 case OPUS_SET_SIGNAL_REQUEST: 2525 { 2526 opus_int32 value = va_arg(ap, opus_int32); 2527 if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC) 2528 { 2529 goto bad_arg; 2530 } 2531 st->signal_type = value; 2532 } 2533 break; 2534 case OPUS_GET_SIGNAL_REQUEST: 2535 { 2536 opus_int32 *value = va_arg(ap, opus_int32*); 2537 if (!value) 2538 { 2539 goto bad_arg; 2540 } 2541 *value = st->signal_type; 2542 } 2543 break; 2544 case OPUS_GET_LOOKAHEAD_REQUEST: 2545 { 2546 opus_int32 *value = va_arg(ap, opus_int32*); 2547 if (!value) 2548 { 2549 goto bad_arg; 2550 } 2551 *value = st->Fs/400; 2552 if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY) 2553 *value += st->delay_compensation; 2554 } 2555 break; 2556 case OPUS_GET_SAMPLE_RATE_REQUEST: 2557 { 2558 opus_int32 *value = va_arg(ap, opus_int32*); 2559 if (!value) 2560 { 2561 goto bad_arg; 2562 } 2563 *value = st->Fs; 2564 } 2565 break; 2566 case OPUS_GET_FINAL_RANGE_REQUEST: 2567 { 2568 opus_uint32 *value = va_arg(ap, opus_uint32*); 2569 if (!value) 2570 { 2571 goto bad_arg; 2572 } 2573 *value = st->rangeFinal; 2574 } 2575 break; 2576 case OPUS_SET_LSB_DEPTH_REQUEST: 2577 { 2578 opus_int32 value = va_arg(ap, opus_int32); 2579 if (value<8 || value>24) 2580 { 2581 goto bad_arg; 2582 } 2583 st->lsb_depth=value; 2584 } 2585 break; 2586 case OPUS_GET_LSB_DEPTH_REQUEST: 2587 { 2588 opus_int32 *value = va_arg(ap, opus_int32*); 2589 if (!value) 2590 { 2591 goto bad_arg; 2592 } 2593 *value = st->lsb_depth; 2594 } 2595 break; 2596 case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: 2597 { 2598 opus_int32 value = va_arg(ap, opus_int32); 2599 if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && 2600 value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && 2601 value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && 2602 value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_80_MS && 2603 value != OPUS_FRAMESIZE_100_MS && value != OPUS_FRAMESIZE_120_MS) 2604 { 2605 goto bad_arg; 2606 } 2607 st->variable_duration = value; 2608 celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); 2609 } 2610 break; 2611 case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: 2612 { 2613 opus_int32 *value = va_arg(ap, opus_int32*); 2614 if (!value) 2615 { 2616 goto bad_arg; 2617 } 2618 *value = st->variable_duration; 2619 } 2620 break; 2621 case OPUS_SET_PREDICTION_DISABLED_REQUEST: 2622 { 2623 opus_int32 value = va_arg(ap, opus_int32); 2624 if (value > 1 || value < 0) 2625 goto bad_arg; 2626 st->silk_mode.reducedDependency = value; 2627 } 2628 break; 2629 case OPUS_GET_PREDICTION_DISABLED_REQUEST: 2630 { 2631 opus_int32 *value = va_arg(ap, opus_int32*); 2632 if (!value) 2633 goto bad_arg; 2634 *value = st->silk_mode.reducedDependency; 2635 } 2636 break; 2637 case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: 2638 { 2639 opus_int32 value = va_arg(ap, opus_int32); 2640 if(value<0 || value>1) 2641 { 2642 goto bad_arg; 2643 } 2644 celt_encoder_ctl(celt_enc, OPUS_SET_PHASE_INVERSION_DISABLED(value)); 2645 } 2646 break; 2647 case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: 2648 { 2649 opus_int32 *value = va_arg(ap, opus_int32*); 2650 if (!value) 2651 { 2652 goto bad_arg; 2653 } 2654 celt_encoder_ctl(celt_enc, OPUS_GET_PHASE_INVERSION_DISABLED(value)); 2655 } 2656 break; 2657 case OPUS_RESET_STATE: 2658 { 2659 void *silk_enc; 2660 silk_EncControlStruct dummy; 2661 char *start; 2662 silk_enc = (char*)st+st->silk_enc_offset; 2663 #ifndef DISABLE_FLOAT_API 2664 tonality_analysis_reset(&st->analysis); 2665 #endif 2666 2667 start = (char*)&st->OPUS_ENCODER_RESET_START; 2668 OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st)); 2669 2670 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); 2671 silk_InitEncoder( silk_enc, st->arch, &dummy ); 2672 st->stream_channels = st->channels; 2673 st->hybrid_stereo_width_Q14 = 1 << 14; 2674 st->prev_HB_gain = Q15ONE; 2675 st->first = 1; 2676 st->mode = MODE_HYBRID; 2677 st->bandwidth = OPUS_BANDWIDTH_FULLBAND; 2678 st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); 2679 } 2680 break; 2681 case OPUS_SET_FORCE_MODE_REQUEST: 2682 { 2683 opus_int32 value = va_arg(ap, opus_int32); 2684 if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO) 2685 { 2686 goto bad_arg; 2687 } 2688 st->user_forced_mode = value; 2689 } 2690 break; 2691 case OPUS_SET_LFE_REQUEST: 2692 { 2693 opus_int32 value = va_arg(ap, opus_int32); 2694 st->lfe = value; 2695 ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); 2696 } 2697 break; 2698 case OPUS_SET_ENERGY_MASK_REQUEST: 2699 { 2700 opus_val16 *value = va_arg(ap, opus_val16*); 2701 st->energy_masking = value; 2702 ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); 2703 } 2704 break; 2705 2706 case CELT_GET_MODE_REQUEST: 2707 { 2708 const CELTMode ** value = va_arg(ap, const CELTMode**); 2709 if (!value) 2710 { 2711 goto bad_arg; 2712 } 2713 ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value)); 2714 } 2715 break; 2716 default: 2717 /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/ 2718 ret = OPUS_UNIMPLEMENTED; 2719 break; 2720 } 2721 va_end(ap); 2722 return ret; 2723 bad_arg: 2724 va_end(ap); 2725 return OPUS_BAD_ARG; 2726 } 2727 2728 void opus_encoder_destroy(OpusEncoder *st) 2729 { 2730 opus_free(st); 2731 } 2732