1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #include "define.h" 32 #include "API.h" 33 #include "control.h" 34 #include "typedef.h" 35 #include "stack_alloc.h" 36 #include "structs.h" 37 #include "tuning_parameters.h" 38 #ifdef FIXED_POINT 39 #include "main_FIX.h" 40 #else 41 #include "main_FLP.h" 42 #endif 43 44 /***************************************/ 45 /* Read control structure from encoder */ 46 /***************************************/ 47 static opus_int silk_QueryEncoder( /* O Returns error code */ 48 const void *encState, /* I State */ 49 silk_EncControlStruct *encStatus /* O Encoder Status */ 50 ); 51 52 /****************************************/ 53 /* Encoder functions */ 54 /****************************************/ 55 56 opus_int silk_Get_Encoder_Size( /* O Returns error code */ 57 opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ 58 ) 59 { 60 opus_int ret = SILK_NO_ERROR; 61 62 *encSizeBytes = sizeof( silk_encoder ); 63 64 return ret; 65 } 66 67 /*************************/ 68 /* Init or Reset encoder */ 69 /*************************/ 70 opus_int silk_InitEncoder( /* O Returns error code */ 71 void *encState, /* I/O State */ 72 int arch, /* I Run-time architecture */ 73 silk_EncControlStruct *encStatus /* O Encoder Status */ 74 ) 75 { 76 silk_encoder *psEnc; 77 opus_int n, ret = SILK_NO_ERROR; 78 79 psEnc = (silk_encoder *)encState; 80 81 /* Reset encoder */ 82 silk_memset( psEnc, 0, sizeof( silk_encoder ) ); 83 for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { 84 if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { 85 celt_assert( 0 ); 86 } 87 } 88 89 psEnc->nChannelsAPI = 1; 90 psEnc->nChannelsInternal = 1; 91 92 /* Read control structure */ 93 if( ret += silk_QueryEncoder( encState, encStatus ) ) { 94 celt_assert( 0 ); 95 } 96 97 return ret; 98 } 99 100 /***************************************/ 101 /* Read control structure from encoder */ 102 /***************************************/ 103 static opus_int silk_QueryEncoder( /* O Returns error code */ 104 const void *encState, /* I State */ 105 silk_EncControlStruct *encStatus /* O Encoder Status */ 106 ) 107 { 108 opus_int ret = SILK_NO_ERROR; 109 silk_encoder_state_Fxx *state_Fxx; 110 silk_encoder *psEnc = (silk_encoder *)encState; 111 112 state_Fxx = psEnc->state_Fxx; 113 114 encStatus->nChannelsAPI = psEnc->nChannelsAPI; 115 encStatus->nChannelsInternal = psEnc->nChannelsInternal; 116 encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz; 117 encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz; 118 encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz; 119 encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz; 120 encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms; 121 encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps; 122 encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc; 123 encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity; 124 encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC; 125 encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX; 126 encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR; 127 encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); 128 encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch; 129 encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0; 130 131 return ret; 132 } 133 134 135 /**************************/ 136 /* Encode frame with Silk */ 137 /**************************/ 138 /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ 139 /* encControl->payloadSize_ms is set to */ 140 opus_int silk_Encode( /* O Returns error code */ 141 void *encState, /* I/O State */ 142 silk_EncControlStruct *encControl, /* I Control status */ 143 const opus_int16 *samplesIn, /* I Speech sample input vector */ 144 opus_int nSamplesIn, /* I Number of samples in input vector */ 145 ec_enc *psRangeEnc, /* I/O Compressor data structure */ 146 opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ 147 const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */ 148 opus_int activity /* I Decision of Opus voice activity detector */ 149 ) 150 { 151 opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; 152 opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms; 153 opus_int nSamplesFromInput = 0, nSamplesFromInputMax; 154 opus_int speech_act_thr_for_switch_Q8; 155 opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; 156 silk_encoder *psEnc = ( silk_encoder * )encState; 157 VARDECL( opus_int16, buf ); 158 opus_int transition, curr_block, tot_blocks; 159 SAVE_STACK; 160 161 if (encControl->reducedDependency) 162 { 163 psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1; 164 psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1; 165 } 166 psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; 167 168 /* Check values in encoder control structure */ 169 if( ( ret = check_control_input( encControl ) ) != 0 ) { 170 celt_assert( 0 ); 171 RESTORE_STACK; 172 return ret; 173 } 174 175 encControl->switchReady = 0; 176 177 if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { 178 /* Mono -> Stereo transition: init state of second channel and stereo state */ 179 ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); 180 silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); 181 silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); 182 psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; 183 psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; 184 psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; 185 psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; 186 psEnc->sStereo.width_prev_Q14 = 0; 187 psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); 188 if( psEnc->nChannelsAPI == 2 ) { 189 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) ); 190 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) ); 191 } 192 } 193 194 transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal); 195 196 psEnc->nChannelsAPI = encControl->nChannelsAPI; 197 psEnc->nChannelsInternal = encControl->nChannelsInternal; 198 199 nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate ); 200 tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1; 201 curr_block = 0; 202 if( prefillFlag ) { 203 silk_LP_state save_LP; 204 /* Only accept input length of 10 ms */ 205 if( nBlocksOf10ms != 1 ) { 206 celt_assert( 0 ); 207 RESTORE_STACK; 208 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 209 } 210 if ( prefillFlag == 2 ) { 211 save_LP = psEnc->state_Fxx[ 0 ].sCmn.sLP; 212 /* Save the sampling rate so the bandwidth switching code can keep handling transitions. */ 213 save_LP.saved_fs_kHz = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; 214 } 215 /* Reset Encoder */ 216 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 217 ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); 218 /* Restore the variable LP state. */ 219 if ( prefillFlag == 2 ) { 220 psEnc->state_Fxx[ n ].sCmn.sLP = save_LP; 221 } 222 celt_assert( !ret ); 223 } 224 tmp_payloadSize_ms = encControl->payloadSize_ms; 225 encControl->payloadSize_ms = 10; 226 tmp_complexity = encControl->complexity; 227 encControl->complexity = 0; 228 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 229 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 230 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1; 231 } 232 } else { 233 /* Only accept input lengths that are a multiple of 10 ms */ 234 if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { 235 celt_assert( 0 ); 236 RESTORE_STACK; 237 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 238 } 239 /* Make sure no more than one packet can be produced */ 240 if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { 241 celt_assert( 0 ); 242 RESTORE_STACK; 243 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 244 } 245 } 246 247 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 248 /* Force the side channel to the same rate as the mid */ 249 opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0; 250 if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { 251 silk_assert( 0 ); 252 RESTORE_STACK; 253 return ret; 254 } 255 if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) { 256 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { 257 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0; 258 } 259 } 260 psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; 261 } 262 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); 263 264 /* Input buffering/resampling and encoding */ 265 nSamplesToBufferMax = 266 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; 267 nSamplesFromInputMax = 268 silk_DIV32_16( nSamplesToBufferMax * 269 psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, 270 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); 271 ALLOC( buf, nSamplesFromInputMax, opus_int16 ); 272 while( 1 ) { 273 nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; 274 nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); 275 nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); 276 /* Resample and write to buffer */ 277 if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { 278 opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; 279 for( n = 0; n < nSamplesFromInput; n++ ) { 280 buf[ n ] = samplesIn[ 2 * n ]; 281 } 282 /* Making sure to start both resamplers from the same state when switching from mono to stereo */ 283 if( psEnc->nPrevChannelsInternal == 1 && id==0 ) { 284 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); 285 } 286 287 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 288 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 289 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 290 291 nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; 292 nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); 293 for( n = 0; n < nSamplesFromInput; n++ ) { 294 buf[ n ] = samplesIn[ 2 * n + 1 ]; 295 } 296 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, 297 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 298 299 psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; 300 } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { 301 /* Combine left and right channels before resampling */ 302 for( n = 0; n < nSamplesFromInput; n++ ) { 303 sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ]; 304 buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); 305 } 306 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 307 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 308 /* On the first mono frame, average the results for the two resampler states */ 309 if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) { 310 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, 311 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 312 for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) { 313 psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = 314 silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] 315 + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); 316 } 317 } 318 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 319 } else { 320 celt_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); 321 silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16)); 322 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 323 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 324 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 325 } 326 327 samplesIn += nSamplesFromInput * encControl->nChannelsAPI; 328 nSamplesIn -= nSamplesFromInput; 329 330 /* Default */ 331 psEnc->allowBandwidthSwitch = 0; 332 333 /* Silk encoder */ 334 if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { 335 /* Enough data in input buffer, so encode */ 336 celt_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); 337 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); 338 339 /* Deal with LBRR data */ 340 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { 341 /* Create space at start of payload for VAD and FEC flags */ 342 opus_uint8 iCDF[ 2 ] = { 0, 0 }; 343 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); 344 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); 345 346 /* Encode any LBRR data from previous packet */ 347 /* Encode LBRR flags */ 348 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 349 LBRR_symbol = 0; 350 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { 351 LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i ); 352 } 353 psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0; 354 if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) { 355 ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 ); 356 } 357 } 358 359 /* Code LBRR indices and excitation signals */ 360 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { 361 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 362 if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) { 363 opus_int condCoding; 364 365 if( encControl->nChannelsInternal == 2 && n == 0 ) { 366 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] ); 367 /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */ 368 if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) { 369 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] ); 370 } 371 } 372 /* Use conditional coding if previous frame available */ 373 if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) { 374 condCoding = CODE_CONDITIONALLY; 375 } else { 376 condCoding = CODE_INDEPENDENTLY; 377 } 378 silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding ); 379 silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, 380 psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length ); 381 } 382 } 383 } 384 385 /* Reset LBRR flags */ 386 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 387 silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); 388 } 389 390 psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc ); 391 } 392 393 silk_HP_variable_cutoff( psEnc->state_Fxx ); 394 395 /* Total target bits for packet */ 396 nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); 397 /* Subtract bits used for LBRR */ 398 if( !prefillFlag ) { 399 nBits -= psEnc->nBitsUsedLBRR; 400 } 401 /* Divide by number of uncoded frames left in packet */ 402 nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket ); 403 /* Convert to bits/second */ 404 if( encControl->payloadSize_ms == 10 ) { 405 TargetRate_bps = silk_SMULBB( nBits, 100 ); 406 } else { 407 TargetRate_bps = silk_SMULBB( nBits, 50 ); 408 } 409 /* Subtract fraction of bits in excess of target in previous frames and packets */ 410 TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); 411 if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) { 412 /* Compare actual vs target bits so far in this packet */ 413 opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; 414 TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); 415 } 416 /* Never exceed input bitrate */ 417 TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); 418 419 /* Convert Left/Right to Mid/Side */ 420 if( encControl->nChannelsInternal == 2 ) { 421 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], 422 psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], 423 MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, 424 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); 425 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { 426 /* Reset side channel encoder memory for first frame with side coding */ 427 if( psEnc->prev_decode_only_middle == 1 ) { 428 silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); 429 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); 430 silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); 431 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); 432 psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; 433 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; 434 psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; 435 psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; 436 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; 437 psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; 438 } 439 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity ); 440 } else { 441 psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; 442 } 443 if( !prefillFlag ) { 444 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); 445 if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { 446 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); 447 } 448 } 449 } else { 450 /* Buffering */ 451 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 452 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); 453 } 454 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity ); 455 456 /* Encode */ 457 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 458 opus_int maxBits, useCBR; 459 460 /* Handling rate constraints */ 461 maxBits = encControl->maxBits; 462 if( tot_blocks == 2 && curr_block == 0 ) { 463 maxBits = maxBits * 3 / 5; 464 } else if( tot_blocks == 3 ) { 465 if( curr_block == 0 ) { 466 maxBits = maxBits * 2 / 5; 467 } else if( curr_block == 1 ) { 468 maxBits = maxBits * 3 / 4; 469 } 470 } 471 useCBR = encControl->useCBR && curr_block == tot_blocks - 1; 472 473 if( encControl->nChannelsInternal == 1 ) { 474 channelRate_bps = TargetRate_bps; 475 } else { 476 channelRate_bps = MStargetRates_bps[ n ]; 477 if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) { 478 useCBR = 0; 479 /* Give mid up to 1/2 of the max bits for that frame */ 480 maxBits -= encControl->maxBits / ( tot_blocks * 2 ); 481 } 482 } 483 484 if( channelRate_bps > 0 ) { 485 opus_int condCoding; 486 487 silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps ); 488 489 /* Use independent coding if no previous frame available */ 490 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) { 491 condCoding = CODE_INDEPENDENTLY; 492 } else if( n > 0 && psEnc->prev_decode_only_middle ) { 493 /* If we skipped a side frame in this packet, we don't 494 need LTP scaling; the LTP state is well-defined. */ 495 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 496 } else { 497 condCoding = CODE_CONDITIONALLY; 498 } 499 if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) { 500 silk_assert( 0 ); 501 } 502 } 503 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 504 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0; 505 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++; 506 } 507 psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ]; 508 509 /* Insert VAD and FEC flags at beginning of bitstream */ 510 if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) { 511 flags = 0; 512 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 513 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { 514 flags = silk_LSHIFT( flags, 1 ); 515 flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ]; 516 } 517 flags = silk_LSHIFT( flags, 1 ); 518 flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; 519 } 520 if( !prefillFlag ) { 521 ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); 522 } 523 524 /* Return zero bytes if all channels DTXed */ 525 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { 526 *nBytesOut = 0; 527 } 528 529 psEnc->nBitsExceeded += *nBytesOut * 8; 530 psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); 531 psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 ); 532 533 /* Update flag indicating if bandwidth switching is allowed */ 534 speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ), 535 SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms ); 536 if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) { 537 psEnc->allowBandwidthSwitch = 1; 538 psEnc->timeSinceSwitchAllowed_ms = 0; 539 } else { 540 psEnc->allowBandwidthSwitch = 0; 541 psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms; 542 } 543 } 544 545 if( nSamplesIn == 0 ) { 546 break; 547 } 548 } else { 549 break; 550 } 551 curr_block++; 552 } 553 554 psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; 555 556 encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; 557 encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0; 558 encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); 559 encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14; 560 if( prefillFlag ) { 561 encControl->payloadSize_ms = tmp_payloadSize_ms; 562 encControl->complexity = tmp_complexity; 563 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 564 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 565 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0; 566 } 567 } 568 569 encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType; 570 encControl->offset = silk_Quantization_Offsets_Q10 571 [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ] 572 [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ]; 573 RESTORE_STACK; 574 return ret; 575 } 576 577