1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #include "define.h" 32 #include "API.h" 33 #include "control.h" 34 #include "typedef.h" 35 #include "stack_alloc.h" 36 #include "structs.h" 37 #include "tuning_parameters.h" 38 #ifdef FIXED_POINT 39 #include "main_FIX.h" 40 #else 41 #include "main_FLP.h" 42 #endif 43 44 /***************************************/ 45 /* Read control structure from encoder */ 46 /***************************************/ 47 static opus_int silk_QueryEncoder( /* O Returns error code */ 48 const void *encState, /* I State */ 49 silk_EncControlStruct *encStatus /* O Encoder Status */ 50 ); 51 52 /****************************************/ 53 /* Encoder functions */ 54 /****************************************/ 55 56 opus_int silk_Get_Encoder_Size( /* O Returns error code */ 57 opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ 58 ) 59 { 60 opus_int ret = SILK_NO_ERROR; 61 62 *encSizeBytes = sizeof( silk_encoder ); 63 64 return ret; 65 } 66 67 /*************************/ 68 /* Init or Reset encoder */ 69 /*************************/ 70 opus_int silk_InitEncoder( /* O Returns error code */ 71 void *encState, /* I/O State */ 72 int arch, /* I Run-time architecture */ 73 silk_EncControlStruct *encStatus /* O Encoder Status */ 74 ) 75 { 76 silk_encoder *psEnc; 77 opus_int n, ret = SILK_NO_ERROR; 78 79 psEnc = (silk_encoder *)encState; 80 81 /* Reset encoder */ 82 silk_memset( psEnc, 0, sizeof( silk_encoder ) ); 83 for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { 84 if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { 85 silk_assert( 0 ); 86 } 87 } 88 89 psEnc->nChannelsAPI = 1; 90 psEnc->nChannelsInternal = 1; 91 92 /* Read control structure */ 93 if( ret += silk_QueryEncoder( encState, encStatus ) ) { 94 silk_assert( 0 ); 95 } 96 97 return ret; 98 } 99 100 /***************************************/ 101 /* Read control structure from encoder */ 102 /***************************************/ 103 static opus_int silk_QueryEncoder( /* O Returns error code */ 104 const void *encState, /* I State */ 105 silk_EncControlStruct *encStatus /* O Encoder Status */ 106 ) 107 { 108 opus_int ret = SILK_NO_ERROR; 109 silk_encoder_state_Fxx *state_Fxx; 110 silk_encoder *psEnc = (silk_encoder *)encState; 111 112 state_Fxx = psEnc->state_Fxx; 113 114 encStatus->nChannelsAPI = psEnc->nChannelsAPI; 115 encStatus->nChannelsInternal = psEnc->nChannelsInternal; 116 encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz; 117 encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz; 118 encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz; 119 encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz; 120 encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms; 121 encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps; 122 encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc; 123 encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity; 124 encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC; 125 encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX; 126 encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR; 127 encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); 128 encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch; 129 encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0; 130 131 return ret; 132 } 133 134 135 /**************************/ 136 /* Encode frame with Silk */ 137 /**************************/ 138 /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ 139 /* encControl->payloadSize_ms is set to */ 140 opus_int silk_Encode( /* O Returns error code */ 141 void *encState, /* I/O State */ 142 silk_EncControlStruct *encControl, /* I Control status */ 143 const opus_int16 *samplesIn, /* I Speech sample input vector */ 144 opus_int nSamplesIn, /* I Number of samples in input vector */ 145 ec_enc *psRangeEnc, /* I/O Compressor data structure */ 146 opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ 147 const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ 148 ) 149 { 150 opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; 151 opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms; 152 opus_int nSamplesFromInput = 0, nSamplesFromInputMax; 153 opus_int speech_act_thr_for_switch_Q8; 154 opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; 155 silk_encoder *psEnc = ( silk_encoder * )encState; 156 VARDECL( opus_int16, buf ); 157 opus_int transition, curr_block, tot_blocks; 158 SAVE_STACK; 159 160 if (encControl->reducedDependency) 161 { 162 psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1; 163 psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1; 164 } 165 psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; 166 167 /* Check values in encoder control structure */ 168 if( ( ret = check_control_input( encControl ) ) != 0 ) { 169 silk_assert( 0 ); 170 RESTORE_STACK; 171 return ret; 172 } 173 174 encControl->switchReady = 0; 175 176 if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { 177 /* Mono -> Stereo transition: init state of second channel and stereo state */ 178 ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); 179 silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); 180 silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); 181 psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; 182 psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; 183 psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; 184 psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; 185 psEnc->sStereo.width_prev_Q14 = 0; 186 psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); 187 if( psEnc->nChannelsAPI == 2 ) { 188 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) ); 189 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) ); 190 } 191 } 192 193 transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal); 194 195 psEnc->nChannelsAPI = encControl->nChannelsAPI; 196 psEnc->nChannelsInternal = encControl->nChannelsInternal; 197 198 nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate ); 199 tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1; 200 curr_block = 0; 201 if( prefillFlag ) { 202 /* Only accept input length of 10 ms */ 203 if( nBlocksOf10ms != 1 ) { 204 silk_assert( 0 ); 205 RESTORE_STACK; 206 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 207 } 208 /* Reset Encoder */ 209 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 210 ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); 211 silk_assert( !ret ); 212 } 213 tmp_payloadSize_ms = encControl->payloadSize_ms; 214 encControl->payloadSize_ms = 10; 215 tmp_complexity = encControl->complexity; 216 encControl->complexity = 0; 217 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 218 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 219 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1; 220 } 221 } else { 222 /* Only accept input lengths that are a multiple of 10 ms */ 223 if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { 224 silk_assert( 0 ); 225 RESTORE_STACK; 226 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 227 } 228 /* Make sure no more than one packet can be produced */ 229 if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { 230 silk_assert( 0 ); 231 RESTORE_STACK; 232 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; 233 } 234 } 235 236 TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 ); 237 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 238 /* Force the side channel to the same rate as the mid */ 239 opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0; 240 if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { 241 silk_assert( 0 ); 242 RESTORE_STACK; 243 return ret; 244 } 245 if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) { 246 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { 247 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0; 248 } 249 } 250 psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; 251 } 252 silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); 253 254 /* Input buffering/resampling and encoding */ 255 nSamplesToBufferMax = 256 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; 257 nSamplesFromInputMax = 258 silk_DIV32_16( nSamplesToBufferMax * 259 psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, 260 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); 261 ALLOC( buf, nSamplesFromInputMax, opus_int16 ); 262 while( 1 ) { 263 nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; 264 nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); 265 nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); 266 /* Resample and write to buffer */ 267 if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { 268 opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; 269 for( n = 0; n < nSamplesFromInput; n++ ) { 270 buf[ n ] = samplesIn[ 2 * n ]; 271 } 272 /* Making sure to start both resamplers from the same state when switching from mono to stereo */ 273 if( psEnc->nPrevChannelsInternal == 1 && id==0 ) { 274 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); 275 } 276 277 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 278 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 279 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 280 281 nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; 282 nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); 283 for( n = 0; n < nSamplesFromInput; n++ ) { 284 buf[ n ] = samplesIn[ 2 * n + 1 ]; 285 } 286 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, 287 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 288 289 psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; 290 } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { 291 /* Combine left and right channels before resampling */ 292 for( n = 0; n < nSamplesFromInput; n++ ) { 293 sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ]; 294 buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); 295 } 296 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 297 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 298 /* On the first mono frame, average the results for the two resampler states */ 299 if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) { 300 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, 301 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 302 for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) { 303 psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = 304 silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] 305 + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); 306 } 307 } 308 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 309 } else { 310 silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); 311 silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16)); 312 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, 313 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); 314 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; 315 } 316 317 samplesIn += nSamplesFromInput * encControl->nChannelsAPI; 318 nSamplesIn -= nSamplesFromInput; 319 320 /* Default */ 321 psEnc->allowBandwidthSwitch = 0; 322 323 /* Silk encoder */ 324 if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { 325 /* Enough data in input buffer, so encode */ 326 silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); 327 silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); 328 329 /* Deal with LBRR data */ 330 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { 331 /* Create space at start of payload for VAD and FEC flags */ 332 opus_uint8 iCDF[ 2 ] = { 0, 0 }; 333 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); 334 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); 335 336 /* Encode any LBRR data from previous packet */ 337 /* Encode LBRR flags */ 338 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 339 LBRR_symbol = 0; 340 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { 341 LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i ); 342 } 343 psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0; 344 if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) { 345 ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 ); 346 } 347 } 348 349 /* Code LBRR indices and excitation signals */ 350 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { 351 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 352 if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) { 353 opus_int condCoding; 354 355 if( encControl->nChannelsInternal == 2 && n == 0 ) { 356 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] ); 357 /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */ 358 if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) { 359 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] ); 360 } 361 } 362 /* Use conditional coding if previous frame available */ 363 if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) { 364 condCoding = CODE_CONDITIONALLY; 365 } else { 366 condCoding = CODE_INDEPENDENTLY; 367 } 368 silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding ); 369 silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, 370 psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length ); 371 } 372 } 373 } 374 375 /* Reset LBRR flags */ 376 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 377 silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); 378 } 379 380 psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc ); 381 } 382 383 silk_HP_variable_cutoff( psEnc->state_Fxx ); 384 385 /* Total target bits for packet */ 386 nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); 387 /* Subtract bits used for LBRR */ 388 if( !prefillFlag ) { 389 nBits -= psEnc->nBitsUsedLBRR; 390 } 391 /* Divide by number of uncoded frames left in packet */ 392 nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket ); 393 /* Convert to bits/second */ 394 if( encControl->payloadSize_ms == 10 ) { 395 TargetRate_bps = silk_SMULBB( nBits, 100 ); 396 } else { 397 TargetRate_bps = silk_SMULBB( nBits, 50 ); 398 } 399 /* Subtract fraction of bits in excess of target in previous frames and packets */ 400 TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); 401 if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) { 402 /* Compare actual vs target bits so far in this packet */ 403 opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; 404 TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); 405 } 406 /* Never exceed input bitrate */ 407 TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); 408 409 /* Convert Left/Right to Mid/Side */ 410 if( encControl->nChannelsInternal == 2 ) { 411 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], 412 psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], 413 MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, 414 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); 415 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { 416 /* Reset side channel encoder memory for first frame with side coding */ 417 if( psEnc->prev_decode_only_middle == 1 ) { 418 silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); 419 silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) ); 420 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); 421 silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); 422 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); 423 psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; 424 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; 425 psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; 426 psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; 427 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; 428 psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; 429 } 430 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] ); 431 } else { 432 psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; 433 } 434 if( !prefillFlag ) { 435 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); 436 if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { 437 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); 438 } 439 } 440 } else { 441 /* Buffering */ 442 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 443 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); 444 } 445 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] ); 446 447 /* Encode */ 448 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 449 opus_int maxBits, useCBR; 450 451 /* Handling rate constraints */ 452 maxBits = encControl->maxBits; 453 if( tot_blocks == 2 && curr_block == 0 ) { 454 maxBits = maxBits * 3 / 5; 455 } else if( tot_blocks == 3 ) { 456 if( curr_block == 0 ) { 457 maxBits = maxBits * 2 / 5; 458 } else if( curr_block == 1 ) { 459 maxBits = maxBits * 3 / 4; 460 } 461 } 462 useCBR = encControl->useCBR && curr_block == tot_blocks - 1; 463 464 if( encControl->nChannelsInternal == 1 ) { 465 channelRate_bps = TargetRate_bps; 466 } else { 467 channelRate_bps = MStargetRates_bps[ n ]; 468 if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) { 469 useCBR = 0; 470 /* Give mid up to 1/2 of the max bits for that frame */ 471 maxBits -= encControl->maxBits / ( tot_blocks * 2 ); 472 } 473 } 474 475 if( channelRate_bps > 0 ) { 476 opus_int condCoding; 477 478 silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps ); 479 480 /* Use independent coding if no previous frame available */ 481 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) { 482 condCoding = CODE_INDEPENDENTLY; 483 } else if( n > 0 && psEnc->prev_decode_only_middle ) { 484 /* If we skipped a side frame in this packet, we don't 485 need LTP scaling; the LTP state is well-defined. */ 486 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 487 } else { 488 condCoding = CODE_CONDITIONALLY; 489 } 490 if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) { 491 silk_assert( 0 ); 492 } 493 } 494 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 495 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0; 496 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++; 497 } 498 psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ]; 499 500 /* Insert VAD and FEC flags at beginning of bitstream */ 501 if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) { 502 flags = 0; 503 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 504 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { 505 flags = silk_LSHIFT( flags, 1 ); 506 flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ]; 507 } 508 flags = silk_LSHIFT( flags, 1 ); 509 flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; 510 } 511 if( !prefillFlag ) { 512 ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); 513 } 514 515 /* Return zero bytes if all channels DTXed */ 516 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { 517 *nBytesOut = 0; 518 } 519 520 psEnc->nBitsExceeded += *nBytesOut * 8; 521 psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); 522 psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 ); 523 524 /* Update flag indicating if bandwidth switching is allowed */ 525 speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ), 526 SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms ); 527 if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) { 528 psEnc->allowBandwidthSwitch = 1; 529 psEnc->timeSinceSwitchAllowed_ms = 0; 530 } else { 531 psEnc->allowBandwidthSwitch = 0; 532 psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms; 533 } 534 } 535 536 if( nSamplesIn == 0 ) { 537 break; 538 } 539 } else { 540 break; 541 } 542 curr_block++; 543 } 544 545 psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; 546 547 encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; 548 encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0; 549 encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); 550 encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14; 551 if( prefillFlag ) { 552 encControl->payloadSize_ms = tmp_payloadSize_ms; 553 encControl->complexity = tmp_complexity; 554 for( n = 0; n < encControl->nChannelsInternal; n++ ) { 555 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; 556 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0; 557 } 558 } 559 560 RESTORE_STACK; 561 return ret; 562 } 563 564