1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #ifdef FIXED_POINT 32 #include "main_FIX.h" 33 #define silk_encoder_state_Fxx silk_encoder_state_FIX 34 #else 35 #include "main_FLP.h" 36 #define silk_encoder_state_Fxx silk_encoder_state_FLP 37 #endif 38 #include "stack_alloc.h" 39 #include "tuning_parameters.h" 40 #include "pitch_est_defines.h" 41 42 static opus_int silk_setup_resamplers( 43 silk_encoder_state_Fxx *psEnc, /* I/O */ 44 opus_int fs_kHz /* I */ 45 ); 46 47 static opus_int silk_setup_fs( 48 silk_encoder_state_Fxx *psEnc, /* I/O */ 49 opus_int fs_kHz, /* I */ 50 opus_int PacketSize_ms /* I */ 51 ); 52 53 static opus_int silk_setup_complexity( 54 silk_encoder_state *psEncC, /* I/O */ 55 opus_int Complexity /* I */ 56 ); 57 58 static OPUS_INLINE opus_int silk_setup_LBRR( 59 silk_encoder_state *psEncC, /* I/O */ 60 const silk_EncControlStruct *encControl /* I */ 61 ); 62 63 64 /* Control encoder */ 65 opus_int silk_control_encoder( 66 silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ 67 silk_EncControlStruct *encControl, /* I Control structure */ 68 const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ 69 const opus_int channelNb, /* I Channel number */ 70 const opus_int force_fs_kHz 71 ) 72 { 73 opus_int fs_kHz, ret = 0; 74 75 psEnc->sCmn.useDTX = encControl->useDTX; 76 psEnc->sCmn.useCBR = encControl->useCBR; 77 psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate; 78 psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate; 79 psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate; 80 psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate; 81 psEnc->sCmn.useInBandFEC = encControl->useInBandFEC; 82 psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI; 83 psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal; 84 psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch; 85 psEnc->sCmn.channelNb = channelNb; 86 87 if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) { 88 if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) { 89 /* Change in API sampling rate in the middle of encoding a packet */ 90 ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz ); 91 } 92 return ret; 93 } 94 95 /* Beyond this point we know that there are no previously coded frames in the payload buffer */ 96 97 /********************************************/ 98 /* Determine internal sampling rate */ 99 /********************************************/ 100 fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl ); 101 if( force_fs_kHz ) { 102 fs_kHz = force_fs_kHz; 103 } 104 /********************************************/ 105 /* Prepare resampler and buffered data */ 106 /********************************************/ 107 ret += silk_setup_resamplers( psEnc, fs_kHz ); 108 109 /********************************************/ 110 /* Set internal sampling frequency */ 111 /********************************************/ 112 ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms ); 113 114 /********************************************/ 115 /* Set encoding complexity */ 116 /********************************************/ 117 ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity ); 118 119 /********************************************/ 120 /* Set packet loss rate measured by farend */ 121 /********************************************/ 122 psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage; 123 124 /********************************************/ 125 /* Set LBRR usage */ 126 /********************************************/ 127 ret += silk_setup_LBRR( &psEnc->sCmn, encControl ); 128 129 psEnc->sCmn.controlled_since_last_payload = 1; 130 131 return ret; 132 } 133 134 static opus_int silk_setup_resamplers( 135 silk_encoder_state_Fxx *psEnc, /* I/O */ 136 opus_int fs_kHz /* I */ 137 ) 138 { 139 opus_int ret = SILK_NO_ERROR; 140 SAVE_STACK; 141 142 if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz ) 143 { 144 if( psEnc->sCmn.fs_kHz == 0 ) { 145 /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ 146 ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 ); 147 } else { 148 VARDECL( opus_int16, x_buf_API_fs_Hz ); 149 VARDECL( silk_resampler_state_struct, temp_resampler_state ); 150 #ifdef FIXED_POINT 151 opus_int16 *x_bufFIX = psEnc->x_buf; 152 #else 153 VARDECL( opus_int16, x_bufFIX ); 154 opus_int32 new_buf_samples; 155 #endif 156 opus_int32 api_buf_samples; 157 opus_int32 old_buf_samples; 158 opus_int32 buf_length_ms; 159 160 buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS; 161 old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz; 162 163 #ifndef FIXED_POINT 164 new_buf_samples = buf_length_ms * fs_kHz; 165 ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ), 166 opus_int16 ); 167 silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples ); 168 #endif 169 170 /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */ 171 ALLOC( temp_resampler_state, 1, silk_resampler_state_struct ); 172 ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 ); 173 174 /* Calculate number of samples to temporarily upsample */ 175 api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 ); 176 177 /* Temporary resampling of x_buf data to API_fs_Hz */ 178 ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 ); 179 ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples ); 180 181 /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ 182 ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 ); 183 184 /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */ 185 ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples ); 186 187 #ifndef FIXED_POINT 188 silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples); 189 #endif 190 } 191 } 192 193 psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz; 194 195 RESTORE_STACK; 196 return ret; 197 } 198 199 static opus_int silk_setup_fs( 200 silk_encoder_state_Fxx *psEnc, /* I/O */ 201 opus_int fs_kHz, /* I */ 202 opus_int PacketSize_ms /* I */ 203 ) 204 { 205 opus_int ret = SILK_NO_ERROR; 206 207 /* Set packet size */ 208 if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) { 209 if( ( PacketSize_ms != 10 ) && 210 ( PacketSize_ms != 20 ) && 211 ( PacketSize_ms != 40 ) && 212 ( PacketSize_ms != 60 ) ) { 213 ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; 214 } 215 if( PacketSize_ms <= 10 ) { 216 psEnc->sCmn.nFramesPerPacket = 1; 217 psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1; 218 psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz ); 219 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); 220 if( psEnc->sCmn.fs_kHz == 8 ) { 221 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; 222 } else { 223 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; 224 } 225 } else { 226 psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS ); 227 psEnc->sCmn.nb_subfr = MAX_NB_SUBFR; 228 psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz ); 229 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); 230 if( psEnc->sCmn.fs_kHz == 8 ) { 231 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; 232 } else { 233 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; 234 } 235 } 236 psEnc->sCmn.PacketSize_ms = PacketSize_ms; 237 psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ 238 } 239 240 /* Set internal sampling frequency */ 241 silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); 242 silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); 243 if( psEnc->sCmn.fs_kHz != fs_kHz ) { 244 /* reset part of the state */ 245 silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) ); 246 silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) ); 247 silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); 248 silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) ); 249 psEnc->sCmn.inputBufIx = 0; 250 psEnc->sCmn.nFramesEncoded = 0; 251 psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ 252 253 /* Initialize non-zero parameters */ 254 psEnc->sCmn.prevLag = 100; 255 psEnc->sCmn.first_frame_after_reset = 1; 256 psEnc->sShape.LastGainIndex = 10; 257 psEnc->sCmn.sNSQ.lagPrev = 100; 258 psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536; 259 psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; 260 261 psEnc->sCmn.fs_kHz = fs_kHz; 262 if( psEnc->sCmn.fs_kHz == 8 ) { 263 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { 264 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; 265 } else { 266 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; 267 } 268 } else { 269 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { 270 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; 271 } else { 272 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; 273 } 274 } 275 if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) { 276 psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER; 277 psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB; 278 } else { 279 psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER; 280 psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB; 281 } 282 psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz; 283 psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr ); 284 psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); 285 psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz ); 286 psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz ); 287 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { 288 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); 289 } else { 290 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); 291 } 292 if( psEnc->sCmn.fs_kHz == 16 ) { 293 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; 294 } else if( psEnc->sCmn.fs_kHz == 12 ) { 295 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; 296 } else { 297 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; 298 } 299 } 300 301 /* Check that settings are valid */ 302 silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length ); 303 304 return ret; 305 } 306 307 static opus_int silk_setup_complexity( 308 silk_encoder_state *psEncC, /* I/O */ 309 opus_int Complexity /* I */ 310 ) 311 { 312 opus_int ret = 0; 313 314 /* Set encoding complexity */ 315 silk_assert( Complexity >= 0 && Complexity <= 10 ); 316 if( Complexity < 1 ) { 317 psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; 318 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); 319 psEncC->pitchEstimationLPCOrder = 6; 320 psEncC->shapingLPCOrder = 12; 321 psEncC->la_shape = 3 * psEncC->fs_kHz; 322 psEncC->nStatesDelayedDecision = 1; 323 psEncC->useInterpolatedNLSFs = 0; 324 psEncC->NLSF_MSVQ_Survivors = 2; 325 psEncC->warping_Q16 = 0; 326 } else if( Complexity < 2 ) { 327 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 328 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); 329 psEncC->pitchEstimationLPCOrder = 8; 330 psEncC->shapingLPCOrder = 14; 331 psEncC->la_shape = 5 * psEncC->fs_kHz; 332 psEncC->nStatesDelayedDecision = 1; 333 psEncC->useInterpolatedNLSFs = 0; 334 psEncC->NLSF_MSVQ_Survivors = 3; 335 psEncC->warping_Q16 = 0; 336 } else if( Complexity < 3 ) { 337 psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; 338 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); 339 psEncC->pitchEstimationLPCOrder = 6; 340 psEncC->shapingLPCOrder = 12; 341 psEncC->la_shape = 3 * psEncC->fs_kHz; 342 psEncC->nStatesDelayedDecision = 2; 343 psEncC->useInterpolatedNLSFs = 0; 344 psEncC->NLSF_MSVQ_Survivors = 2; 345 psEncC->warping_Q16 = 0; 346 } else if( Complexity < 4 ) { 347 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 348 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); 349 psEncC->pitchEstimationLPCOrder = 8; 350 psEncC->shapingLPCOrder = 14; 351 psEncC->la_shape = 5 * psEncC->fs_kHz; 352 psEncC->nStatesDelayedDecision = 2; 353 psEncC->useInterpolatedNLSFs = 0; 354 psEncC->NLSF_MSVQ_Survivors = 4; 355 psEncC->warping_Q16 = 0; 356 } else if( Complexity < 6 ) { 357 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 358 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 ); 359 psEncC->pitchEstimationLPCOrder = 10; 360 psEncC->shapingLPCOrder = 16; 361 psEncC->la_shape = 5 * psEncC->fs_kHz; 362 psEncC->nStatesDelayedDecision = 2; 363 psEncC->useInterpolatedNLSFs = 1; 364 psEncC->NLSF_MSVQ_Survivors = 6; 365 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); 366 } else if( Complexity < 8 ) { 367 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 368 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 ); 369 psEncC->pitchEstimationLPCOrder = 12; 370 psEncC->shapingLPCOrder = 20; 371 psEncC->la_shape = 5 * psEncC->fs_kHz; 372 psEncC->nStatesDelayedDecision = 3; 373 psEncC->useInterpolatedNLSFs = 1; 374 psEncC->NLSF_MSVQ_Survivors = 8; 375 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); 376 } else { 377 psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX; 378 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 ); 379 psEncC->pitchEstimationLPCOrder = 16; 380 psEncC->shapingLPCOrder = 24; 381 psEncC->la_shape = 5 * psEncC->fs_kHz; 382 psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES; 383 psEncC->useInterpolatedNLSFs = 1; 384 psEncC->NLSF_MSVQ_Survivors = 16; 385 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); 386 } 387 388 /* Do not allow higher pitch estimation LPC order than predict LPC order */ 389 psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder ); 390 psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape; 391 psEncC->Complexity = Complexity; 392 393 silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER ); 394 silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER ); 395 silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES ); 396 silk_assert( psEncC->warping_Q16 <= 32767 ); 397 silk_assert( psEncC->la_shape <= LA_SHAPE_MAX ); 398 silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); 399 400 return ret; 401 } 402 403 static OPUS_INLINE opus_int silk_setup_LBRR( 404 silk_encoder_state *psEncC, /* I/O */ 405 const silk_EncControlStruct *encControl /* I */ 406 ) 407 { 408 opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR; 409 410 LBRR_in_previous_packet = psEncC->LBRR_enabled; 411 psEncC->LBRR_enabled = encControl->LBRR_coded; 412 if( psEncC->LBRR_enabled ) { 413 /* Set gain increase for coding LBRR excitation */ 414 if( LBRR_in_previous_packet == 0 ) { 415 /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */ 416 psEncC->LBRR_GainIncreases = 7; 417 } else { 418 psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); 419 } 420 } 421 422 return ret; 423 } 424