1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #ifdef FIXED_POINT 32 #include "main_FIX.h" 33 #define silk_encoder_state_Fxx silk_encoder_state_FIX 34 #else 35 #include "main_FLP.h" 36 #define silk_encoder_state_Fxx silk_encoder_state_FLP 37 #endif 38 #include "tuning_parameters.h" 39 #include "pitch_est_defines.h" 40 41 static opus_int silk_setup_resamplers( 42 silk_encoder_state_Fxx *psEnc, /* I/O */ 43 opus_int fs_kHz /* I */ 44 ); 45 46 static opus_int silk_setup_fs( 47 silk_encoder_state_Fxx *psEnc, /* I/O */ 48 opus_int fs_kHz, /* I */ 49 opus_int PacketSize_ms /* I */ 50 ); 51 52 static opus_int silk_setup_complexity( 53 silk_encoder_state *psEncC, /* I/O */ 54 opus_int Complexity /* I */ 55 ); 56 57 static inline opus_int silk_setup_LBRR( 58 silk_encoder_state *psEncC, /* I/O */ 59 const opus_int32 TargetRate_bps /* I */ 60 ); 61 62 63 /* Control encoder */ 64 opus_int silk_control_encoder( 65 silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ 66 silk_EncControlStruct *encControl, /* I Control structure */ 67 const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ 68 const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ 69 const opus_int channelNb, /* I Channel number */ 70 const opus_int force_fs_kHz 71 ) 72 { 73 opus_int fs_kHz, ret = 0; 74 75 psEnc->sCmn.useDTX = encControl->useDTX; 76 psEnc->sCmn.useCBR = encControl->useCBR; 77 psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate; 78 psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate; 79 psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate; 80 psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate; 81 psEnc->sCmn.useInBandFEC = encControl->useInBandFEC; 82 psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI; 83 psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal; 84 psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch; 85 psEnc->sCmn.channelNb = channelNb; 86 87 if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) { 88 if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) { 89 /* Change in API sampling rate in the middle of encoding a packet */ 90 ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz ); 91 } 92 return ret; 93 } 94 95 /* Beyond this point we know that there are no previously coded frames in the payload buffer */ 96 97 /********************************************/ 98 /* Determine internal sampling rate */ 99 /********************************************/ 100 fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl ); 101 if( force_fs_kHz ) { 102 fs_kHz = force_fs_kHz; 103 } 104 /********************************************/ 105 /* Prepare resampler and buffered data */ 106 /********************************************/ 107 ret += silk_setup_resamplers( psEnc, fs_kHz ); 108 109 /********************************************/ 110 /* Set internal sampling frequency */ 111 /********************************************/ 112 ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms ); 113 114 /********************************************/ 115 /* Set encoding complexity */ 116 /********************************************/ 117 ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity ); 118 119 /********************************************/ 120 /* Set packet loss rate measured by farend */ 121 /********************************************/ 122 psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage; 123 124 /********************************************/ 125 /* Set LBRR usage */ 126 /********************************************/ 127 ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps ); 128 129 psEnc->sCmn.controlled_since_last_payload = 1; 130 131 return ret; 132 } 133 134 static opus_int silk_setup_resamplers( 135 silk_encoder_state_Fxx *psEnc, /* I/O */ 136 opus_int fs_kHz /* I */ 137 ) 138 { 139 opus_int ret = SILK_NO_ERROR; 140 opus_int32 nSamples_temp; 141 142 if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz ) 143 { 144 if( psEnc->sCmn.fs_kHz == 0 ) { 145 /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ 146 ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 ); 147 } else { 148 /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */ 149 opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ]; 150 silk_resampler_state_struct temp_resampler_state; 151 #ifdef FIXED_POINT 152 opus_int16 *x_bufFIX = psEnc->x_buf; 153 #else 154 opus_int16 x_bufFIX[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ]; 155 #endif 156 157 nSamples_temp = silk_LSHIFT( psEnc->sCmn.frame_length, 1 ) + LA_SHAPE_MS * psEnc->sCmn.fs_kHz; 158 159 #ifndef FIXED_POINT 160 silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp ); 161 #endif 162 163 /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */ 164 ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 ); 165 166 /* Temporary resampling of x_buf data to API_fs_Hz */ 167 ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp ); 168 169 /* Calculate number of samples that has been temporarily upsampled */ 170 nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) ); 171 172 /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */ 173 ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 ); 174 175 /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */ 176 ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp ); 177 178 #ifndef FIXED_POINT 179 silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz ); 180 #endif 181 } 182 } 183 184 psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz; 185 186 return ret; 187 } 188 189 static opus_int silk_setup_fs( 190 silk_encoder_state_Fxx *psEnc, /* I/O */ 191 opus_int fs_kHz, /* I */ 192 opus_int PacketSize_ms /* I */ 193 ) 194 { 195 opus_int ret = SILK_NO_ERROR; 196 197 /* Set packet size */ 198 if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) { 199 if( ( PacketSize_ms != 10 ) && 200 ( PacketSize_ms != 20 ) && 201 ( PacketSize_ms != 40 ) && 202 ( PacketSize_ms != 60 ) ) { 203 ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; 204 } 205 if( PacketSize_ms <= 10 ) { 206 psEnc->sCmn.nFramesPerPacket = 1; 207 psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1; 208 psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz ); 209 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); 210 if( psEnc->sCmn.fs_kHz == 8 ) { 211 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; 212 } else { 213 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; 214 } 215 } else { 216 psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS ); 217 psEnc->sCmn.nb_subfr = MAX_NB_SUBFR; 218 psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz ); 219 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); 220 if( psEnc->sCmn.fs_kHz == 8 ) { 221 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; 222 } else { 223 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; 224 } 225 } 226 psEnc->sCmn.PacketSize_ms = PacketSize_ms; 227 psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ 228 } 229 230 /* Set internal sampling frequency */ 231 silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); 232 silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); 233 if( psEnc->sCmn.fs_kHz != fs_kHz ) { 234 /* reset part of the state */ 235 silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) ); 236 silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) ); 237 silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) ); 238 silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); 239 silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) ); 240 psEnc->sCmn.inputBufIx = 0; 241 psEnc->sCmn.nFramesEncoded = 0; 242 psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ 243 244 /* Initialize non-zero parameters */ 245 psEnc->sCmn.prevLag = 100; 246 psEnc->sCmn.first_frame_after_reset = 1; 247 psEnc->sPrefilt.lagPrev = 100; 248 psEnc->sShape.LastGainIndex = 10; 249 psEnc->sCmn.sNSQ.lagPrev = 100; 250 psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536; 251 psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; 252 253 psEnc->sCmn.fs_kHz = fs_kHz; 254 if( psEnc->sCmn.fs_kHz == 8 ) { 255 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { 256 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; 257 } else { 258 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; 259 } 260 } else { 261 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { 262 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF; 263 } else { 264 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; 265 } 266 } 267 if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) { 268 psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER; 269 psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB; 270 } else { 271 psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER; 272 psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB; 273 } 274 psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz; 275 psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr ); 276 psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); 277 psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz ); 278 psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz ); 279 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) { 280 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz ); 281 } else { 282 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz ); 283 } 284 if( psEnc->sCmn.fs_kHz == 16 ) { 285 psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 ); 286 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; 287 } else if( psEnc->sCmn.fs_kHz == 12 ) { 288 psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 ); 289 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; 290 } else { 291 psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 ); 292 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; 293 } 294 } 295 296 /* Check that settings are valid */ 297 silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length ); 298 299 return ret; 300 } 301 302 static opus_int silk_setup_complexity( 303 silk_encoder_state *psEncC, /* I/O */ 304 opus_int Complexity /* I */ 305 ) 306 { 307 opus_int ret = 0; 308 309 /* Set encoding complexity */ 310 silk_assert( Complexity >= 0 && Complexity <= 10 ); 311 if( Complexity < 2 ) { 312 psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX; 313 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 ); 314 psEncC->pitchEstimationLPCOrder = 6; 315 psEncC->shapingLPCOrder = 8; 316 psEncC->la_shape = 3 * psEncC->fs_kHz; 317 psEncC->nStatesDelayedDecision = 1; 318 psEncC->useInterpolatedNLSFs = 0; 319 psEncC->LTPQuantLowComplexity = 1; 320 psEncC->NLSF_MSVQ_Survivors = 2; 321 psEncC->warping_Q16 = 0; 322 } else if( Complexity < 4 ) { 323 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 324 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 ); 325 psEncC->pitchEstimationLPCOrder = 8; 326 psEncC->shapingLPCOrder = 10; 327 psEncC->la_shape = 5 * psEncC->fs_kHz; 328 psEncC->nStatesDelayedDecision = 1; 329 psEncC->useInterpolatedNLSFs = 0; 330 psEncC->LTPQuantLowComplexity = 0; 331 psEncC->NLSF_MSVQ_Survivors = 4; 332 psEncC->warping_Q16 = 0; 333 } else if( Complexity < 6 ) { 334 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 335 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 ); 336 psEncC->pitchEstimationLPCOrder = 10; 337 psEncC->shapingLPCOrder = 12; 338 psEncC->la_shape = 5 * psEncC->fs_kHz; 339 psEncC->nStatesDelayedDecision = 2; 340 psEncC->useInterpolatedNLSFs = 1; 341 psEncC->LTPQuantLowComplexity = 0; 342 psEncC->NLSF_MSVQ_Survivors = 8; 343 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); 344 } else if( Complexity < 8 ) { 345 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX; 346 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 ); 347 psEncC->pitchEstimationLPCOrder = 12; 348 psEncC->shapingLPCOrder = 14; 349 psEncC->la_shape = 5 * psEncC->fs_kHz; 350 psEncC->nStatesDelayedDecision = 3; 351 psEncC->useInterpolatedNLSFs = 1; 352 psEncC->LTPQuantLowComplexity = 0; 353 psEncC->NLSF_MSVQ_Survivors = 16; 354 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); 355 } else { 356 psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX; 357 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 ); 358 psEncC->pitchEstimationLPCOrder = 16; 359 psEncC->shapingLPCOrder = 16; 360 psEncC->la_shape = 5 * psEncC->fs_kHz; 361 psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES; 362 psEncC->useInterpolatedNLSFs = 1; 363 psEncC->LTPQuantLowComplexity = 0; 364 psEncC->NLSF_MSVQ_Survivors = 32; 365 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 ); 366 } 367 368 /* Do not allow higher pitch estimation LPC order than predict LPC order */ 369 psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder ); 370 psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape; 371 psEncC->Complexity = Complexity; 372 373 silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER ); 374 silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER ); 375 silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES ); 376 silk_assert( psEncC->warping_Q16 <= 32767 ); 377 silk_assert( psEncC->la_shape <= LA_SHAPE_MAX ); 378 silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); 379 silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS ); 380 381 return ret; 382 } 383 384 static inline opus_int silk_setup_LBRR( 385 silk_encoder_state *psEncC, /* I/O */ 386 const opus_int32 TargetRate_bps /* I */ 387 ) 388 { 389 opus_int ret = SILK_NO_ERROR; 390 opus_int32 LBRR_rate_thres_bps; 391 392 psEncC->LBRR_enabled = 0; 393 if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) { 394 if( psEncC->fs_kHz == 8 ) { 395 LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS; 396 } else if( psEncC->fs_kHz == 12 ) { 397 LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS; 398 } else { 399 LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS; 400 } 401 LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) ); 402 403 if( TargetRate_bps > LBRR_rate_thres_bps ) { 404 /* Set gain increase for coding LBRR excitation */ 405 psEncC->LBRR_enabled = 1; 406 psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 ); 407 } 408 } 409 410 return ret; 411 } 412