1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #include "main_FIX.h" 33 #include "stack_alloc.h" 34 #include "tuning_parameters.h" 35 36 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ 37 static OPUS_INLINE void silk_LBRR_encode_FIX( 38 silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ 39 silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ 40 const opus_int32 xfw_Q3[], /* I Input signal */ 41 opus_int condCoding /* I The type of conditional coding used so far for this frame */ 42 ); 43 44 void silk_encode_do_VAD_FIX( 45 silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ 46 ) 47 { 48 /****************************/ 49 /* Voice Activity Detection */ 50 /****************************/ 51 silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); 52 53 /**************************************************/ 54 /* Convert speech activity into VAD and DTX flags */ 55 /**************************************************/ 56 if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { 57 psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; 58 psEnc->sCmn.noSpeechCounter++; 59 if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { 60 psEnc->sCmn.inDTX = 0; 61 } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { 62 psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; 63 psEnc->sCmn.inDTX = 0; 64 } 65 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; 66 } else { 67 psEnc->sCmn.noSpeechCounter = 0; 68 psEnc->sCmn.inDTX = 0; 69 psEnc->sCmn.indices.signalType = TYPE_UNVOICED; 70 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; 71 } 72 } 73 74 /****************/ 75 /* Encode frame */ 76 /****************/ 77 opus_int silk_encode_frame_FIX( 78 silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ 79 opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ 80 ec_enc *psRangeEnc, /* I/O compressor data structure */ 81 opus_int condCoding, /* I The type of conditional coding to use */ 82 opus_int maxBits, /* I If > 0: maximum number of output bits */ 83 opus_int useCBR /* I Flag to force constant-bitrate operation */ 84 ) 85 { 86 silk_encoder_control_FIX sEncCtrl; 87 opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; 88 opus_int16 *x_frame; 89 ec_enc sRangeEnc_copy, sRangeEnc_copy2; 90 silk_nsq_state sNSQ_copy, sNSQ_copy2; 91 opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; 92 opus_int32 gainsID, gainsID_lower, gainsID_upper; 93 opus_int16 gainMult_Q8; 94 opus_int16 ec_prevLagIndex_copy; 95 opus_int ec_prevSignalType_copy; 96 opus_int8 LastGainIndex_copy2; 97 SAVE_STACK; 98 99 /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ 100 LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; 101 102 psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; 103 104 /**************************************************************/ 105 /* Set up Input Pointers, and insert frame in input buffer */ 106 /*************************************************************/ 107 /* start of frame to encode */ 108 x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; 109 110 /***************************************/ 111 /* Ensure smooth bandwidth transitions */ 112 /***************************************/ 113 silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); 114 115 /*******************************************/ 116 /* Copy new frame to front of input buffer */ 117 /*******************************************/ 118 silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); 119 120 if( !psEnc->sCmn.prefillFlag ) { 121 VARDECL( opus_int32, xfw_Q3 ); 122 VARDECL( opus_int16, res_pitch ); 123 VARDECL( opus_uint8, ec_buf_copy ); 124 opus_int16 *res_pitch_frame; 125 126 ALLOC( res_pitch, 127 psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length 128 + psEnc->sCmn.ltp_mem_length, opus_int16 ); 129 /* start of pitch LPC residual frame */ 130 res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; 131 132 /*****************************************/ 133 /* Find pitch lags, initial LPC analysis */ 134 /*****************************************/ 135 silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); 136 137 /************************/ 138 /* Noise shape analysis */ 139 /************************/ 140 silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch ); 141 142 /***************************************************/ 143 /* Find linear prediction coefficients (LPC + LTP) */ 144 /***************************************************/ 145 silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); 146 147 /****************************************/ 148 /* Process gains */ 149 /****************************************/ 150 silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding ); 151 152 /*****************************************/ 153 /* Prefiltering for noise shaper */ 154 /*****************************************/ 155 ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 ); 156 silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame ); 157 158 /****************************************/ 159 /* Low Bitrate Redundant Encoding */ 160 /****************************************/ 161 silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding ); 162 163 /* Loop over quantizer and entropy coding to control bitrate */ 164 maxIter = 6; 165 gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); 166 found_lower = 0; 167 found_upper = 0; 168 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); 169 gainsID_lower = -1; 170 gainsID_upper = -1; 171 /* Copy part of the input state */ 172 silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); 173 silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 174 seed_copy = psEnc->sCmn.indices.Seed; 175 ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; 176 ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; 177 ALLOC( ec_buf_copy, 1275, opus_uint8 ); 178 for( iter = 0; ; iter++ ) { 179 if( gainsID == gainsID_lower ) { 180 nBits = nBits_lower; 181 } else if( gainsID == gainsID_upper ) { 182 nBits = nBits_upper; 183 } else { 184 /* Restore part of the input state */ 185 if( iter > 0 ) { 186 silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); 187 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); 188 psEnc->sCmn.indices.Seed = seed_copy; 189 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; 190 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; 191 } 192 193 /*****************************************/ 194 /* Noise shaping quantization */ 195 /*****************************************/ 196 if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { 197 silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, 198 sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, 199 sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, 200 psEnc->sCmn.arch ); 201 } else { 202 silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, 203 sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, 204 sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14, 205 psEnc->sCmn.arch); 206 } 207 208 /****************************************/ 209 /* Encode Parameters */ 210 /****************************************/ 211 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); 212 213 /****************************************/ 214 /* Encode Excitation Signal */ 215 /****************************************/ 216 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, 217 psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); 218 219 nBits = ec_tell( psRangeEnc ); 220 221 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { 222 break; 223 } 224 } 225 226 if( iter == maxIter ) { 227 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { 228 /* Restore output state from earlier iteration that did meet the bitrate budget */ 229 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); 230 silk_assert( sRangeEnc_copy2.offs <= 1275 ); 231 silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); 232 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); 233 psEnc->sShape.LastGainIndex = LastGainIndex_copy2; 234 } 235 break; 236 } 237 238 if( nBits > maxBits ) { 239 if( found_lower == 0 && iter >= 2 ) { 240 /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ 241 sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 ); 242 found_upper = 0; 243 gainsID_upper = -1; 244 } else { 245 found_upper = 1; 246 nBits_upper = nBits; 247 gainMult_upper = gainMult_Q8; 248 gainsID_upper = gainsID; 249 } 250 } else if( nBits < maxBits - 5 ) { 251 found_lower = 1; 252 nBits_lower = nBits; 253 gainMult_lower = gainMult_Q8; 254 if( gainsID != gainsID_lower ) { 255 gainsID_lower = gainsID; 256 /* Copy part of the output state */ 257 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); 258 silk_assert( psRangeEnc->offs <= 1275 ); 259 silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); 260 silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 261 LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; 262 } 263 } else { 264 /* Within 5 bits of budget: close enough */ 265 break; 266 } 267 268 if( ( found_lower & found_upper ) == 0 ) { 269 /* Adjust gain according to high-rate rate/distortion curve */ 270 opus_int32 gain_factor_Q16; 271 gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); 272 gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); 273 if( nBits > maxBits ) { 274 gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); 275 } 276 gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); 277 } else { 278 /* Adjust gain by interpolating */ 279 gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower ); 280 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ 281 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { 282 gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); 283 } else 284 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { 285 gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); 286 } 287 } 288 289 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 290 sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); 291 } 292 293 /* Quantize gains */ 294 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; 295 silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16, 296 &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); 297 298 /* Unique identifier of gains vector */ 299 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); 300 } 301 } 302 303 /* Update input buffer */ 304 silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], 305 ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) ); 306 307 /* Exit without entropy coding */ 308 if( psEnc->sCmn.prefillFlag ) { 309 /* No payload */ 310 *pnBytesOut = 0; 311 RESTORE_STACK; 312 return ret; 313 } 314 315 /* Parameters needed for next frame */ 316 psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; 317 psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; 318 319 /****************************************/ 320 /* Finalize payload */ 321 /****************************************/ 322 psEnc->sCmn.first_frame_after_reset = 0; 323 /* Payload size */ 324 *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); 325 326 RESTORE_STACK; 327 return ret; 328 } 329 330 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ 331 static OPUS_INLINE void silk_LBRR_encode_FIX( 332 silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ 333 silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ 334 const opus_int32 xfw_Q3[], /* I Input signal */ 335 opus_int condCoding /* I The type of conditional coding used so far for this frame */ 336 ) 337 { 338 opus_int32 TempGains_Q16[ MAX_NB_SUBFR ]; 339 SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; 340 silk_nsq_state sNSQ_LBRR; 341 342 /*******************************************/ 343 /* Control use of inband LBRR */ 344 /*******************************************/ 345 if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { 346 psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; 347 348 /* Copy noise shaping quantizer state and quantization indices from regular encoding */ 349 silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 350 silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); 351 352 /* Save original gains */ 353 silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); 354 355 if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { 356 /* First frame in packet or previous frame not LBRR coded */ 357 psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; 358 359 /* Increase Gains to get target LBRR rate */ 360 psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases; 361 psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); 362 } 363 364 /* Decode to get gains in sync with decoder */ 365 /* Overwrite unquantized gains with quantized gains */ 366 silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices, 367 &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); 368 369 /*****************************************/ 370 /* Noise shaping quantization */ 371 /*****************************************/ 372 if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { 373 silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, 374 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, 375 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, 376 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); 377 } else { 378 silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, 379 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, 380 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, 381 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14, psEnc->sCmn.arch ); 382 } 383 384 /* Restore original gains */ 385 silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); 386 } 387 } 388