1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 #include "main_FLP.h" 33 #include "tuning_parameters.h" 34 35 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ 36 static inline void silk_LBRR_encode_FLP( 37 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 38 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ 39 const silk_float xfw[], /* I Input signal */ 40 opus_int condCoding /* I The type of conditional coding used so far for this frame */ 41 ); 42 43 void silk_encode_do_VAD_FLP( 44 silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ 45 ) 46 { 47 /****************************/ 48 /* Voice Activity Detection */ 49 /****************************/ 50 silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 ); 51 52 /**************************************************/ 53 /* Convert speech activity into VAD and DTX flags */ 54 /**************************************************/ 55 if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { 56 psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; 57 psEnc->sCmn.noSpeechCounter++; 58 if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { 59 psEnc->sCmn.inDTX = 0; 60 } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { 61 psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; 62 psEnc->sCmn.inDTX = 0; 63 } 64 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; 65 } else { 66 psEnc->sCmn.noSpeechCounter = 0; 67 psEnc->sCmn.inDTX = 0; 68 psEnc->sCmn.indices.signalType = TYPE_UNVOICED; 69 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; 70 } 71 } 72 73 /****************/ 74 /* Encode frame */ 75 /****************/ 76 opus_int silk_encode_frame_FLP( 77 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 78 opus_int32 *pnBytesOut, /* O Number of payload bytes; */ 79 ec_enc *psRangeEnc, /* I/O compressor data structure */ 80 opus_int condCoding, /* I The type of conditional coding to use */ 81 opus_int maxBits, /* I If > 0: maximum number of output bits */ 82 opus_int useCBR /* I Flag to force constant-bitrate operation */ 83 ) 84 { 85 silk_encoder_control_FLP sEncCtrl; 86 opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; 87 silk_float *x_frame, *res_pitch_frame; 88 silk_float xfw[ MAX_FRAME_LENGTH ]; 89 silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ]; 90 ec_enc sRangeEnc_copy, sRangeEnc_copy2; 91 silk_nsq_state sNSQ_copy, sNSQ_copy2; 92 opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; 93 opus_int32 gainsID, gainsID_lower, gainsID_upper; 94 opus_int16 gainMult_Q8; 95 opus_int16 ec_prevLagIndex_copy; 96 opus_int ec_prevSignalType_copy; 97 opus_int8 LastGainIndex_copy2; 98 opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; 99 opus_uint8 ec_buf_copy[ 1275 ]; 100 101 /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ 102 LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; 103 104 psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; 105 106 /**************************************************************/ 107 /* Set up Input Pointers, and insert frame in input buffer */ 108 /**************************************************************/ 109 /* pointers aligned with start of frame to encode */ 110 x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */ 111 res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */ 112 113 /***************************************/ 114 /* Ensure smooth bandwidth transitions */ 115 /***************************************/ 116 silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); 117 118 /*******************************************/ 119 /* Copy new frame to front of input buffer */ 120 /*******************************************/ 121 silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); 122 123 /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */ 124 for( i = 0; i < 8; i++ ) { 125 x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f; 126 } 127 128 if( !psEnc->sCmn.prefillFlag ) { 129 /*****************************************/ 130 /* Find pitch lags, initial LPC analysis */ 131 /*****************************************/ 132 silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame ); 133 134 /************************/ 135 /* Noise shape analysis */ 136 /************************/ 137 silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); 138 139 /***************************************************/ 140 /* Find linear prediction coefficients (LPC + LTP) */ 141 /***************************************************/ 142 silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); 143 144 /****************************************/ 145 /* Process gains */ 146 /****************************************/ 147 silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding ); 148 149 /*****************************************/ 150 /* Prefiltering for noise shaper */ 151 /*****************************************/ 152 silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame ); 153 154 /****************************************/ 155 /* Low Bitrate Redundant Encoding */ 156 /****************************************/ 157 silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding ); 158 159 /* Loop over quantizer and entroy coding to control bitrate */ 160 maxIter = 6; 161 gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); 162 found_lower = 0; 163 found_upper = 0; 164 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); 165 gainsID_lower = -1; 166 gainsID_upper = -1; 167 /* Copy part of the input state */ 168 silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); 169 silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 170 seed_copy = psEnc->sCmn.indices.Seed; 171 ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; 172 ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; 173 for( iter = 0; ; iter++ ) { 174 if( gainsID == gainsID_lower ) { 175 nBits = nBits_lower; 176 } else if( gainsID == gainsID_upper ) { 177 nBits = nBits_upper; 178 } else { 179 /* Restore part of the input state */ 180 if( iter > 0 ) { 181 silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); 182 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); 183 psEnc->sCmn.indices.Seed = seed_copy; 184 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; 185 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; 186 } 187 188 /*****************************************/ 189 /* Noise shaping quantization */ 190 /*****************************************/ 191 silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw ); 192 193 /****************************************/ 194 /* Encode Parameters */ 195 /****************************************/ 196 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); 197 198 /****************************************/ 199 /* Encode Excitation Signal */ 200 /****************************************/ 201 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, 202 psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); 203 204 nBits = ec_tell( psRangeEnc ); 205 206 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { 207 break; 208 } 209 } 210 211 if( iter == maxIter ) { 212 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { 213 /* Restore output state from earlier iteration that did meet the bitrate budget */ 214 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); 215 silk_assert( sRangeEnc_copy2.offs <= 1275 ); 216 silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); 217 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); 218 psEnc->sShape.LastGainIndex = LastGainIndex_copy2; 219 } 220 break; 221 } 222 223 if( nBits > maxBits ) { 224 if( found_lower == 0 && iter >= 2 ) { 225 /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ 226 sEncCtrl.Lambda *= 1.5f; 227 found_upper = 0; 228 gainsID_upper = -1; 229 } else { 230 found_upper = 1; 231 nBits_upper = nBits; 232 gainMult_upper = gainMult_Q8; 233 gainsID_upper = gainsID; 234 } 235 } else if( nBits < maxBits - 5 ) { 236 found_lower = 1; 237 nBits_lower = nBits; 238 gainMult_lower = gainMult_Q8; 239 if( gainsID != gainsID_lower ) { 240 gainsID_lower = gainsID; 241 /* Copy part of the output state */ 242 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); 243 silk_assert( psRangeEnc->offs <= 1275 ); 244 silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); 245 silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 246 LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; 247 } 248 } else { 249 /* Within 5 bits of budget: close enough */ 250 break; 251 } 252 253 if( ( found_lower & found_upper ) == 0 ) { 254 /* Adjust gain according to high-rate rate/distortion curve */ 255 opus_int32 gain_factor_Q16; 256 gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); 257 gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); 258 if( nBits > maxBits ) { 259 gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); 260 } 261 gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); 262 } else { 263 /* Adjust gain by interpolating */ 264 gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower ); 265 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ 266 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { 267 gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); 268 } else 269 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { 270 gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); 271 } 272 } 273 274 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 275 pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); 276 } 277 278 /* Quantize gains */ 279 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; 280 silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, 281 &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); 282 283 /* Unique identifier of gains vector */ 284 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); 285 286 /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ 287 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 288 sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f; 289 } 290 } 291 } 292 293 /* Update input buffer */ 294 silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], 295 ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) ); 296 297 /* Parameters needed for next frame */ 298 psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; 299 psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; 300 301 /* Exit without entropy coding */ 302 if( psEnc->sCmn.prefillFlag ) { 303 /* No payload */ 304 *pnBytesOut = 0; 305 return ret; 306 } 307 308 /****************************************/ 309 /* Finalize payload */ 310 /****************************************/ 311 psEnc->sCmn.first_frame_after_reset = 0; 312 /* Payload size */ 313 *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); 314 315 return ret; 316 } 317 318 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ 319 static inline void silk_LBRR_encode_FLP( 320 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 321 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ 322 const silk_float xfw[], /* I Input signal */ 323 opus_int condCoding /* I The type of conditional coding used so far for this frame */ 324 ) 325 { 326 opus_int k; 327 opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; 328 silk_float TempGains[ MAX_NB_SUBFR ]; 329 SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; 330 silk_nsq_state sNSQ_LBRR; 331 332 /*******************************************/ 333 /* Control use of inband LBRR */ 334 /*******************************************/ 335 if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { 336 psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; 337 338 /* Copy noise shaping quantizer state and quantization indices from regular encoding */ 339 silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 340 silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); 341 342 /* Save original gains */ 343 silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); 344 345 if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { 346 /* First frame in packet or previous frame not LBRR coded */ 347 psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; 348 349 /* Increase Gains to get target LBRR rate */ 350 psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases; 351 psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); 352 } 353 354 /* Decode to get gains in sync with decoder */ 355 silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices, 356 &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); 357 358 /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ 359 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { 360 psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f ); 361 } 362 363 /*****************************************/ 364 /* Noise shaping quantization */ 365 /*****************************************/ 366 silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR, 367 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw ); 368 369 /* Restore original gains */ 370 silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); 371 } 372 } 373