1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #include "API.h" 32 #include "main.h" 33 #include "stack_alloc.h" 34 35 /************************/ 36 /* Decoder Super Struct */ 37 /************************/ 38 typedef struct { 39 silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; 40 stereo_dec_state sStereo; 41 opus_int nChannelsAPI; 42 opus_int nChannelsInternal; 43 opus_int prev_decode_only_middle; 44 } silk_decoder; 45 46 /*********************/ 47 /* Decoder functions */ 48 /*********************/ 49 50 opus_int silk_Get_Decoder_Size( /* O Returns error code */ 51 opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ 52 ) 53 { 54 opus_int ret = SILK_NO_ERROR; 55 56 *decSizeBytes = sizeof( silk_decoder ); 57 58 return ret; 59 } 60 61 /* Reset decoder state */ 62 opus_int silk_InitDecoder( /* O Returns error code */ 63 void *decState /* I/O State */ 64 ) 65 { 66 opus_int n, ret = SILK_NO_ERROR; 67 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; 68 69 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { 70 ret = silk_init_decoder( &channel_state[ n ] ); 71 } 72 73 return ret; 74 } 75 76 /* Decode a frame */ 77 opus_int silk_Decode( /* O Returns error code */ 78 void* decState, /* I/O State */ 79 silk_DecControlStruct* decControl, /* I/O Control Structure */ 80 opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ 81 opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ 82 ec_dec *psRangeDec, /* I/O Compressor data structure */ 83 opus_int16 *samplesOut, /* O Decoded output speech vector */ 84 opus_int32 *nSamplesOut /* O Number of samples decoded */ 85 ) 86 { 87 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; 88 opus_int32 nSamplesOutDec, LBRR_symbol; 89 opus_int16 *samplesOut1_tmp[ 2 ]; 90 VARDECL( opus_int16, samplesOut1_tmp_storage ); 91 VARDECL( opus_int16, samplesOut2_tmp ); 92 opus_int32 MS_pred_Q13[ 2 ] = { 0 }; 93 opus_int16 *resample_out_ptr; 94 silk_decoder *psDec = ( silk_decoder * )decState; 95 silk_decoder_state *channel_state = psDec->channel_state; 96 opus_int has_side; 97 opus_int stereo_to_mono; 98 SAVE_STACK; 99 100 /**********************************/ 101 /* Test if first frame in payload */ 102 /**********************************/ 103 if( newPacketFlag ) { 104 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 105 channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ 106 } 107 } 108 109 /* If Mono -> Stereo transition in bitstream: init state of second channel */ 110 if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { 111 ret += silk_init_decoder( &channel_state[ 1 ] ); 112 } 113 114 stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && 115 ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); 116 117 if( channel_state[ 0 ].nFramesDecoded == 0 ) { 118 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 119 opus_int fs_kHz_dec; 120 if( decControl->payloadSize_ms == 0 ) { 121 /* Assuming packet loss, use 10 ms */ 122 channel_state[ n ].nFramesPerPacket = 1; 123 channel_state[ n ].nb_subfr = 2; 124 } else if( decControl->payloadSize_ms == 10 ) { 125 channel_state[ n ].nFramesPerPacket = 1; 126 channel_state[ n ].nb_subfr = 2; 127 } else if( decControl->payloadSize_ms == 20 ) { 128 channel_state[ n ].nFramesPerPacket = 1; 129 channel_state[ n ].nb_subfr = 4; 130 } else if( decControl->payloadSize_ms == 40 ) { 131 channel_state[ n ].nFramesPerPacket = 2; 132 channel_state[ n ].nb_subfr = 4; 133 } else if( decControl->payloadSize_ms == 60 ) { 134 channel_state[ n ].nFramesPerPacket = 3; 135 channel_state[ n ].nb_subfr = 4; 136 } else { 137 silk_assert( 0 ); 138 RESTORE_STACK; 139 return SILK_DEC_INVALID_FRAME_SIZE; 140 } 141 fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; 142 if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { 143 silk_assert( 0 ); 144 RESTORE_STACK; 145 return SILK_DEC_INVALID_SAMPLING_FREQUENCY; 146 } 147 ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); 148 } 149 } 150 151 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { 152 silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); 153 silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); 154 silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); 155 } 156 psDec->nChannelsAPI = decControl->nChannelsAPI; 157 psDec->nChannelsInternal = decControl->nChannelsInternal; 158 159 if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { 160 ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; 161 RESTORE_STACK; 162 return( ret ); 163 } 164 165 if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { 166 /* First decoder call for this payload */ 167 /* Decode VAD flags and LBRR flag */ 168 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 169 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 170 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); 171 } 172 channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); 173 } 174 /* Decode LBRR flags */ 175 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 176 silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); 177 if( channel_state[ n ].LBRR_flag ) { 178 if( channel_state[ n ].nFramesPerPacket == 1 ) { 179 channel_state[ n ].LBRR_flags[ 0 ] = 1; 180 } else { 181 LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; 182 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 183 channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; 184 } 185 } 186 } 187 } 188 189 if( lostFlag == FLAG_DECODE_NORMAL ) { 190 /* Regular decoding: skip all LBRR data */ 191 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { 192 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 193 if( channel_state[ n ].LBRR_flags[ i ] ) { 194 opus_int pulses[ MAX_FRAME_LENGTH ]; 195 opus_int condCoding; 196 197 if( decControl->nChannelsInternal == 2 && n == 0 ) { 198 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 199 if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { 200 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 201 } 202 } 203 /* Use conditional coding if previous frame available */ 204 if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { 205 condCoding = CODE_CONDITIONALLY; 206 } else { 207 condCoding = CODE_INDEPENDENTLY; 208 } 209 silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); 210 silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, 211 channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); 212 } 213 } 214 } 215 } 216 } 217 218 /* Get MS predictor index */ 219 if( decControl->nChannelsInternal == 2 ) { 220 if( lostFlag == FLAG_DECODE_NORMAL || 221 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) 222 { 223 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 224 /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ 225 if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || 226 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) 227 { 228 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 229 } else { 230 decode_only_middle = 0; 231 } 232 } else { 233 for( n = 0; n < 2; n++ ) { 234 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; 235 } 236 } 237 } 238 239 /* Reset side channel decoder prediction memory for first frame with side coding */ 240 if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { 241 silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); 242 silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); 243 psDec->channel_state[ 1 ].lagPrev = 100; 244 psDec->channel_state[ 1 ].LastGainIndex = 10; 245 psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; 246 psDec->channel_state[ 1 ].first_frame_after_reset = 1; 247 } 248 249 ALLOC( samplesOut1_tmp_storage, 250 decControl->nChannelsInternal*( 251 channel_state[ 0 ].frame_length + 2 ), 252 opus_int16 ); 253 samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; 254 samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage 255 + channel_state[ 0 ].frame_length + 2; 256 257 if( lostFlag == FLAG_DECODE_NORMAL ) { 258 has_side = !decode_only_middle; 259 } else { 260 has_side = !psDec->prev_decode_only_middle 261 || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); 262 } 263 /* Call decoder for one frame */ 264 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 265 if( n == 0 || has_side ) { 266 opus_int FrameIndex; 267 opus_int condCoding; 268 269 FrameIndex = channel_state[ 0 ].nFramesDecoded - n; 270 /* Use independent coding if no previous frame available */ 271 if( FrameIndex <= 0 ) { 272 condCoding = CODE_INDEPENDENTLY; 273 } else if( lostFlag == FLAG_DECODE_LBRR ) { 274 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; 275 } else if( n > 0 && psDec->prev_decode_only_middle ) { 276 /* If we skipped a side frame in this packet, we don't 277 need LTP scaling; the LTP state is well-defined. */ 278 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 279 } else { 280 condCoding = CODE_CONDITIONALLY; 281 } 282 ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); 283 } else { 284 silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); 285 } 286 channel_state[ n ].nFramesDecoded++; 287 } 288 289 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { 290 /* Convert Mid/Side to Left/Right */ 291 silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); 292 } else { 293 /* Buffering */ 294 silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 295 silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); 296 } 297 298 /* Number of output samples */ 299 *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); 300 301 /* Set up pointers to temp buffers */ 302 ALLOC( samplesOut2_tmp, 303 decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 ); 304 if( decControl->nChannelsAPI == 2 ) { 305 resample_out_ptr = samplesOut2_tmp; 306 } else { 307 resample_out_ptr = samplesOut; 308 } 309 310 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { 311 312 /* Resample decoded signal to API_sampleRate */ 313 ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); 314 315 /* Interleave if stereo output and stereo stream */ 316 if( decControl->nChannelsAPI == 2 ) { 317 for( i = 0; i < *nSamplesOut; i++ ) { 318 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; 319 } 320 } 321 } 322 323 /* Create two channel output from mono stream */ 324 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { 325 if ( stereo_to_mono ){ 326 /* Resample right channel for newly collapsed stereo just in case 327 we weren't doing collapsing when switching to mono */ 328 ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); 329 330 for( i = 0; i < *nSamplesOut; i++ ) { 331 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; 332 } 333 } else { 334 for( i = 0; i < *nSamplesOut; i++ ) { 335 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; 336 } 337 } 338 } 339 340 /* Export pitch lag, measured at 48 kHz sampling rate */ 341 if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { 342 int mult_tab[ 3 ] = { 6, 4, 3 }; 343 decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; 344 } else { 345 decControl->prevPitchLag = 0; 346 } 347 348 if( lostFlag == FLAG_PACKET_LOST ) { 349 /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" 350 if we lose packets when the energy is going down */ 351 for ( i = 0; i < psDec->nChannelsInternal; i++ ) 352 psDec->channel_state[ i ].LastGainIndex = 10; 353 } else { 354 psDec->prev_decode_only_middle = decode_only_middle; 355 } 356 RESTORE_STACK; 357 return ret; 358 } 359 360 #if 0 361 /* Getting table of contents for a packet */ 362 opus_int silk_get_TOC( 363 const opus_uint8 *payload, /* I Payload data */ 364 const opus_int nBytesIn, /* I Number of input bytes */ 365 const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ 366 silk_TOC_struct *Silk_TOC /* O Type of content */ 367 ) 368 { 369 opus_int i, flags, ret = SILK_NO_ERROR; 370 371 if( nBytesIn < 1 ) { 372 return -1; 373 } 374 if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { 375 return -1; 376 } 377 378 silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); 379 380 /* For stereo, extract the flags for the mid channel */ 381 flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); 382 383 Silk_TOC->inbandFECFlag = flags & 1; 384 for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { 385 flags = silk_RSHIFT( flags, 1 ); 386 Silk_TOC->VADFlags[ i ] = flags & 1; 387 Silk_TOC->VADFlag |= flags & 1; 388 } 389 390 return ret; 391 } 392 #endif 393