1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #include "API.h" 32 #include "main.h" 33 #include "stack_alloc.h" 34 35 /************************/ 36 /* Decoder Super Struct */ 37 /************************/ 38 typedef struct { 39 silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; 40 stereo_dec_state sStereo; 41 opus_int nChannelsAPI; 42 opus_int nChannelsInternal; 43 opus_int prev_decode_only_middle; 44 } silk_decoder; 45 46 /*********************/ 47 /* Decoder functions */ 48 /*********************/ 49 50 opus_int silk_Get_Decoder_Size( /* O Returns error code */ 51 opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ 52 ) 53 { 54 opus_int ret = SILK_NO_ERROR; 55 56 *decSizeBytes = sizeof( silk_decoder ); 57 58 return ret; 59 } 60 61 /* Reset decoder state */ 62 opus_int silk_InitDecoder( /* O Returns error code */ 63 void *decState /* I/O State */ 64 ) 65 { 66 opus_int n, ret = SILK_NO_ERROR; 67 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; 68 69 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { 70 ret = silk_init_decoder( &channel_state[ n ] ); 71 } 72 silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); 73 /* Not strictly needed, but it's cleaner that way */ 74 ((silk_decoder *)decState)->prev_decode_only_middle = 0; 75 76 return ret; 77 } 78 79 /* Decode a frame */ 80 opus_int silk_Decode( /* O Returns error code */ 81 void* decState, /* I/O State */ 82 silk_DecControlStruct* decControl, /* I/O Control Structure */ 83 opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ 84 opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ 85 ec_dec *psRangeDec, /* I/O Compressor data structure */ 86 opus_int16 *samplesOut, /* O Decoded output speech vector */ 87 opus_int32 *nSamplesOut /* O Number of samples decoded */ 88 ) 89 { 90 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; 91 opus_int32 nSamplesOutDec, LBRR_symbol; 92 opus_int16 *samplesOut1_tmp[ 2 ]; 93 VARDECL( opus_int16, samplesOut1_tmp_storage ); 94 VARDECL( opus_int16, samplesOut2_tmp ); 95 opus_int32 MS_pred_Q13[ 2 ] = { 0 }; 96 opus_int16 *resample_out_ptr; 97 silk_decoder *psDec = ( silk_decoder * )decState; 98 silk_decoder_state *channel_state = psDec->channel_state; 99 opus_int has_side; 100 opus_int stereo_to_mono; 101 SAVE_STACK; 102 103 silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); 104 105 /**********************************/ 106 /* Test if first frame in payload */ 107 /**********************************/ 108 if( newPacketFlag ) { 109 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 110 channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ 111 } 112 } 113 114 /* If Mono -> Stereo transition in bitstream: init state of second channel */ 115 if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { 116 ret += silk_init_decoder( &channel_state[ 1 ] ); 117 } 118 119 stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && 120 ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); 121 122 if( channel_state[ 0 ].nFramesDecoded == 0 ) { 123 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 124 opus_int fs_kHz_dec; 125 if( decControl->payloadSize_ms == 0 ) { 126 /* Assuming packet loss, use 10 ms */ 127 channel_state[ n ].nFramesPerPacket = 1; 128 channel_state[ n ].nb_subfr = 2; 129 } else if( decControl->payloadSize_ms == 10 ) { 130 channel_state[ n ].nFramesPerPacket = 1; 131 channel_state[ n ].nb_subfr = 2; 132 } else if( decControl->payloadSize_ms == 20 ) { 133 channel_state[ n ].nFramesPerPacket = 1; 134 channel_state[ n ].nb_subfr = 4; 135 } else if( decControl->payloadSize_ms == 40 ) { 136 channel_state[ n ].nFramesPerPacket = 2; 137 channel_state[ n ].nb_subfr = 4; 138 } else if( decControl->payloadSize_ms == 60 ) { 139 channel_state[ n ].nFramesPerPacket = 3; 140 channel_state[ n ].nb_subfr = 4; 141 } else { 142 silk_assert( 0 ); 143 RESTORE_STACK; 144 return SILK_DEC_INVALID_FRAME_SIZE; 145 } 146 fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; 147 if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { 148 silk_assert( 0 ); 149 RESTORE_STACK; 150 return SILK_DEC_INVALID_SAMPLING_FREQUENCY; 151 } 152 ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); 153 } 154 } 155 156 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { 157 silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); 158 silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); 159 silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); 160 } 161 psDec->nChannelsAPI = decControl->nChannelsAPI; 162 psDec->nChannelsInternal = decControl->nChannelsInternal; 163 164 if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { 165 ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; 166 RESTORE_STACK; 167 return( ret ); 168 } 169 170 if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { 171 /* First decoder call for this payload */ 172 /* Decode VAD flags and LBRR flag */ 173 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 174 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 175 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); 176 } 177 channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); 178 } 179 /* Decode LBRR flags */ 180 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 181 silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); 182 if( channel_state[ n ].LBRR_flag ) { 183 if( channel_state[ n ].nFramesPerPacket == 1 ) { 184 channel_state[ n ].LBRR_flags[ 0 ] = 1; 185 } else { 186 LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; 187 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 188 channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; 189 } 190 } 191 } 192 } 193 194 if( lostFlag == FLAG_DECODE_NORMAL ) { 195 /* Regular decoding: skip all LBRR data */ 196 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { 197 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 198 if( channel_state[ n ].LBRR_flags[ i ] ) { 199 opus_int pulses[ MAX_FRAME_LENGTH ]; 200 opus_int condCoding; 201 202 if( decControl->nChannelsInternal == 2 && n == 0 ) { 203 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 204 if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { 205 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 206 } 207 } 208 /* Use conditional coding if previous frame available */ 209 if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { 210 condCoding = CODE_CONDITIONALLY; 211 } else { 212 condCoding = CODE_INDEPENDENTLY; 213 } 214 silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); 215 silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, 216 channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); 217 } 218 } 219 } 220 } 221 } 222 223 /* Get MS predictor index */ 224 if( decControl->nChannelsInternal == 2 ) { 225 if( lostFlag == FLAG_DECODE_NORMAL || 226 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) 227 { 228 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 229 /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ 230 if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || 231 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) 232 { 233 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 234 } else { 235 decode_only_middle = 0; 236 } 237 } else { 238 for( n = 0; n < 2; n++ ) { 239 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; 240 } 241 } 242 } 243 244 /* Reset side channel decoder prediction memory for first frame with side coding */ 245 if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { 246 silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); 247 silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); 248 psDec->channel_state[ 1 ].lagPrev = 100; 249 psDec->channel_state[ 1 ].LastGainIndex = 10; 250 psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; 251 psDec->channel_state[ 1 ].first_frame_after_reset = 1; 252 } 253 254 ALLOC( samplesOut1_tmp_storage, 255 decControl->nChannelsInternal*( 256 channel_state[ 0 ].frame_length + 2 ), 257 opus_int16 ); 258 samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; 259 samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage 260 + channel_state[ 0 ].frame_length + 2; 261 262 if( lostFlag == FLAG_DECODE_NORMAL ) { 263 has_side = !decode_only_middle; 264 } else { 265 has_side = !psDec->prev_decode_only_middle 266 || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); 267 } 268 /* Call decoder for one frame */ 269 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 270 if( n == 0 || has_side ) { 271 opus_int FrameIndex; 272 opus_int condCoding; 273 274 FrameIndex = channel_state[ 0 ].nFramesDecoded - n; 275 /* Use independent coding if no previous frame available */ 276 if( FrameIndex <= 0 ) { 277 condCoding = CODE_INDEPENDENTLY; 278 } else if( lostFlag == FLAG_DECODE_LBRR ) { 279 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; 280 } else if( n > 0 && psDec->prev_decode_only_middle ) { 281 /* If we skipped a side frame in this packet, we don't 282 need LTP scaling; the LTP state is well-defined. */ 283 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 284 } else { 285 condCoding = CODE_CONDITIONALLY; 286 } 287 ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); 288 } else { 289 silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); 290 } 291 channel_state[ n ].nFramesDecoded++; 292 } 293 294 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { 295 /* Convert Mid/Side to Left/Right */ 296 silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); 297 } else { 298 /* Buffering */ 299 silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 300 silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); 301 } 302 303 /* Number of output samples */ 304 *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); 305 306 /* Set up pointers to temp buffers */ 307 ALLOC( samplesOut2_tmp, 308 decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 ); 309 if( decControl->nChannelsAPI == 2 ) { 310 resample_out_ptr = samplesOut2_tmp; 311 } else { 312 resample_out_ptr = samplesOut; 313 } 314 315 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { 316 317 /* Resample decoded signal to API_sampleRate */ 318 ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); 319 320 /* Interleave if stereo output and stereo stream */ 321 if( decControl->nChannelsAPI == 2 ) { 322 for( i = 0; i < *nSamplesOut; i++ ) { 323 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; 324 } 325 } 326 } 327 328 /* Create two channel output from mono stream */ 329 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { 330 if ( stereo_to_mono ){ 331 /* Resample right channel for newly collapsed stereo just in case 332 we weren't doing collapsing when switching to mono */ 333 ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); 334 335 for( i = 0; i < *nSamplesOut; i++ ) { 336 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; 337 } 338 } else { 339 for( i = 0; i < *nSamplesOut; i++ ) { 340 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; 341 } 342 } 343 } 344 345 /* Export pitch lag, measured at 48 kHz sampling rate */ 346 if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { 347 int mult_tab[ 3 ] = { 6, 4, 3 }; 348 decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; 349 } else { 350 decControl->prevPitchLag = 0; 351 } 352 353 if( lostFlag == FLAG_PACKET_LOST ) { 354 /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" 355 if we lose packets when the energy is going down */ 356 for ( i = 0; i < psDec->nChannelsInternal; i++ ) 357 psDec->channel_state[ i ].LastGainIndex = 10; 358 } else { 359 psDec->prev_decode_only_middle = decode_only_middle; 360 } 361 RESTORE_STACK; 362 return ret; 363 } 364 365 #if 0 366 /* Getting table of contents for a packet */ 367 opus_int silk_get_TOC( 368 const opus_uint8 *payload, /* I Payload data */ 369 const opus_int nBytesIn, /* I Number of input bytes */ 370 const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ 371 silk_TOC_struct *Silk_TOC /* O Type of content */ 372 ) 373 { 374 opus_int i, flags, ret = SILK_NO_ERROR; 375 376 if( nBytesIn < 1 ) { 377 return -1; 378 } 379 if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { 380 return -1; 381 } 382 383 silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); 384 385 /* For stereo, extract the flags for the mid channel */ 386 flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); 387 388 Silk_TOC->inbandFECFlag = flags & 1; 389 for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { 390 flags = silk_RSHIFT( flags, 1 ); 391 Silk_TOC->VADFlags[ i ] = flags & 1; 392 Silk_TOC->VADFlag |= flags & 1; 393 } 394 395 return ret; 396 } 397 #endif 398