1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 #include "API.h" 32 #include "main.h" 33 #include "stack_alloc.h" 34 #include "os_support.h" 35 36 /************************/ 37 /* Decoder Super Struct */ 38 /************************/ 39 typedef struct { 40 silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; 41 stereo_dec_state sStereo; 42 opus_int nChannelsAPI; 43 opus_int nChannelsInternal; 44 opus_int prev_decode_only_middle; 45 } silk_decoder; 46 47 /*********************/ 48 /* Decoder functions */ 49 /*********************/ 50 51 opus_int silk_Get_Decoder_Size( /* O Returns error code */ 52 opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ 53 ) 54 { 55 opus_int ret = SILK_NO_ERROR; 56 57 *decSizeBytes = sizeof( silk_decoder ); 58 59 return ret; 60 } 61 62 /* Reset decoder state */ 63 opus_int silk_InitDecoder( /* O Returns error code */ 64 void *decState /* I/O State */ 65 ) 66 { 67 opus_int n, ret = SILK_NO_ERROR; 68 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; 69 70 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { 71 ret = silk_init_decoder( &channel_state[ n ] ); 72 } 73 silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); 74 /* Not strictly needed, but it's cleaner that way */ 75 ((silk_decoder *)decState)->prev_decode_only_middle = 0; 76 77 return ret; 78 } 79 80 /* Decode a frame */ 81 opus_int silk_Decode( /* O Returns error code */ 82 void* decState, /* I/O State */ 83 silk_DecControlStruct* decControl, /* I/O Control Structure */ 84 opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ 85 opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ 86 ec_dec *psRangeDec, /* I/O Compressor data structure */ 87 opus_int16 *samplesOut, /* O Decoded output speech vector */ 88 opus_int32 *nSamplesOut, /* O Number of samples decoded */ 89 int arch /* I Run-time architecture */ 90 ) 91 { 92 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; 93 opus_int32 nSamplesOutDec, LBRR_symbol; 94 opus_int16 *samplesOut1_tmp[ 2 ]; 95 VARDECL( opus_int16, samplesOut1_tmp_storage1 ); 96 VARDECL( opus_int16, samplesOut1_tmp_storage2 ); 97 VARDECL( opus_int16, samplesOut2_tmp ); 98 opus_int32 MS_pred_Q13[ 2 ] = { 0 }; 99 opus_int16 *resample_out_ptr; 100 silk_decoder *psDec = ( silk_decoder * )decState; 101 silk_decoder_state *channel_state = psDec->channel_state; 102 opus_int has_side; 103 opus_int stereo_to_mono; 104 int delay_stack_alloc; 105 SAVE_STACK; 106 107 silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); 108 109 /**********************************/ 110 /* Test if first frame in payload */ 111 /**********************************/ 112 if( newPacketFlag ) { 113 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 114 channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ 115 } 116 } 117 118 /* If Mono -> Stereo transition in bitstream: init state of second channel */ 119 if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { 120 ret += silk_init_decoder( &channel_state[ 1 ] ); 121 } 122 123 stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && 124 ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); 125 126 if( channel_state[ 0 ].nFramesDecoded == 0 ) { 127 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 128 opus_int fs_kHz_dec; 129 if( decControl->payloadSize_ms == 0 ) { 130 /* Assuming packet loss, use 10 ms */ 131 channel_state[ n ].nFramesPerPacket = 1; 132 channel_state[ n ].nb_subfr = 2; 133 } else if( decControl->payloadSize_ms == 10 ) { 134 channel_state[ n ].nFramesPerPacket = 1; 135 channel_state[ n ].nb_subfr = 2; 136 } else if( decControl->payloadSize_ms == 20 ) { 137 channel_state[ n ].nFramesPerPacket = 1; 138 channel_state[ n ].nb_subfr = 4; 139 } else if( decControl->payloadSize_ms == 40 ) { 140 channel_state[ n ].nFramesPerPacket = 2; 141 channel_state[ n ].nb_subfr = 4; 142 } else if( decControl->payloadSize_ms == 60 ) { 143 channel_state[ n ].nFramesPerPacket = 3; 144 channel_state[ n ].nb_subfr = 4; 145 } else { 146 silk_assert( 0 ); 147 RESTORE_STACK; 148 return SILK_DEC_INVALID_FRAME_SIZE; 149 } 150 fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; 151 if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { 152 silk_assert( 0 ); 153 RESTORE_STACK; 154 return SILK_DEC_INVALID_SAMPLING_FREQUENCY; 155 } 156 ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); 157 } 158 } 159 160 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { 161 silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); 162 silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); 163 silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); 164 } 165 psDec->nChannelsAPI = decControl->nChannelsAPI; 166 psDec->nChannelsInternal = decControl->nChannelsInternal; 167 168 if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { 169 ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; 170 RESTORE_STACK; 171 return( ret ); 172 } 173 174 if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { 175 /* First decoder call for this payload */ 176 /* Decode VAD flags and LBRR flag */ 177 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 178 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 179 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); 180 } 181 channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); 182 } 183 /* Decode LBRR flags */ 184 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 185 silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); 186 if( channel_state[ n ].LBRR_flag ) { 187 if( channel_state[ n ].nFramesPerPacket == 1 ) { 188 channel_state[ n ].LBRR_flags[ 0 ] = 1; 189 } else { 190 LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; 191 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 192 channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; 193 } 194 } 195 } 196 } 197 198 if( lostFlag == FLAG_DECODE_NORMAL ) { 199 /* Regular decoding: skip all LBRR data */ 200 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { 201 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 202 if( channel_state[ n ].LBRR_flags[ i ] ) { 203 opus_int16 pulses[ MAX_FRAME_LENGTH ]; 204 opus_int condCoding; 205 206 if( decControl->nChannelsInternal == 2 && n == 0 ) { 207 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 208 if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { 209 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 210 } 211 } 212 /* Use conditional coding if previous frame available */ 213 if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { 214 condCoding = CODE_CONDITIONALLY; 215 } else { 216 condCoding = CODE_INDEPENDENTLY; 217 } 218 silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); 219 silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, 220 channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); 221 } 222 } 223 } 224 } 225 } 226 227 /* Get MS predictor index */ 228 if( decControl->nChannelsInternal == 2 ) { 229 if( lostFlag == FLAG_DECODE_NORMAL || 230 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) 231 { 232 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 233 /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ 234 if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || 235 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) 236 { 237 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 238 } else { 239 decode_only_middle = 0; 240 } 241 } else { 242 for( n = 0; n < 2; n++ ) { 243 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; 244 } 245 } 246 } 247 248 /* Reset side channel decoder prediction memory for first frame with side coding */ 249 if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { 250 silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); 251 silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); 252 psDec->channel_state[ 1 ].lagPrev = 100; 253 psDec->channel_state[ 1 ].LastGainIndex = 10; 254 psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; 255 psDec->channel_state[ 1 ].first_frame_after_reset = 1; 256 } 257 258 /* Check if the temp buffer fits into the output PCM buffer. If it fits, 259 we can delay allocating the temp buffer until after the SILK peak stack 260 usage. We need to use a < and not a <= because of the two extra samples. */ 261 delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal 262 < decControl->API_sampleRate*decControl->nChannelsAPI; 263 ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE 264 : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), 265 opus_int16 ); 266 if ( delay_stack_alloc ) 267 { 268 samplesOut1_tmp[ 0 ] = samplesOut; 269 samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; 270 } else { 271 samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; 272 samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; 273 } 274 275 if( lostFlag == FLAG_DECODE_NORMAL ) { 276 has_side = !decode_only_middle; 277 } else { 278 has_side = !psDec->prev_decode_only_middle 279 || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); 280 } 281 /* Call decoder for one frame */ 282 for( n = 0; n < decControl->nChannelsInternal; n++ ) { 283 if( n == 0 || has_side ) { 284 opus_int FrameIndex; 285 opus_int condCoding; 286 287 FrameIndex = channel_state[ 0 ].nFramesDecoded - n; 288 /* Use independent coding if no previous frame available */ 289 if( FrameIndex <= 0 ) { 290 condCoding = CODE_INDEPENDENTLY; 291 } else if( lostFlag == FLAG_DECODE_LBRR ) { 292 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; 293 } else if( n > 0 && psDec->prev_decode_only_middle ) { 294 /* If we skipped a side frame in this packet, we don't 295 need LTP scaling; the LTP state is well-defined. */ 296 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 297 } else { 298 condCoding = CODE_CONDITIONALLY; 299 } 300 ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch); 301 } else { 302 silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); 303 } 304 channel_state[ n ].nFramesDecoded++; 305 } 306 307 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { 308 /* Convert Mid/Side to Left/Right */ 309 silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); 310 } else { 311 /* Buffering */ 312 silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 313 silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); 314 } 315 316 /* Number of output samples */ 317 *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); 318 319 /* Set up pointers to temp buffers */ 320 ALLOC( samplesOut2_tmp, 321 decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 ); 322 if( decControl->nChannelsAPI == 2 ) { 323 resample_out_ptr = samplesOut2_tmp; 324 } else { 325 resample_out_ptr = samplesOut; 326 } 327 328 ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc 329 ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) 330 : ALLOC_NONE, 331 opus_int16 ); 332 if ( delay_stack_alloc ) { 333 OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); 334 samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; 335 samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; 336 } 337 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { 338 339 /* Resample decoded signal to API_sampleRate */ 340 ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); 341 342 /* Interleave if stereo output and stereo stream */ 343 if( decControl->nChannelsAPI == 2 ) { 344 for( i = 0; i < *nSamplesOut; i++ ) { 345 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; 346 } 347 } 348 } 349 350 /* Create two channel output from mono stream */ 351 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { 352 if ( stereo_to_mono ){ 353 /* Resample right channel for newly collapsed stereo just in case 354 we weren't doing collapsing when switching to mono */ 355 ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); 356 357 for( i = 0; i < *nSamplesOut; i++ ) { 358 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; 359 } 360 } else { 361 for( i = 0; i < *nSamplesOut; i++ ) { 362 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; 363 } 364 } 365 } 366 367 /* Export pitch lag, measured at 48 kHz sampling rate */ 368 if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { 369 int mult_tab[ 3 ] = { 6, 4, 3 }; 370 decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; 371 } else { 372 decControl->prevPitchLag = 0; 373 } 374 375 if( lostFlag == FLAG_PACKET_LOST ) { 376 /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" 377 if we lose packets when the energy is going down */ 378 for ( i = 0; i < psDec->nChannelsInternal; i++ ) 379 psDec->channel_state[ i ].LastGainIndex = 10; 380 } else { 381 psDec->prev_decode_only_middle = decode_only_middle; 382 } 383 RESTORE_STACK; 384 return ret; 385 } 386 387 #if 0 388 /* Getting table of contents for a packet */ 389 opus_int silk_get_TOC( 390 const opus_uint8 *payload, /* I Payload data */ 391 const opus_int nBytesIn, /* I Number of input bytes */ 392 const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ 393 silk_TOC_struct *Silk_TOC /* O Type of content */ 394 ) 395 { 396 opus_int i, flags, ret = SILK_NO_ERROR; 397 398 if( nBytesIn < 1 ) { 399 return -1; 400 } 401 if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { 402 return -1; 403 } 404 405 silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); 406 407 /* For stereo, extract the flags for the mid channel */ 408 flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); 409 410 Silk_TOC->inbandFECFlag = flags & 1; 411 for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { 412 flags = silk_RSHIFT( flags, 1 ); 413 Silk_TOC->VADFlags[ i ] = flags & 1; 414 Silk_TOC->VADFlag |= flags & 1; 415 } 416 417 return ret; 418 } 419 #endif 420