Home | History | Annotate | Download | only in silk
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 #include "API.h"
     32 #include "main.h"
     33 #include "stack_alloc.h"
     34 
     35 /************************/
     36 /* Decoder Super Struct */
     37 /************************/
     38 typedef struct {
     39     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
     40     stereo_dec_state                sStereo;
     41     opus_int                         nChannelsAPI;
     42     opus_int                         nChannelsInternal;
     43     opus_int                         prev_decode_only_middle;
     44 } silk_decoder;
     45 
     46 /*********************/
     47 /* Decoder functions */
     48 /*********************/
     49 
     50 opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
     51     opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
     52 )
     53 {
     54     opus_int ret = SILK_NO_ERROR;
     55 
     56     *decSizeBytes = sizeof( silk_decoder );
     57 
     58     return ret;
     59 }
     60 
     61 /* Reset decoder state */
     62 opus_int silk_InitDecoder(                              /* O    Returns error code                              */
     63     void                            *decState           /* I/O  State                                           */
     64 )
     65 {
     66     opus_int n, ret = SILK_NO_ERROR;
     67     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
     68 
     69     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
     70         ret  = silk_init_decoder( &channel_state[ n ] );
     71     }
     72 
     73     return ret;
     74 }
     75 
     76 /* Decode a frame */
     77 opus_int silk_Decode(                                   /* O    Returns error code                              */
     78     void*                           decState,           /* I/O  State                                           */
     79     silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
     80     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
     81     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
     82     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
     83     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
     84     opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
     85 )
     86 {
     87     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
     88     opus_int32 nSamplesOutDec, LBRR_symbol;
     89     opus_int16 *samplesOut1_tmp[ 2 ];
     90     VARDECL( opus_int16, samplesOut1_tmp_storage );
     91     VARDECL( opus_int16, samplesOut2_tmp );
     92     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
     93     opus_int16 *resample_out_ptr;
     94     silk_decoder *psDec = ( silk_decoder * )decState;
     95     silk_decoder_state *channel_state = psDec->channel_state;
     96     opus_int has_side;
     97     opus_int stereo_to_mono;
     98     SAVE_STACK;
     99 
    100     /**********************************/
    101     /* Test if first frame in payload */
    102     /**********************************/
    103     if( newPacketFlag ) {
    104         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    105             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
    106         }
    107     }
    108 
    109     /* If Mono -> Stereo transition in bitstream: init state of second channel */
    110     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
    111         ret += silk_init_decoder( &channel_state[ 1 ] );
    112     }
    113 
    114     stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
    115                      ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
    116 
    117     if( channel_state[ 0 ].nFramesDecoded == 0 ) {
    118         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    119             opus_int fs_kHz_dec;
    120             if( decControl->payloadSize_ms == 0 ) {
    121                 /* Assuming packet loss, use 10 ms */
    122                 channel_state[ n ].nFramesPerPacket = 1;
    123                 channel_state[ n ].nb_subfr = 2;
    124             } else if( decControl->payloadSize_ms == 10 ) {
    125                 channel_state[ n ].nFramesPerPacket = 1;
    126                 channel_state[ n ].nb_subfr = 2;
    127             } else if( decControl->payloadSize_ms == 20 ) {
    128                 channel_state[ n ].nFramesPerPacket = 1;
    129                 channel_state[ n ].nb_subfr = 4;
    130             } else if( decControl->payloadSize_ms == 40 ) {
    131                 channel_state[ n ].nFramesPerPacket = 2;
    132                 channel_state[ n ].nb_subfr = 4;
    133             } else if( decControl->payloadSize_ms == 60 ) {
    134                 channel_state[ n ].nFramesPerPacket = 3;
    135                 channel_state[ n ].nb_subfr = 4;
    136             } else {
    137                 silk_assert( 0 );
    138                 RESTORE_STACK;
    139                 return SILK_DEC_INVALID_FRAME_SIZE;
    140             }
    141             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
    142             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
    143                 silk_assert( 0 );
    144                 RESTORE_STACK;
    145                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
    146             }
    147             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
    148         }
    149     }
    150 
    151     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
    152         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
    153         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
    154         silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
    155     }
    156     psDec->nChannelsAPI      = decControl->nChannelsAPI;
    157     psDec->nChannelsInternal = decControl->nChannelsInternal;
    158 
    159     if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
    160         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
    161         RESTORE_STACK;
    162         return( ret );
    163     }
    164 
    165     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
    166         /* First decoder call for this payload */
    167         /* Decode VAD flags and LBRR flag */
    168         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    169             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
    170                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
    171             }
    172             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
    173         }
    174         /* Decode LBRR flags */
    175         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    176             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
    177             if( channel_state[ n ].LBRR_flag ) {
    178                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
    179                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
    180                 } else {
    181                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
    182                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
    183                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
    184                     }
    185                 }
    186             }
    187         }
    188 
    189         if( lostFlag == FLAG_DECODE_NORMAL ) {
    190             /* Regular decoding: skip all LBRR data */
    191             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
    192                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    193                     if( channel_state[ n ].LBRR_flags[ i ] ) {
    194                         opus_int pulses[ MAX_FRAME_LENGTH ];
    195                         opus_int condCoding;
    196 
    197                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
    198                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
    199                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
    200                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
    201                             }
    202                         }
    203                         /* Use conditional coding if previous frame available */
    204                         if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
    205                             condCoding = CODE_CONDITIONALLY;
    206                         } else {
    207                             condCoding = CODE_INDEPENDENTLY;
    208                         }
    209                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
    210                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
    211                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
    212                     }
    213                 }
    214             }
    215         }
    216     }
    217 
    218     /* Get MS predictor index */
    219     if( decControl->nChannelsInternal == 2 ) {
    220         if(   lostFlag == FLAG_DECODE_NORMAL ||
    221             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
    222         {
    223             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
    224             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
    225             if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
    226                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
    227             {
    228                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
    229             } else {
    230                 decode_only_middle = 0;
    231             }
    232         } else {
    233             for( n = 0; n < 2; n++ ) {
    234                 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
    235             }
    236         }
    237     }
    238 
    239     /* Reset side channel decoder prediction memory for first frame with side coding */
    240     if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
    241         silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
    242         silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
    243         psDec->channel_state[ 1 ].lagPrev        = 100;
    244         psDec->channel_state[ 1 ].LastGainIndex  = 10;
    245         psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
    246         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
    247     }
    248 
    249     ALLOC( samplesOut1_tmp_storage,
    250            decControl->nChannelsInternal*(
    251                channel_state[ 0 ].frame_length + 2 ),
    252            opus_int16 );
    253     samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
    254     samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
    255                            + channel_state[ 0 ].frame_length + 2;
    256 
    257     if( lostFlag == FLAG_DECODE_NORMAL ) {
    258         has_side = !decode_only_middle;
    259     } else {
    260         has_side = !psDec->prev_decode_only_middle
    261               || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
    262     }
    263     /* Call decoder for one frame */
    264     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    265         if( n == 0 || has_side ) {
    266             opus_int FrameIndex;
    267             opus_int condCoding;
    268 
    269             FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
    270             /* Use independent coding if no previous frame available */
    271             if( FrameIndex <= 0 ) {
    272                 condCoding = CODE_INDEPENDENTLY;
    273             } else if( lostFlag == FLAG_DECODE_LBRR ) {
    274                 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
    275             } else if( n > 0 && psDec->prev_decode_only_middle ) {
    276                 /* If we skipped a side frame in this packet, we don't
    277                    need LTP scaling; the LTP state is well-defined. */
    278                 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
    279             } else {
    280                 condCoding = CODE_CONDITIONALLY;
    281             }
    282             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
    283         } else {
    284             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
    285         }
    286         channel_state[ n ].nFramesDecoded++;
    287     }
    288 
    289     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
    290         /* Convert Mid/Side to Left/Right */
    291         silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
    292     } else {
    293         /* Buffering */
    294         silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
    295         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
    296     }
    297 
    298     /* Number of output samples */
    299     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
    300 
    301     /* Set up pointers to temp buffers */
    302     ALLOC( samplesOut2_tmp,
    303            decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
    304     if( decControl->nChannelsAPI == 2 ) {
    305         resample_out_ptr = samplesOut2_tmp;
    306     } else {
    307         resample_out_ptr = samplesOut;
    308     }
    309 
    310     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
    311 
    312         /* Resample decoded signal to API_sampleRate */
    313         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
    314 
    315         /* Interleave if stereo output and stereo stream */
    316         if( decControl->nChannelsAPI == 2 ) {
    317             for( i = 0; i < *nSamplesOut; i++ ) {
    318                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
    319             }
    320         }
    321     }
    322 
    323     /* Create two channel output from mono stream */
    324     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
    325         if ( stereo_to_mono ){
    326             /* Resample right channel for newly collapsed stereo just in case
    327                we weren't doing collapsing when switching to mono */
    328             ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
    329 
    330             for( i = 0; i < *nSamplesOut; i++ ) {
    331                 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
    332             }
    333         } else {
    334             for( i = 0; i < *nSamplesOut; i++ ) {
    335                 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
    336             }
    337         }
    338     }
    339 
    340     /* Export pitch lag, measured at 48 kHz sampling rate */
    341     if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
    342         int mult_tab[ 3 ] = { 6, 4, 3 };
    343         decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
    344     } else {
    345         decControl->prevPitchLag = 0;
    346     }
    347 
    348     if( lostFlag == FLAG_PACKET_LOST ) {
    349        /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
    350           if we lose packets when the energy is going down */
    351        for ( i = 0; i < psDec->nChannelsInternal; i++ )
    352           psDec->channel_state[ i ].LastGainIndex = 10;
    353     } else {
    354        psDec->prev_decode_only_middle = decode_only_middle;
    355     }
    356     RESTORE_STACK;
    357     return ret;
    358 }
    359 
    360 #if 0
    361 /* Getting table of contents for a packet */
    362 opus_int silk_get_TOC(
    363     const opus_uint8                *payload,           /* I    Payload data                                */
    364     const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    365     const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    366     silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
    367 )
    368 {
    369     opus_int i, flags, ret = SILK_NO_ERROR;
    370 
    371     if( nBytesIn < 1 ) {
    372         return -1;
    373     }
    374     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
    375         return -1;
    376     }
    377 
    378     silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
    379 
    380     /* For stereo, extract the flags for the mid channel */
    381     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
    382 
    383     Silk_TOC->inbandFECFlag = flags & 1;
    384     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
    385         flags = silk_RSHIFT( flags, 1 );
    386         Silk_TOC->VADFlags[ i ] = flags & 1;
    387         Silk_TOC->VADFlag |= flags & 1;
    388     }
    389 
    390     return ret;
    391 }
    392 #endif
    393