Home | History | Annotate | Download | only in silk
      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 #include "API.h"
     32 #include "main.h"
     33 #include "stack_alloc.h"
     34 
     35 /************************/
     36 /* Decoder Super Struct */
     37 /************************/
     38 typedef struct {                                                            /* Object behind the void *decState argument of silk_InitDecoder() / silk_Decode() */
     39     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];      /* One decoder state per channel                                       */
     40     stereo_dec_state                sStereo;                                /* Stereo state: pred_prev_Q13, sMid and sSide are accessed in this file */
     41     opus_int                         nChannelsAPI;                          /* Saved from decControl->nChannelsAPI by silk_Decode()                 */
     42     opus_int                         nChannelsInternal;                     /* Saved from decControl->nChannelsInternal by silk_Decode()            */
     43     opus_int                         prev_decode_only_middle;               /* decode_only_middle of the last silk_Decode() call that was not a packet loss */
     44 } silk_decoder;
     45 
     46 /*********************/
     47 /* Decoder functions */
     48 /*********************/
     49 
     50 opus_int silk_Get_Decoder_Size(                         /* O    Always SILK_NO_ERROR                            */
     51     opus_int                        *decSizeBytes       /* O    Receives sizeof( silk_decoder ) in bytes        */
     52 )
     53 {
     54     /* Report the size of the decoder state structure */
     55     const opus_int stateBytes = sizeof( silk_decoder );
     56 
     57     *decSizeBytes = stateBytes;
     58     return SILK_NO_ERROR;
     59 }
     60 
     61 /* Reset decoder state: re-initialize every channel decoder and clear the stereo state */
     62 opus_int silk_InitDecoder(                              /* O    Returns error code (summed over all channels)   */
     63     void                            *decState           /* I/O  State                                           */
     64 )
     65 {
     66     opus_int n, ret = SILK_NO_ERROR;
     67     silk_decoder *psDec = ( silk_decoder * )decState;
     68 
     69     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
     70         ret += silk_init_decoder( &psDec->channel_state[ n ] );  /* Accumulate so an earlier channel's error is not overwritten */
     71     }
     72     silk_memset( &psDec->sStereo, 0, sizeof( psDec->sStereo ) );
     73     /* Not strictly needed, but it's cleaner that way */
     74     psDec->prev_decode_only_middle = 0;
     75 
     76     return ret;
     77 }
     78 
     79 /* Decode one frame per internal channel, resample it to the API sample rate and write the result to samplesOut */
     80 opus_int silk_Decode(                                   /* O    Returns error code                              */
     81     void*                           decState,           /* I/O  State                                           */
     82     silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
     83     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
     84     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
     85     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
     86     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
     87     opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
     88 )
     89 {
     90     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
     91     opus_int32 nSamplesOutDec, LBRR_symbol;
     92     opus_int16 *samplesOut1_tmp[ 2 ];                   /* Per-channel pointers into samplesOut1_tmp_storage */
     93     VARDECL( opus_int16, samplesOut1_tmp_storage );
     94     VARDECL( opus_int16, samplesOut2_tmp );
     95     opus_int32 MS_pred_Q13[ 2 ] = { 0 };                /* Stereo predictor values handed to silk_stereo_MS_to_LR() */
     96     opus_int16 *resample_out_ptr;
     97     silk_decoder *psDec = ( silk_decoder * )decState;
     98     silk_decoder_state *channel_state = psDec->channel_state;
     99     opus_int has_side;                                  /* Nonzero if channel 1 is decoded (rather than zero-filled) in this call */
    100     opus_int stereo_to_mono;                            /* Internal channels went 2 -> 1 while the internal rate equals channel 0's current rate */
    101     SAVE_STACK;
    102     /* Only one or two internal channels are handled below */
    103     silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
    104 
    105     /**********************************/
    106     /* Test if first frame in payload */
    107     /**********************************/
    108     if( newPacketFlag ) {
    109         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    110             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
    111         }
    112     }
    113 
    114     /* If Mono -> Stereo transition in bitstream: init state of second channel */
    115     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
    116         ret += silk_init_decoder( &channel_state[ 1 ] );
    117     }
    118     /* Must be evaluated before fs_kHz and psDec->nChannelsInternal are updated further down */
    119     stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
    120                      ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
    121     /* No frame of this packet decoded yet: set frames per packet, subframes per frame and the decoder sample rate */
    122     if( channel_state[ 0 ].nFramesDecoded == 0 ) {
    123         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    124             opus_int fs_kHz_dec;
    125             if( decControl->payloadSize_ms == 0 ) {
    126                 /* Assuming packet loss, use 10 ms */
    127                 channel_state[ n ].nFramesPerPacket = 1;
    128                 channel_state[ n ].nb_subfr = 2;
    129             } else if( decControl->payloadSize_ms == 10 ) {
    130                 channel_state[ n ].nFramesPerPacket = 1;
    131                 channel_state[ n ].nb_subfr = 2;
    132             } else if( decControl->payloadSize_ms == 20 ) {
    133                 channel_state[ n ].nFramesPerPacket = 1;
    134                 channel_state[ n ].nb_subfr = 4;
    135             } else if( decControl->payloadSize_ms == 40 ) {
    136                 channel_state[ n ].nFramesPerPacket = 2;
    137                 channel_state[ n ].nb_subfr = 4;
    138             } else if( decControl->payloadSize_ms == 60 ) {
    139                 channel_state[ n ].nFramesPerPacket = 3;
    140                 channel_state[ n ].nb_subfr = 4;
    141             } else {
    142                 silk_assert( 0 );
    143                 RESTORE_STACK;
    144                 return SILK_DEC_INVALID_FRAME_SIZE;
    145             }
    146             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;  /* Hz -> kHz: 8000, 12000, 16000 give 8, 12, 16 */
    147             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
    148                 silk_assert( 0 );
    149                 RESTORE_STACK;
    150                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
    151             }
    152             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
    153         }
    154     }
    155     /* Both API and bitstream are now stereo but one of them was mono before: clear stereo memory, copy channel 0's resampler state to channel 1 */
    156     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
    157         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
    158         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
    159         silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
    160     }
    161     psDec->nChannelsAPI      = decControl->nChannelsAPI;
    162     psDec->nChannelsInternal = decControl->nChannelsInternal;
    163     /* Reject API sample rates outside [ 8000, MAX_API_FS_KHZ * 1000 ] Hz */
    164     if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
    165         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
    166         RESTORE_STACK;
    167         return( ret );
    168     }
    169 
    170     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
    171         /* First decoder call for this payload */
    172         /* Decode VAD flags and LBRR flag */
    173         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    174             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
    175                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
    176             }
    177             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
    178         }
    179         /* Decode LBRR flags */
    180         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    181             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
    182             if( channel_state[ n ].LBRR_flag ) {
    183                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
    184                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
    185                 } else {
    186                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
    187                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
    188                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;  /* Bit i of the symbol is frame i's flag */
    189                     }
    190                 }
    191             }
    192         }
    193 
    194         if( lostFlag == FLAG_DECODE_NORMAL ) {
    195             /* Regular decoding: skip all LBRR data */
    196             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
    197                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    198                     if( channel_state[ n ].LBRR_flags[ i ] ) {
    199                         opus_int pulses[ MAX_FRAME_LENGTH ];  /* Scratch output, discarded when this block ends */
    200                         opus_int condCoding;
    201 
    202                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
    203                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
    204                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
    205                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
    206                             }
    207                         }
    208                         /* Use conditional coding if previous frame available */
    209                         if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
    210                             condCoding = CODE_CONDITIONALLY;
    211                         } else {
    212                             condCoding = CODE_INDEPENDENTLY;
    213                         }
    214                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
    215                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
    216                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
    217                     }
    218                 }
    219             }
    220         }
    221     }
    222 
    223     /* Get MS predictor index */
    224     if( decControl->nChannelsInternal == 2 ) {
    225         if(   lostFlag == FLAG_DECODE_NORMAL ||
    226             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
    227         {
    228             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
    229             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
    230             if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
    231                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
    232             {
    233                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
    234             } else {
    235                 decode_only_middle = 0;
    236             }
    237         } else {                                        /* No predictor read from the bitstream: reuse the stored previous values */
    238             for( n = 0; n < 2; n++ ) {
    239                 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
    240             }
    241         }
    242     }
    243 
    244     /* Reset side channel decoder prediction memory for first frame with side coding */
    245     if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
    246         silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
    247         silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
    248         psDec->channel_state[ 1 ].lagPrev        = 100;
    249         psDec->channel_state[ 1 ].LastGainIndex  = 10;
    250         psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
    251         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
    252     }
    253     /* Scratch buffer: frame_length + 2 samples per internal channel; frames are decoded into it from offset 2 */
    254     ALLOC( samplesOut1_tmp_storage,
    255            decControl->nChannelsInternal*(
    256                channel_state[ 0 ].frame_length + 2 ),
    257            opus_int16 );
    258     samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
    259     samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
    260                            + channel_state[ 0 ].frame_length + 2;
    261     /* Decide whether channel 1 gets decoded in this call */
    262     if( lostFlag == FLAG_DECODE_NORMAL ) {
    263         has_side = !decode_only_middle;
    264     } else {
    265         has_side = !psDec->prev_decode_only_middle
    266               || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
    267     }
    268     /* Call decoder for one frame */
    269     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
    270         if( n == 0 || has_side ) {
    271             opus_int FrameIndex;
    272             opus_int condCoding;
    273 
    274             FrameIndex = channel_state[ 0 ].nFramesDecoded - n;  /* For n == 1, channel 0's counter was already incremented below */
    275             /* Use independent coding if no previous frame available */
    276             if( FrameIndex <= 0 ) {
    277                 condCoding = CODE_INDEPENDENTLY;
    278             } else if( lostFlag == FLAG_DECODE_LBRR ) {
    279                 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
    280             } else if( n > 0 && psDec->prev_decode_only_middle ) {
    281                 /* If we skipped a side frame in this packet, we don't
    282                    need LTP scaling; the LTP state is well-defined. */
    283                 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
    284             } else {
    285                 condCoding = CODE_CONDITIONALLY;
    286             }
    287             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
    288         } else {                                        /* Channel 1 not decoded: fill its frame with zeros (nSamplesOutDec was set by channel 0) */
    289             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
    290         }
    291         channel_state[ n ].nFramesDecoded++;
    292     }
    293 
    294     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
    295         /* Convert Mid/Side to Left/Right */
    296         silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
    297     } else {
    298         /* Buffering: put the two samples saved in sStereo.sMid in front of the frame, then save this frame's last two samples */
    299         silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
    300         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
    301     }
    302 
    303     /* Number of output samples: decoded sample count scaled by API rate over internal rate */
    304     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
    305 
    306     /* Set up pointers to temp buffers: with two API channels the resampler writes to a temp buffer that is interleaved afterwards */
    307     ALLOC( samplesOut2_tmp,
    308            decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
    309     if( decControl->nChannelsAPI == 2 ) {
    310         resample_out_ptr = samplesOut2_tmp;
    311     } else {
    312         resample_out_ptr = samplesOut;
    313     }
    314 
    315     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
    316 
    317         /* Resample decoded signal to API_sampleRate (input is read from offset 1 of the scratch buffer) */
    318         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
    319 
    320         /* Interleave if stereo output and stereo stream */
    321         if( decControl->nChannelsAPI == 2 ) {
    322             for( i = 0; i < *nSamplesOut; i++ ) {
    323                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
    324             }
    325         }
    326     }
    327 
    328     /* Create two channel output from mono stream */
    329     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
    330         if ( stereo_to_mono ){
    331             /* Resample right channel for newly collapsed stereo just in case
    332                we weren't doing collapsing when switching to mono */
    333             ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
    334 
    335             for( i = 0; i < *nSamplesOut; i++ ) {
    336                 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
    337             }
    338         } else {                                        /* Otherwise duplicate the left output samples into the right channel */
    339             for( i = 0; i < *nSamplesOut; i++ ) {
    340                 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
    341             }
    342         }
    343     }
    344 
    345     /* Export pitch lag, measured at 48 kHz sampling rate */
    346     if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
    347         int mult_tab[ 3 ] = { 6, 4, 3 };                /* 48 / fs_kHz for fs_kHz = 8, 12, 16 */
    348         decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
    349     } else {
    350         decControl->prevPitchLag = 0;
    351     }
    352 
    353     if( lostFlag == FLAG_PACKET_LOST ) {
    354        /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
    355           if we lose packets when the energy is going down */
    356        for ( i = 0; i < psDec->nChannelsInternal; i++ )
    357           psDec->channel_state[ i ].LastGainIndex = 10;
    358     } else {                                            /* Remember the mid-only decision for the next call */
    359        psDec->prev_decode_only_middle = decode_only_middle;
    360     }
    361     RESTORE_STACK;
    362     return ret;
    363 }
    364 
    365 #if 0
    366 /* Read the table of contents (in-band FEC flag and per-frame VAD flags) from the first payload byte */
    367 opus_int silk_get_TOC(
    368     const opus_uint8                *payload,           /* I    Payload data                                */
    369     const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    370     const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    371     silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
    372 )
    373 {
    374     opus_int frame, bits, nFlagBits;
    375 
    376     /* Reject an empty payload or a frame count outside 0..3 */
    377     if( nBytesIn < 1 || nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
    378         return -1;
    379     }
    380 
    381     silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
    382 
    383     /* Take the top nFramesPerPayload + 1 bits of the first byte (for stereo, the flags for the mid channel) */
    384     nFlagBits = nFramesPerPayload + 1;
    385     bits = silk_RSHIFT( payload[ 0 ], 8 - nFlagBits ) & ( silk_LSHIFT( 1, nFlagBits ) - 1 );
    386 
    387     /* Lowest extracted bit is the in-band FEC flag; the VAD flags follow, last frame first */
    388     Silk_TOC->inbandFECFlag = bits & 1;
    389     for( frame = nFramesPerPayload; frame-- > 0; ) {
    390         bits = silk_RSHIFT( bits, 1 );
    391         Silk_TOC->VADFlags[ frame ] = bits & 1;
    392         Silk_TOC->VADFlag |= bits & 1;
    393     }
    394 
    395     return SILK_NO_ERROR;
    396 }
    397 #endif
    398