Home | History | Annotate | Download | only in fixed
      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 /***********************************************************
     33 * Pitch analyser function
     34 ********************************************************** */
     35 #include "SigProc_FIX.h"
     36 #include "pitch_est_defines.h"
     37 #include "stack_alloc.h"
     38 #include "debug.h"
     39 #include "pitch.h"
     40 
     41 #define SCRATCH_SIZE    22
     42 #define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
     43 #define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
     44 #define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
     45 #define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
     46 #define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
     47 #define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
     48 #define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
     49 #define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
     50 #define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
     51 #define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
     52 #define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
     53 
     54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
     55 
     56 /************************************************************/
     57 /* Internally used functions                                */
     58 /************************************************************/
     59 static void silk_P_Ana_calc_corr_st3(
     60     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
     61     const opus_int16  frame[],                         /* I vector to correlate         */
     62     opus_int          start_lag,                       /* I lag offset to search around */
     63     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     64     opus_int          nb_subfr,                        /* I number of subframes         */
     65     opus_int          complexity,                      /* I Complexity setting          */
     66     int               arch                             /* I Run-time architecture       */
     67 );
     68 
     69 static void silk_P_Ana_calc_energy_st3(
     70     silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
     71     const opus_int16  frame[],                         /* I vector to calc energy in    */
     72     opus_int          start_lag,                       /* I lag offset to search around */
     73     opus_int          sf_length,                       /* I length of one 5 ms subframe */
     74     opus_int          nb_subfr,                        /* I number of subframes         */
     75     opus_int          complexity,                      /* I Complexity setting          */
     76     int               arch                             /* I Run-time architecture       */
     77 );
     78 
     79 /*************************************************************/
     80 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
     81 /*************************************************************/
     82 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
     83     const opus_int16            *frame_unscaled,    /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
     84     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
     85     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     86     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     87     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     88     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     89     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     90     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
     91     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     92     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
     93     const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
     94     int                         arch                /* I    Run-time architecture                                       */
     95 )
     96 {
     97     VARDECL( opus_int16, frame_8kHz_buf );
     98     VARDECL( opus_int16, frame_4kHz );
     99     VARDECL( opus_int16, frame_scaled );
    100     opus_int32 filt_state[ 6 ];
    101     const opus_int16 *frame, *frame_8kHz;
    102     opus_int   i, k, d, j;
    103     VARDECL( opus_int16, C );
    104     VARDECL( opus_int32, xcorr32 );
    105     const opus_int16 *target_ptr, *basis_ptr;
    106     opus_int32 cross_corr, normalizer, energy, energy_basis, energy_target;
    107     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp, shift;
    108     VARDECL( opus_int16, d_comp );
    109     opus_int32 sum, threshold, lag_counter;
    110     opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    111     opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    112     VARDECL( silk_pe_stage3_vals, energies_st3 );
    113     VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
    114     opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
    115     opus_int   sf_length;
    116     opus_int   min_lag;
    117     opus_int   max_lag;
    118     opus_int32 contour_bias_Q15, diff;
    119     opus_int   nb_cbk_search, cbk_size;
    120     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    121     const opus_int8 *Lag_CB_ptr;
    122     SAVE_STACK;
    123 
    124     /* Check for valid sampling frequency */
    125     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
    126 
    127     /* Check for valid complexity setting */
    128     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    129     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    130 
    131     silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
    132     silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
    133 
    134     /* Set up frame lengths max / min lag for the sampling frequency */
    135     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
    136     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
    137     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
    138     sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
    139     min_lag           = PE_MIN_LAG_MS * Fs_kHz;
    140     max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
    141 
    142     /* Downscale input if necessary */
    143     silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length );
    144     shift += 3 - silk_CLZ32( energy );        /* at least two bits headroom */
    145     ALLOC( frame_scaled, frame_length, opus_int16 );
    146     if( shift > 0 ) {
    147         shift = silk_RSHIFT( shift + 1, 1 );
    148         for( i = 0; i < frame_length; i++ ) {
    149             frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift );
    150         }
    151         frame = frame_scaled;
    152     } else {
    153         frame = frame_unscaled;
    154     }
    155 
    156     ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 );
    157     /* Resample from input sampled at Fs_kHz to 8 kHz */
    158     if( Fs_kHz == 16 ) {
    159         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
    160         silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length );
    161         frame_8kHz = frame_8kHz_buf;
    162     } else if( Fs_kHz == 12 ) {
    163         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
    164         silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length );
    165         frame_8kHz = frame_8kHz_buf;
    166     } else {
    167         silk_assert( Fs_kHz == 8 );
    168         frame_8kHz = frame;
    169     }
    170 
    171     /* Decimate again to 4 kHz */
    172     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
    173     ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
    174     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
    175 
    176     /* Low-pass filter */
    177     for( i = frame_length_4kHz - 1; i > 0; i-- ) {
    178         frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
    179     }
    180 
    181 
    182     /******************************************************************************
    183     * FIRST STAGE, operating in 4 khz
    184     ******************************************************************************/
    185     ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
    186     ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
    187     silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
    188     target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
    189     for( k = 0; k < nb_subfr >> 1; k++ ) {
    190         /* Check that we are within range of the array */
    191         silk_assert( target_ptr >= frame_4kHz );
    192         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    193 
    194         basis_ptr = target_ptr - MIN_LAG_4KHZ;
    195 
    196         /* Check that we are within range of the array */
    197         silk_assert( basis_ptr >= frame_4kHz );
    198         silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    199 
    200         celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
    201 
    202         /* Calculate first vector products before loop */
    203         cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
    204         normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
    205         normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ, arch ) );
    206         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
    207 
    208         matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
    209             (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
    210 
    211         /* From now on normalizer is computed recursively */
    212         for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
    213             basis_ptr--;
    214 
    215             /* Check that we are within range of the array */
    216             silk_assert( basis_ptr >= frame_4kHz );
    217             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    218 
    219             cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
    220 
    221             /* Add contribution of new sample and remove contribution from oldest sample */
    222             normalizer = silk_ADD32( normalizer,
    223                 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
    224                 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
    225 
    226             matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
    227                 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
    228         }
    229         /* Update target pointer */
    230         target_ptr += SF_LENGTH_8KHZ;
    231     }
    232 
    233     /* Combine two subframes into single correlation measure and apply short-lag bias */
    234     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    235         for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    236             sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
    237                 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
    238             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    239             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    240         }
    241     } else {
    242         /* Only short-lag bias */
    243         for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    244             sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
    245             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    246             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    247         }
    248     }
    249 
    250     /* Sort */
    251     length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
    252     silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
    253     silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
    254                                           length_d_srch );
    255 
    256     /* Escape if correlation is very low already here */
    257     Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
    258     if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
    259         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    260         *LTPCorr_Q15  = 0;
    261         *lagIndex     = 0;
    262         *contourIndex = 0;
    263         RESTORE_STACK;
    264         return 1;
    265     }
    266 
    267     threshold = silk_SMULWB( search_thres1_Q16, Cmax );
    268     for( i = 0; i < length_d_srch; i++ ) {
    269         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
    270         if( C[ i ] > threshold ) {
    271             d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
    272         } else {
    273             length_d_srch = i;
    274             break;
    275         }
    276     }
    277     silk_assert( length_d_srch > 0 );
    278 
    279     ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
    280     for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
    281         d_comp[ i - D_COMP_MIN ] = 0;
    282     }
    283     for( i = 0; i < length_d_srch; i++ ) {
    284         d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
    285     }
    286 
    287     /* Convolution */
    288     for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    289         d_comp[ i - D_COMP_MIN ] +=
    290             d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
    291     }
    292 
    293     length_d_srch = 0;
    294     for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
    295         if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
    296             d_srch[ length_d_srch ] = i;
    297             length_d_srch++;
    298         }
    299     }
    300 
    301     /* Convolution */
    302     for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    303         d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
    304             + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
    305     }
    306 
    307     length_d_comp = 0;
    308     for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
    309         if( d_comp[ i - D_COMP_MIN ] > 0 ) {
    310             d_comp[ length_d_comp ] = i - 2;
    311             length_d_comp++;
    312         }
    313     }
    314 
    315     /**********************************************************************************
    316     ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    317     *************************************************************************************/
    318 
    319     /*********************************************************************************
    320     * Find energy of each subframe projected onto its history, for a range of delays
    321     *********************************************************************************/
    322     silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
    323 
    324     target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
    325     for( k = 0; k < nb_subfr; k++ ) {
    326 
    327         /* Check that we are within range of the array */
    328         silk_assert( target_ptr >= frame_8kHz );
    329         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    330 
    331         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
    332         for( j = 0; j < length_d_comp; j++ ) {
    333             d = d_comp[ j ];
    334             basis_ptr = target_ptr - d;
    335 
    336             /* Check that we are within range of the array */
    337             silk_assert( basis_ptr >= frame_8kHz );
    338             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    339 
    340             cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
    341             if( cross_corr > 0 ) {
    342                 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
    343                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
    344                     (opus_int16)silk_DIV32_varQ( cross_corr,
    345                                                  silk_ADD32( energy_target,
    346                                                              energy_basis ),
    347                                                  13 + 1 );                                      /* Q13 */
    348             } else {
    349                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
    350             }
    351         }
    352         target_ptr += SF_LENGTH_8KHZ;
    353     }
    354 
    355     /* search over lag range and lags codebook */
    356     /* scale factor for lag codebook, as a function of center lag */
    357 
    358     CCmax   = silk_int32_MIN;
    359     CCmax_b = silk_int32_MIN;
    360 
    361     CBimax = 0; /* To avoid returning undefined lag values */
    362     lag = -1;   /* To check if lag with strong enough correlation has been found */
    363 
    364     if( prevLag > 0 ) {
    365         if( Fs_kHz == 12 ) {
    366             prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
    367         } else if( Fs_kHz == 16 ) {
    368             prevLag = silk_RSHIFT( prevLag, 1 );
    369         }
    370         prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
    371     } else {
    372         prevLag_log2_Q7 = 0;
    373     }
    374     silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
    375     /* Set up stage 2 codebook based on number of subframes */
    376     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    377         cbk_size   = PE_NB_CBKS_STAGE2_EXT;
    378         Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
    379         if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
    380             /* If input is 8 khz use a larger codebook here because it is last stage */
    381             nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
    382         } else {
    383             nb_cbk_search = PE_NB_CBKS_STAGE2;
    384         }
    385     } else {
    386         cbk_size       = PE_NB_CBKS_STAGE2_10MS;
    387         Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
    388         nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
    389     }
    390 
    391     for( k = 0; k < length_d_srch; k++ ) {
    392         d = d_srch[ k ];
    393         for( j = 0; j < nb_cbk_search; j++ ) {
    394             CC[ j ] = 0;
    395             for( i = 0; i < nb_subfr; i++ ) {
    396                 opus_int d_subfr;
    397                 /* Try all codebooks */
    398                 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
    399                 CC[ j ] = CC[ j ]
    400                     + (opus_int32)matrix_ptr( C, i,
    401                                               d_subfr - ( MIN_LAG_8KHZ - 2 ),
    402                                               CSTRIDE_8KHZ );
    403             }
    404         }
    405         /* Find best codebook */
    406         CCmax_new = silk_int32_MIN;
    407         CBimax_new = 0;
    408         for( i = 0; i < nb_cbk_search; i++ ) {
    409             if( CC[ i ] > CCmax_new ) {
    410                 CCmax_new = CC[ i ];
    411                 CBimax_new = i;
    412             }
    413         }
    414 
    415         /* Bias towards shorter lags */
    416         lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
    417         silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
    418         silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
    419         CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
    420 
    421         /* Bias towards previous lag */
    422         silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
    423         if( prevLag > 0 ) {
    424             delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
    425             silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
    426             delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
    427             prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
    428             prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
    429             CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
    430         }
    431 
    432         if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
    433             CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
    434             silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
    435          ) {
    436             CCmax_b = CCmax_new_b;
    437             CCmax   = CCmax_new;
    438             lag     = d;
    439             CBimax  = CBimax_new;
    440         }
    441     }
    442 
    443     if( lag == -1 ) {
    444         /* No suitable candidate found */
    445         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    446         *LTPCorr_Q15  = 0;
    447         *lagIndex     = 0;
    448         *contourIndex = 0;
    449         RESTORE_STACK;
    450         return 1;
    451     }
    452 
    453     /* Output normalized correlation */
    454     *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
    455     silk_assert( *LTPCorr_Q15 >= 0 );
    456 
    457     if( Fs_kHz > 8 ) {
    458         /* Search in original signal */
    459 
    460         CBimax_old = CBimax;
    461         /* Compensate for decimation */
    462         silk_assert( lag == silk_SAT16( lag ) );
    463         if( Fs_kHz == 12 ) {
    464             lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
    465         } else if( Fs_kHz == 16 ) {
    466             lag = silk_LSHIFT( lag, 1 );
    467         } else {
    468             lag = silk_SMULBB( lag, 3 );
    469         }
    470 
    471         lag = silk_LIMIT_int( lag, min_lag, max_lag );
    472         start_lag = silk_max_int( lag - 2, min_lag );
    473         end_lag   = silk_min_int( lag + 2, max_lag );
    474         lag_new   = lag;                                    /* to avoid undefined lag */
    475         CBimax    = 0;                                      /* to avoid undefined lag */
    476 
    477         CCmax = silk_int32_MIN;
    478         /* pitch lags according to second stage */
    479         for( k = 0; k < nb_subfr; k++ ) {
    480             pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
    481         }
    482 
    483         /* Set up codebook parameters according to complexity setting and frame length */
    484         if( nb_subfr == PE_MAX_NB_SUBFR ) {
    485             nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
    486             cbk_size        = PE_NB_CBKS_STAGE3_MAX;
    487             Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
    488         } else {
    489             nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
    490             cbk_size        = PE_NB_CBKS_STAGE3_10MS;
    491             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    492         }
    493 
    494         /* Calculate the correlations and energies needed in stage 3 */
    495         ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    496         ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    497         silk_P_Ana_calc_corr_st3(  cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
    498         silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
    499 
    500         lag_counter = 0;
    501         silk_assert( lag == silk_SAT16( lag ) );
    502         contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
    503 
    504         target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
    505         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
    506         for( d = start_lag; d <= end_lag; d++ ) {
    507             for( j = 0; j < nb_cbk_search; j++ ) {
    508                 cross_corr = 0;
    509                 energy     = energy_target;
    510                 for( k = 0; k < nb_subfr; k++ ) {
    511                     cross_corr = silk_ADD32( cross_corr,
    512                         matrix_ptr( cross_corr_st3, k, j,
    513                                     nb_cbk_search )[ lag_counter ] );
    514                     energy     = silk_ADD32( energy,
    515                         matrix_ptr( energies_st3, k, j,
    516                                     nb_cbk_search )[ lag_counter ] );
    517                     silk_assert( energy >= 0 );
    518                 }
    519                 if( cross_corr > 0 ) {
    520                     CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
    521                     /* Reduce depending on flatness of contour */
    522                     diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
    523                     silk_assert( diff == silk_SAT16( diff ) );
    524                     CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
    525                 } else {
    526                     CCmax_new = 0;
    527                 }
    528 
    529                 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
    530                     CCmax   = CCmax_new;
    531                     lag_new = d;
    532                     CBimax  = j;
    533                 }
    534             }
    535             lag_counter++;
    536         }
    537 
    538         for( k = 0; k < nb_subfr; k++ ) {
    539             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    540             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
    541         }
    542         *lagIndex = (opus_int16)( lag_new - min_lag);
    543         *contourIndex = (opus_int8)CBimax;
    544     } else {        /* Fs_kHz == 8 */
    545         /* Save Lags */
    546         for( k = 0; k < nb_subfr; k++ ) {
    547             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    548             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
    549         }
    550         *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
    551         *contourIndex = (opus_int8)CBimax;
    552     }
    553     silk_assert( *lagIndex >= 0 );
    554     /* return as voiced */
    555     RESTORE_STACK;
    556     return 0;
    557 }
    558 
    559 /***********************************************************************
    560  * Calculates the correlations used in stage 3 search. In order to cover
    561  * the whole lag codebook for all the searched offset lags (lag +- 2),
    562  * the following correlations are needed in each sub frame:
    563  *
    564  * sf1: lag range [-8,...,7] total 16 correlations
    565  * sf2: lag range [-4,...,4] total 9 correlations
    566  * sf3: lag range [-3,....4] total 8 correltions
    567  * sf4: lag range [-6,....8] total 15 correlations
    568  *
    569  * In total 48 correlations. The direct implementation computed in worst
    570  * case 4*12*5 = 240 correlations, but more likely around 120.
    571  ***********************************************************************/
    572 static void silk_P_Ana_calc_corr_st3(
    573     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
    574     const opus_int16  frame[],                         /* I vector to correlate         */
    575     opus_int          start_lag,                       /* I lag offset to search around */
    576     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    577     opus_int          nb_subfr,                        /* I number of subframes         */
    578     opus_int          complexity,                      /* I Complexity setting          */
    579     int               arch                             /* I Run-time architecture       */
    580 )
    581 {
    582     const opus_int16 *target_ptr;
    583     opus_int   i, j, k, lag_counter, lag_low, lag_high;
    584     opus_int   nb_cbk_search, delta, idx, cbk_size;
    585     VARDECL( opus_int32, scratch_mem );
    586     VARDECL( opus_int32, xcorr32 );
    587     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    588     SAVE_STACK;
    589 
    590     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    591     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    592 
    593     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    594         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    595         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    596         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    597         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    598     } else {
    599         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    600         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    601         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    602         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    603         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    604     }
    605     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    606     ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
    607 
    608     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
    609     for( k = 0; k < nb_subfr; k++ ) {
    610         lag_counter = 0;
    611 
    612         /* Calculate the correlations for each subframe */
    613         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    614         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
    615         silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
    616         celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
    617         for( j = lag_low; j <= lag_high; j++ ) {
    618             silk_assert( lag_counter < SCRATCH_SIZE );
    619             scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
    620             lag_counter++;
    621         }
    622 
    623         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    624         for( i = 0; i < nb_cbk_search; i++ ) {
    625             /* Fill out the 3 dim array that stores the correlations for */
    626             /* each code_book vector for each start lag */
    627             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    628             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    629                 silk_assert( idx + j < SCRATCH_SIZE );
    630                 silk_assert( idx + j < lag_counter );
    631                 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
    632                     scratch_mem[ idx + j ];
    633             }
    634         }
    635         target_ptr += sf_length;
    636     }
    637     RESTORE_STACK;
    638 }
    639 
    640 /********************************************************************/
    641 /* Calculate the energies for first two subframes. The energies are */
    642 /* calculated recursively.                                          */
    643 /********************************************************************/
    644 static void silk_P_Ana_calc_energy_st3(
    645     silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
    646     const opus_int16  frame[],                          /* I vector to calc energy in    */
    647     opus_int          start_lag,                        /* I lag offset to search around */
    648     opus_int          sf_length,                        /* I length of one 5 ms subframe */
    649     opus_int          nb_subfr,                         /* I number of subframes         */
    650     opus_int          complexity,                       /* I Complexity setting          */
    651     int               arch                              /* I Run-time architecture       */
    652 )
    653 {
    654     const opus_int16 *target_ptr, *basis_ptr;
    655     opus_int32 energy;
    656     opus_int   k, i, j, lag_counter;
    657     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
    658     VARDECL( opus_int32, scratch_mem );
    659     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    660     SAVE_STACK;
    661 
    662     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    663     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    664 
    665     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    666         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    667         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    668         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    669         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    670     } else {
    671         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    672         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    673         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    674         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    675         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    676     }
    677     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    678 
    679     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
    680     for( k = 0; k < nb_subfr; k++ ) {
    681         lag_counter = 0;
    682 
    683         /* Calculate the energy for first lag */
    684         basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
    685         energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
    686         silk_assert( energy >= 0 );
    687         scratch_mem[ lag_counter ] = energy;
    688         lag_counter++;
    689 
    690         lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
    691         for( i = 1; i < lag_diff; i++ ) {
    692             /* remove part outside new window */
    693             energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
    694             silk_assert( energy >= 0 );
    695 
    696             /* add part that comes into window */
    697             energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
    698             silk_assert( energy >= 0 );
    699             silk_assert( lag_counter < SCRATCH_SIZE );
    700             scratch_mem[ lag_counter ] = energy;
    701             lag_counter++;
    702         }
    703 
    704         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    705         for( i = 0; i < nb_cbk_search; i++ ) {
    706             /* Fill out the 3 dim array that stores the correlations for    */
    707             /* each code_book vector for each start lag                     */
    708             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    709             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    710                 silk_assert( idx + j < SCRATCH_SIZE );
    711                 silk_assert( idx + j < lag_counter );
    712                 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
    713                     scratch_mem[ idx + j ];
    714                 silk_assert(
    715                     matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
    716             }
    717         }
    718         target_ptr += sf_length;
    719     }
    720     RESTORE_STACK;
    721 }
    722