Home | History | Annotate | Download | only in fixed
      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 /***********************************************************
     33 * Pitch analyser function
     34 ********************************************************** */
     35 #include "SigProc_FIX.h"
     36 #include "pitch_est_defines.h"
     37 #include "stack_alloc.h"
     38 #include "debug.h"
     39 #include "pitch.h"
     40 
     41 #define SCRATCH_SIZE    22
     42 #define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
     43 #define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
     44 #define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
     45 #define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
     46 #define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
     47 #define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
     48 #define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
     49 #define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
     50 #define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
     51 #define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
     52 #define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
     53 
     54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
     55 
     56 /************************************************************/
     57 /* Internally used functions                                */
     58 /************************************************************/
     59 static void silk_P_Ana_calc_corr_st3(
     60     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
     61     const opus_int16  frame[],                         /* I vector to correlate         */
     62     opus_int          start_lag,                       /* I lag offset to search around */
     63     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     64     opus_int          nb_subfr,                        /* I number of subframes         */
     65     opus_int          complexity,                      /* I Complexity setting          */
     66     int               arch                             /* I Run-time architecture       */
     67 );
     68 
     69 static void silk_P_Ana_calc_energy_st3(
     70     silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
     71     const opus_int16  frame[],                         /* I vector to calc energy in    */
     72     opus_int          start_lag,                       /* I lag offset to search around */
     73     opus_int          sf_length,                       /* I length of one 5 ms subframe */
     74     opus_int          nb_subfr,                        /* I number of subframes         */
     75     opus_int          complexity                       /* I Complexity setting          */
     76 );
     77 
     78 /*************************************************************/
     79 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
     80 /*************************************************************/
     81 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
     82     const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
     83     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
     84     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     85     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     86     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     87     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     88     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     89     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
     90     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     91     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
     92     const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
     93     int                         arch                /* I    Run-time architecture                                       */
     94 )
     95 {
     96     VARDECL( opus_int16, frame_8kHz );
     97     VARDECL( opus_int16, frame_4kHz );
     98     opus_int32 filt_state[ 6 ];
     99     const opus_int16 *input_frame_ptr;
    100     opus_int   i, k, d, j;
    101     VARDECL( opus_int16, C );
    102     VARDECL( opus_int32, xcorr32 );
    103     const opus_int16 *target_ptr, *basis_ptr;
    104     opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
    105     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
    106     VARDECL( opus_int16, d_comp );
    107     opus_int32 sum, threshold, lag_counter;
    108     opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    109     opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    110     VARDECL( silk_pe_stage3_vals, energies_st3 );
    111     VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
    112     opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
    113     opus_int   sf_length;
    114     opus_int   min_lag;
    115     opus_int   max_lag;
    116     opus_int32 contour_bias_Q15, diff;
    117     opus_int   nb_cbk_search, cbk_size;
    118     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    119     const opus_int8 *Lag_CB_ptr;
    120     SAVE_STACK;
    121     /* Check for valid sampling frequency */
    122     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
    123 
    124     /* Check for valid complexity setting */
    125     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    126     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    127 
    128     silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
    129     silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
    130 
    131     /* Set up frame lengths max / min lag for the sampling frequency */
    132     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
    133     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
    134     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
    135     sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
    136     min_lag           = PE_MIN_LAG_MS * Fs_kHz;
    137     max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
    138 
    139     /* Resample from input sampled at Fs_kHz to 8 kHz */
    140     ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
    141     if( Fs_kHz == 16 ) {
    142         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
    143         silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
    144     } else if( Fs_kHz == 12 ) {
    145         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
    146         silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
    147     } else {
    148         silk_assert( Fs_kHz == 8 );
    149         silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
    150     }
    151 
    152     /* Decimate again to 4 kHz */
    153     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
    154     ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
    155     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
    156 
    157     /* Low-pass filter */
    158     for( i = frame_length_4kHz - 1; i > 0; i-- ) {
    159         frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
    160     }
    161 
    162     /*******************************************************************************
    163     ** Scale 4 kHz signal down to prevent correlations measures from overflowing
    164     ** find scaling as max scaling for each 8kHz(?) subframe
    165     *******************************************************************************/
    166 
    167     /* Inner product is calculated with different lengths, so scale for the worst case */
    168     silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
    169     if( shift > 0 ) {
    170         shift = silk_RSHIFT( shift, 1 );
    171         for( i = 0; i < frame_length_4kHz; i++ ) {
    172             frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
    173         }
    174     }
    175 
    176     /******************************************************************************
    177     * FIRST STAGE, operating in 4 khz
    178     ******************************************************************************/
    179     ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
    180     ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
    181     silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
    182     target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
    183     for( k = 0; k < nb_subfr >> 1; k++ ) {
    184         /* Check that we are within range of the array */
    185         silk_assert( target_ptr >= frame_4kHz );
    186         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    187 
    188         basis_ptr = target_ptr - MIN_LAG_4KHZ;
    189 
    190         /* Check that we are within range of the array */
    191         silk_assert( basis_ptr >= frame_4kHz );
    192         silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    193 
    194         celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
    195 
    196         /* Calculate first vector products before loop */
    197         cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
    198         normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
    199         normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );
    200         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
    201 
    202         matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
    203             (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
    204 
    205         /* From now on normalizer is computed recursively */
    206         for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
    207             basis_ptr--;
    208 
    209             /* Check that we are within range of the array */
    210             silk_assert( basis_ptr >= frame_4kHz );
    211             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    212 
    213             cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
    214 
    215             /* Add contribution of new sample and remove contribution from oldest sample */
    216             normalizer = silk_ADD32( normalizer,
    217                 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
    218                 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
    219 
    220             matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
    221                 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
    222         }
    223         /* Update target pointer */
    224         target_ptr += SF_LENGTH_8KHZ;
    225     }
    226 
    227     /* Combine two subframes into single correlation measure and apply short-lag bias */
    228     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    229         for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    230             sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
    231                 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
    232             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    233             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    234         }
    235     } else {
    236         /* Only short-lag bias */
    237         for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    238             sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
    239             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    240             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    241         }
    242     }
    243 
    244     /* Sort */
    245     length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
    246     silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
    247     silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
    248                                           length_d_srch );
    249 
    250     /* Escape if correlation is very low already here */
    251     Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
    252     if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
    253         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    254         *LTPCorr_Q15  = 0;
    255         *lagIndex     = 0;
    256         *contourIndex = 0;
    257         RESTORE_STACK;
    258         return 1;
    259     }
    260 
    261     threshold = silk_SMULWB( search_thres1_Q16, Cmax );
    262     for( i = 0; i < length_d_srch; i++ ) {
    263         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
    264         if( C[ i ] > threshold ) {
    265             d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
    266         } else {
    267             length_d_srch = i;
    268             break;
    269         }
    270     }
    271     silk_assert( length_d_srch > 0 );
    272 
    273     ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
    274     for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
    275         d_comp[ i - D_COMP_MIN ] = 0;
    276     }
    277     for( i = 0; i < length_d_srch; i++ ) {
    278         d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
    279     }
    280 
    281     /* Convolution */
    282     for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    283         d_comp[ i - D_COMP_MIN ] +=
    284             d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
    285     }
    286 
    287     length_d_srch = 0;
    288     for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
    289         if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
    290             d_srch[ length_d_srch ] = i;
    291             length_d_srch++;
    292         }
    293     }
    294 
    295     /* Convolution */
    296     for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    297         d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
    298             + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
    299     }
    300 
    301     length_d_comp = 0;
    302     for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
    303         if( d_comp[ i - D_COMP_MIN ] > 0 ) {
    304             d_comp[ length_d_comp ] = i - 2;
    305             length_d_comp++;
    306         }
    307     }
    308 
    309     /**********************************************************************************
    310     ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    311     *************************************************************************************/
    312 
    313     /******************************************************************************
    314     ** Scale signal down to avoid correlations measures from overflowing
    315     *******************************************************************************/
    316     /* find scaling as max scaling for each subframe */
    317     silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
    318     if( shift > 0 ) {
    319         shift = silk_RSHIFT( shift, 1 );
    320         for( i = 0; i < frame_length_8kHz; i++ ) {
    321             frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
    322         }
    323     }
    324 
    325     /*********************************************************************************
    326     * Find energy of each subframe projected onto its history, for a range of delays
    327     *********************************************************************************/
    328     silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
    329 
    330     target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
    331     for( k = 0; k < nb_subfr; k++ ) {
    332 
    333         /* Check that we are within range of the array */
    334         silk_assert( target_ptr >= frame_8kHz );
    335         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    336 
    337         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
    338         for( j = 0; j < length_d_comp; j++ ) {
    339             d = d_comp[ j ];
    340             basis_ptr = target_ptr - d;
    341 
    342             /* Check that we are within range of the array */
    343             silk_assert( basis_ptr >= frame_8kHz );
    344             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    345 
    346             cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
    347             if( cross_corr > 0 ) {
    348                 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
    349                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
    350                     (opus_int16)silk_DIV32_varQ( cross_corr,
    351                                                  silk_ADD32( energy_target,
    352                                                              energy_basis ),
    353                                                  13 + 1 );                                      /* Q13 */
    354             } else {
    355                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
    356             }
    357         }
    358         target_ptr += SF_LENGTH_8KHZ;
    359     }
    360 
    361     /* search over lag range and lags codebook */
    362     /* scale factor for lag codebook, as a function of center lag */
    363 
    364     CCmax   = silk_int32_MIN;
    365     CCmax_b = silk_int32_MIN;
    366 
    367     CBimax = 0; /* To avoid returning undefined lag values */
    368     lag = -1;   /* To check if lag with strong enough correlation has been found */
    369 
    370     if( prevLag > 0 ) {
    371         if( Fs_kHz == 12 ) {
    372             prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
    373         } else if( Fs_kHz == 16 ) {
    374             prevLag = silk_RSHIFT( prevLag, 1 );
    375         }
    376         prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
    377     } else {
    378         prevLag_log2_Q7 = 0;
    379     }
    380     silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
    381     /* Set up stage 2 codebook based on number of subframes */
    382     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    383         cbk_size   = PE_NB_CBKS_STAGE2_EXT;
    384         Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
    385         if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
    386             /* If input is 8 khz use a larger codebook here because it is last stage */
    387             nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
    388         } else {
    389             nb_cbk_search = PE_NB_CBKS_STAGE2;
    390         }
    391     } else {
    392         cbk_size       = PE_NB_CBKS_STAGE2_10MS;
    393         Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
    394         nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
    395     }
    396 
    397     for( k = 0; k < length_d_srch; k++ ) {
    398         d = d_srch[ k ];
    399         for( j = 0; j < nb_cbk_search; j++ ) {
    400             CC[ j ] = 0;
    401             for( i = 0; i < nb_subfr; i++ ) {
    402                 opus_int d_subfr;
    403                 /* Try all codebooks */
    404                 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
    405                 CC[ j ] = CC[ j ]
    406                     + (opus_int32)matrix_ptr( C, i,
    407                                               d_subfr - ( MIN_LAG_8KHZ - 2 ),
    408                                               CSTRIDE_8KHZ );
    409             }
    410         }
    411         /* Find best codebook */
    412         CCmax_new = silk_int32_MIN;
    413         CBimax_new = 0;
    414         for( i = 0; i < nb_cbk_search; i++ ) {
    415             if( CC[ i ] > CCmax_new ) {
    416                 CCmax_new = CC[ i ];
    417                 CBimax_new = i;
    418             }
    419         }
    420 
    421         /* Bias towards shorter lags */
    422         lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
    423         silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
    424         silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
    425         CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
    426 
    427         /* Bias towards previous lag */
    428         silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
    429         if( prevLag > 0 ) {
    430             delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
    431             silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
    432             delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
    433             prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
    434             prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
    435             CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
    436         }
    437 
    438         if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
    439             CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
    440             silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
    441          ) {
    442             CCmax_b = CCmax_new_b;
    443             CCmax   = CCmax_new;
    444             lag     = d;
    445             CBimax  = CBimax_new;
    446         }
    447     }
    448 
    449     if( lag == -1 ) {
    450         /* No suitable candidate found */
    451         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    452         *LTPCorr_Q15  = 0;
    453         *lagIndex     = 0;
    454         *contourIndex = 0;
    455         RESTORE_STACK;
    456         return 1;
    457     }
    458 
    459     /* Output normalized correlation */
    460     *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
    461     silk_assert( *LTPCorr_Q15 >= 0 );
    462 
    463     if( Fs_kHz > 8 ) {
    464         VARDECL( opus_int16, scratch_mem );
    465         /***************************************************************************/
    466         /* Scale input signal down to avoid correlations measures from overflowing */
    467         /***************************************************************************/
    468         /* find scaling as max scaling for each subframe */
    469         silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
    470         ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
    471         if( shift > 0 ) {
    472             /* Move signal to scratch mem because the input signal should be unchanged */
    473             shift = silk_RSHIFT( shift, 1 );
    474             for( i = 0; i < frame_length; i++ ) {
    475                 scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
    476             }
    477             input_frame_ptr = scratch_mem;
    478         } else {
    479             input_frame_ptr = frame;
    480         }
    481 
    482         /* Search in original signal */
    483 
    484         CBimax_old = CBimax;
    485         /* Compensate for decimation */
    486         silk_assert( lag == silk_SAT16( lag ) );
    487         if( Fs_kHz == 12 ) {
    488             lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
    489         } else if( Fs_kHz == 16 ) {
    490             lag = silk_LSHIFT( lag, 1 );
    491         } else {
    492             lag = silk_SMULBB( lag, 3 );
    493         }
    494 
    495         lag = silk_LIMIT_int( lag, min_lag, max_lag );
    496         start_lag = silk_max_int( lag - 2, min_lag );
    497         end_lag   = silk_min_int( lag + 2, max_lag );
    498         lag_new   = lag;                                    /* to avoid undefined lag */
    499         CBimax    = 0;                                      /* to avoid undefined lag */
    500 
    501         CCmax = silk_int32_MIN;
    502         /* pitch lags according to second stage */
    503         for( k = 0; k < nb_subfr; k++ ) {
    504             pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
    505         }
    506 
    507         /* Set up codebook parameters according to complexity setting and frame length */
    508         if( nb_subfr == PE_MAX_NB_SUBFR ) {
    509             nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
    510             cbk_size        = PE_NB_CBKS_STAGE3_MAX;
    511             Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
    512         } else {
    513             nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
    514             cbk_size        = PE_NB_CBKS_STAGE3_10MS;
    515             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    516         }
    517 
    518         /* Calculate the correlations and energies needed in stage 3 */
    519         ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    520         ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    521         silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
    522         silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
    523 
    524         lag_counter = 0;
    525         silk_assert( lag == silk_SAT16( lag ) );
    526         contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
    527 
    528         target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
    529         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
    530         for( d = start_lag; d <= end_lag; d++ ) {
    531             for( j = 0; j < nb_cbk_search; j++ ) {
    532                 cross_corr = 0;
    533                 energy     = energy_target;
    534                 for( k = 0; k < nb_subfr; k++ ) {
    535                     cross_corr = silk_ADD32( cross_corr,
    536                         matrix_ptr( cross_corr_st3, k, j,
    537                                     nb_cbk_search )[ lag_counter ] );
    538                     energy     = silk_ADD32( energy,
    539                         matrix_ptr( energies_st3, k, j,
    540                                     nb_cbk_search )[ lag_counter ] );
    541                     silk_assert( energy >= 0 );
    542                 }
    543                 if( cross_corr > 0 ) {
    544                     CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
    545                     /* Reduce depending on flatness of contour */
    546                     diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
    547                     silk_assert( diff == silk_SAT16( diff ) );
    548                     CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
    549                 } else {
    550                     CCmax_new = 0;
    551                 }
    552 
    553                 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
    554                     CCmax   = CCmax_new;
    555                     lag_new = d;
    556                     CBimax  = j;
    557                 }
    558             }
    559             lag_counter++;
    560         }
    561 
    562         for( k = 0; k < nb_subfr; k++ ) {
    563             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    564             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
    565         }
    566         *lagIndex = (opus_int16)( lag_new - min_lag);
    567         *contourIndex = (opus_int8)CBimax;
    568     } else {        /* Fs_kHz == 8 */
    569         /* Save Lags */
    570         for( k = 0; k < nb_subfr; k++ ) {
    571             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    572             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
    573         }
    574         *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
    575         *contourIndex = (opus_int8)CBimax;
    576     }
    577     silk_assert( *lagIndex >= 0 );
    578     /* return as voiced */
    579     RESTORE_STACK;
    580     return 0;
    581 }
    582 
    583 /***********************************************************************
    584  * Calculates the correlations used in stage 3 search. In order to cover
    585  * the whole lag codebook for all the searched offset lags (lag +- 2),
    586  * the following correlations are needed in each sub frame:
    587  *
    588  * sf1: lag range [-8,...,7] total 16 correlations
    589  * sf2: lag range [-4,...,4] total 9 correlations
    590  * sf3: lag range [-3,....4] total 8 correltions
    591  * sf4: lag range [-6,....8] total 15 correlations
    592  *
    593  * In total 48 correlations. The direct implementation computed in worst
    594  * case 4*12*5 = 240 correlations, but more likely around 120.
    595  ***********************************************************************/
    596 static void silk_P_Ana_calc_corr_st3(
    597     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
    598     const opus_int16  frame[],                         /* I vector to correlate         */
    599     opus_int          start_lag,                       /* I lag offset to search around */
    600     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    601     opus_int          nb_subfr,                        /* I number of subframes         */
    602     opus_int          complexity,                      /* I Complexity setting          */
    603     int               arch                             /* I Run-time architecture       */
    604 )
    605 {
    606     const opus_int16 *target_ptr;
    607     opus_int   i, j, k, lag_counter, lag_low, lag_high;
    608     opus_int   nb_cbk_search, delta, idx, cbk_size;
    609     VARDECL( opus_int32, scratch_mem );
    610     VARDECL( opus_int32, xcorr32 );
    611     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    612     SAVE_STACK;
    613 
    614     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    615     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    616 
    617     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    618         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    619         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    620         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    621         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    622     } else {
    623         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    624         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    625         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    626         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    627         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    628     }
    629     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    630     ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
    631 
    632     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
    633     for( k = 0; k < nb_subfr; k++ ) {
    634         lag_counter = 0;
    635 
    636         /* Calculate the correlations for each subframe */
    637         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    638         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
    639         silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
    640         celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
    641         for( j = lag_low; j <= lag_high; j++ ) {
    642             silk_assert( lag_counter < SCRATCH_SIZE );
    643             scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
    644             lag_counter++;
    645         }
    646 
    647         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    648         for( i = 0; i < nb_cbk_search; i++ ) {
    649             /* Fill out the 3 dim array that stores the correlations for */
    650             /* each code_book vector for each start lag */
    651             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    652             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    653                 silk_assert( idx + j < SCRATCH_SIZE );
    654                 silk_assert( idx + j < lag_counter );
    655                 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
    656                     scratch_mem[ idx + j ];
    657             }
    658         }
    659         target_ptr += sf_length;
    660     }
    661     RESTORE_STACK;
    662 }
    663 
    664 /********************************************************************/
    665 /* Calculate the energies for first two subframes. The energies are */
    666 /* calculated recursively.                                          */
    667 /********************************************************************/
    668 static void silk_P_Ana_calc_energy_st3(
    669     silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
    670     const opus_int16  frame[],                          /* I vector to calc energy in    */
    671     opus_int          start_lag,                        /* I lag offset to search around */
    672     opus_int          sf_length,                        /* I length of one 5 ms subframe */
    673     opus_int          nb_subfr,                         /* I number of subframes         */
    674     opus_int          complexity                        /* I Complexity setting          */
    675 )
    676 {
    677     const opus_int16 *target_ptr, *basis_ptr;
    678     opus_int32 energy;
    679     opus_int   k, i, j, lag_counter;
    680     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
    681     VARDECL( opus_int32, scratch_mem );
    682     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    683     SAVE_STACK;
    684 
    685     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    686     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    687 
    688     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    689         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    690         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    691         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    692         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    693     } else {
    694         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    695         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    696         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    697         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    698         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    699     }
    700     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    701 
    702     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
    703     for( k = 0; k < nb_subfr; k++ ) {
    704         lag_counter = 0;
    705 
    706         /* Calculate the energy for first lag */
    707         basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
    708         energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
    709         silk_assert( energy >= 0 );
    710         scratch_mem[ lag_counter ] = energy;
    711         lag_counter++;
    712 
    713         lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
    714         for( i = 1; i < lag_diff; i++ ) {
    715             /* remove part outside new window */
    716             energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
    717             silk_assert( energy >= 0 );
    718 
    719             /* add part that comes into window */
    720             energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
    721             silk_assert( energy >= 0 );
    722             silk_assert( lag_counter < SCRATCH_SIZE );
    723             scratch_mem[ lag_counter ] = energy;
    724             lag_counter++;
    725         }
    726 
    727         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    728         for( i = 0; i < nb_cbk_search; i++ ) {
    729             /* Fill out the 3 dim array that stores the correlations for    */
    730             /* each code_book vector for each start lag                     */
    731             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    732             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    733                 silk_assert( idx + j < SCRATCH_SIZE );
    734                 silk_assert( idx + j < lag_counter );
    735                 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
    736                     scratch_mem[ idx + j ];
    737                 silk_assert(
    738                     matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
    739             }
    740         }
    741         target_ptr += sf_length;
    742     }
    743     RESTORE_STACK;
    744 }
    745