Home | History | Annotate | Download | only in fixed
      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 /***********************************************************
     33 * Pitch analyser function
     34 ********************************************************** */
     35 #include "SigProc_FIX.h"
     36 #include "pitch_est_defines.h"
     37 #include "stack_alloc.h"
     38 #include "debug.h"
     39 #include "pitch.h"
     40 
     41 #define SCRATCH_SIZE    22
     42 #define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
     43 #define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
     44 #define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
     45 #define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
     46 #define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
     47 #define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
     48 #define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
     49 #define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
     50 #define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
     51 #define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
     52 #define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
     53 
     54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
     55 
     56 /************************************************************/
     57 /* Internally used functions                                */
     58 /************************************************************/
     59 static void silk_P_Ana_calc_corr_st3(
     60     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
     61     const opus_int16  frame[],                         /* I vector to correlate         */
     62     opus_int          start_lag,                       /* I lag offset to search around */
     63     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     64     opus_int          nb_subfr,                        /* I number of subframes         */
     65     opus_int          complexity,                      /* I Complexity setting          */
     66     int               arch                             /* I Run-time architecture       */
     67 );
     68 
     69 static void silk_P_Ana_calc_energy_st3(
     70     silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
     71     const opus_int16  frame[],                         /* I vector to calc energy in    */
     72     opus_int          start_lag,                       /* I lag offset to search around */
     73     opus_int          sf_length,                       /* I length of one 5 ms subframe */
     74     opus_int          nb_subfr,                        /* I number of subframes         */
     75     opus_int          complexity,                      /* I Complexity setting          */
     76     int               arch                             /* I Run-time architecture       */
     77 );
     78 
     79 /*************************************************************/
     80 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
     81 /*************************************************************/
     82 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
     83     const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
     84     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
     85     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     86     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     87     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     88     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     89     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     90     const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
     91     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     92     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
     93     const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
     94     int                         arch                /* I    Run-time architecture                                       */
     95 )
     96 {
     97     VARDECL( opus_int16, frame_8kHz );
     98     VARDECL( opus_int16, frame_4kHz );
     99     opus_int32 filt_state[ 6 ];
    100     const opus_int16 *input_frame_ptr;
    101     opus_int   i, k, d, j;
    102     VARDECL( opus_int16, C );
    103     VARDECL( opus_int32, xcorr32 );
    104     const opus_int16 *target_ptr, *basis_ptr;
    105     opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
    106     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
    107     VARDECL( opus_int16, d_comp );
    108     opus_int32 sum, threshold, lag_counter;
    109     opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
    110     opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
    111     VARDECL( silk_pe_stage3_vals, energies_st3 );
    112     VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
    113     opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
    114     opus_int   sf_length;
    115     opus_int   min_lag;
    116     opus_int   max_lag;
    117     opus_int32 contour_bias_Q15, diff;
    118     opus_int   nb_cbk_search, cbk_size;
    119     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
    120     const opus_int8 *Lag_CB_ptr;
    121     SAVE_STACK;
    122     /* Check for valid sampling frequency */
    123     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
    124 
    125     /* Check for valid complexity setting */
    126     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    127     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    128 
    129     silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
    130     silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
    131 
    132     /* Set up frame lengths max / min lag for the sampling frequency */
    133     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
    134     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
    135     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
    136     sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
    137     min_lag           = PE_MIN_LAG_MS * Fs_kHz;
    138     max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
    139 
    140     /* Resample from input sampled at Fs_kHz to 8 kHz */
    141     ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
    142     if( Fs_kHz == 16 ) {
    143         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
    144         silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
    145     } else if( Fs_kHz == 12 ) {
    146         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
    147         silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
    148     } else {
    149         silk_assert( Fs_kHz == 8 );
    150         silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
    151     }
    152 
    153     /* Decimate again to 4 kHz */
    154     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
    155     ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
    156     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
    157 
    158     /* Low-pass filter */
    159     for( i = frame_length_4kHz - 1; i > 0; i-- ) {
    160         frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
    161     }
    162 
    163     /*******************************************************************************
    164     ** Scale 4 kHz signal down to prevent correlations measures from overflowing
    165     ** find scaling as max scaling for each 8kHz(?) subframe
    166     *******************************************************************************/
    167 
    168     /* Inner product is calculated with different lengths, so scale for the worst case */
    169     silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
    170     if( shift > 0 ) {
    171         shift = silk_RSHIFT( shift, 1 );
    172         for( i = 0; i < frame_length_4kHz; i++ ) {
    173             frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
    174         }
    175     }
    176 
    177     /******************************************************************************
    178     * FIRST STAGE, operating in 4 khz
    179     ******************************************************************************/
    180     ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
    181     ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
    182     silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
    183     target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
    184     for( k = 0; k < nb_subfr >> 1; k++ ) {
    185         /* Check that we are within range of the array */
    186         silk_assert( target_ptr >= frame_4kHz );
    187         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    188 
    189         basis_ptr = target_ptr - MIN_LAG_4KHZ;
    190 
    191         /* Check that we are within range of the array */
    192         silk_assert( basis_ptr >= frame_4kHz );
    193         silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    194 
    195         celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
    196 
    197         /* Calculate first vector products before loop */
    198         cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
    199         normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
    200         normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ, arch ) );
    201         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
    202 
    203         matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
    204             (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
    205 
    206         /* From now on normalizer is computed recursively */
    207         for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
    208             basis_ptr--;
    209 
    210             /* Check that we are within range of the array */
    211             silk_assert( basis_ptr >= frame_4kHz );
    212             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
    213 
    214             cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
    215 
    216             /* Add contribution of new sample and remove contribution from oldest sample */
    217             normalizer = silk_ADD32( normalizer,
    218                 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
    219                 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
    220 
    221             matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
    222                 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
    223         }
    224         /* Update target pointer */
    225         target_ptr += SF_LENGTH_8KHZ;
    226     }
    227 
    228     /* Combine two subframes into single correlation measure and apply short-lag bias */
    229     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    230         for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    231             sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
    232                 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
    233             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    234             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    235         }
    236     } else {
    237         /* Only short-lag bias */
    238         for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
    239             sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
    240             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
    241             C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
    242         }
    243     }
    244 
    245     /* Sort */
    246     length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
    247     silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
    248     silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
    249                                           length_d_srch );
    250 
    251     /* Escape if correlation is very low already here */
    252     Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
    253     if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
    254         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    255         *LTPCorr_Q15  = 0;
    256         *lagIndex     = 0;
    257         *contourIndex = 0;
    258         RESTORE_STACK;
    259         return 1;
    260     }
    261 
    262     threshold = silk_SMULWB( search_thres1_Q16, Cmax );
    263     for( i = 0; i < length_d_srch; i++ ) {
    264         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
    265         if( C[ i ] > threshold ) {
    266             d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
    267         } else {
    268             length_d_srch = i;
    269             break;
    270         }
    271     }
    272     silk_assert( length_d_srch > 0 );
    273 
    274     ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
    275     for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
    276         d_comp[ i - D_COMP_MIN ] = 0;
    277     }
    278     for( i = 0; i < length_d_srch; i++ ) {
    279         d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
    280     }
    281 
    282     /* Convolution */
    283     for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    284         d_comp[ i - D_COMP_MIN ] +=
    285             d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
    286     }
    287 
    288     length_d_srch = 0;
    289     for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
    290         if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
    291             d_srch[ length_d_srch ] = i;
    292             length_d_srch++;
    293         }
    294     }
    295 
    296     /* Convolution */
    297     for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
    298         d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
    299             + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
    300     }
    301 
    302     length_d_comp = 0;
    303     for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
    304         if( d_comp[ i - D_COMP_MIN ] > 0 ) {
    305             d_comp[ length_d_comp ] = i - 2;
    306             length_d_comp++;
    307         }
    308     }
    309 
    310     /**********************************************************************************
    311     ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    312     *************************************************************************************/
    313 
    314     /******************************************************************************
    315     ** Scale signal down to avoid correlations measures from overflowing
    316     *******************************************************************************/
    317     /* find scaling as max scaling for each subframe */
    318     silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
    319     if( shift > 0 ) {
    320         shift = silk_RSHIFT( shift, 1 );
    321         for( i = 0; i < frame_length_8kHz; i++ ) {
    322             frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
    323         }
    324     }
    325 
    326     /*********************************************************************************
    327     * Find energy of each subframe projected onto its history, for a range of delays
    328     *********************************************************************************/
    329     silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
    330 
    331     target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
    332     for( k = 0; k < nb_subfr; k++ ) {
    333 
    334         /* Check that we are within range of the array */
    335         silk_assert( target_ptr >= frame_8kHz );
    336         silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    337 
    338         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
    339         for( j = 0; j < length_d_comp; j++ ) {
    340             d = d_comp[ j ];
    341             basis_ptr = target_ptr - d;
    342 
    343             /* Check that we are within range of the array */
    344             silk_assert( basis_ptr >= frame_8kHz );
    345             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
    346 
    347             cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
    348             if( cross_corr > 0 ) {
    349                 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
    350                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
    351                     (opus_int16)silk_DIV32_varQ( cross_corr,
    352                                                  silk_ADD32( energy_target,
    353                                                              energy_basis ),
    354                                                  13 + 1 );                                      /* Q13 */
    355             } else {
    356                 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
    357             }
    358         }
    359         target_ptr += SF_LENGTH_8KHZ;
    360     }
    361 
    362     /* search over lag range and lags codebook */
    363     /* scale factor for lag codebook, as a function of center lag */
    364 
    365     CCmax   = silk_int32_MIN;
    366     CCmax_b = silk_int32_MIN;
    367 
    368     CBimax = 0; /* To avoid returning undefined lag values */
    369     lag = -1;   /* To check if lag with strong enough correlation has been found */
    370 
    371     if( prevLag > 0 ) {
    372         if( Fs_kHz == 12 ) {
    373             prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
    374         } else if( Fs_kHz == 16 ) {
    375             prevLag = silk_RSHIFT( prevLag, 1 );
    376         }
    377         prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
    378     } else {
    379         prevLag_log2_Q7 = 0;
    380     }
    381     silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
    382     /* Set up stage 2 codebook based on number of subframes */
    383     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    384         cbk_size   = PE_NB_CBKS_STAGE2_EXT;
    385         Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
    386         if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
    387             /* If input is 8 khz use a larger codebook here because it is last stage */
    388             nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
    389         } else {
    390             nb_cbk_search = PE_NB_CBKS_STAGE2;
    391         }
    392     } else {
    393         cbk_size       = PE_NB_CBKS_STAGE2_10MS;
    394         Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
    395         nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
    396     }
    397 
    398     for( k = 0; k < length_d_srch; k++ ) {
    399         d = d_srch[ k ];
    400         for( j = 0; j < nb_cbk_search; j++ ) {
    401             CC[ j ] = 0;
    402             for( i = 0; i < nb_subfr; i++ ) {
    403                 opus_int d_subfr;
    404                 /* Try all codebooks */
    405                 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
    406                 CC[ j ] = CC[ j ]
    407                     + (opus_int32)matrix_ptr( C, i,
    408                                               d_subfr - ( MIN_LAG_8KHZ - 2 ),
    409                                               CSTRIDE_8KHZ );
    410             }
    411         }
    412         /* Find best codebook */
    413         CCmax_new = silk_int32_MIN;
    414         CBimax_new = 0;
    415         for( i = 0; i < nb_cbk_search; i++ ) {
    416             if( CC[ i ] > CCmax_new ) {
    417                 CCmax_new = CC[ i ];
    418                 CBimax_new = i;
    419             }
    420         }
    421 
    422         /* Bias towards shorter lags */
    423         lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
    424         silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
    425         silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
    426         CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
    427 
    428         /* Bias towards previous lag */
    429         silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
    430         if( prevLag > 0 ) {
    431             delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
    432             silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
    433             delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
    434             prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
    435             prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
    436             CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
    437         }
    438 
    439         if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
    440             CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
    441             silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
    442          ) {
    443             CCmax_b = CCmax_new_b;
    444             CCmax   = CCmax_new;
    445             lag     = d;
    446             CBimax  = CBimax_new;
    447         }
    448     }
    449 
    450     if( lag == -1 ) {
    451         /* No suitable candidate found */
    452         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    453         *LTPCorr_Q15  = 0;
    454         *lagIndex     = 0;
    455         *contourIndex = 0;
    456         RESTORE_STACK;
    457         return 1;
    458     }
    459 
    460     /* Output normalized correlation */
    461     *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
    462     silk_assert( *LTPCorr_Q15 >= 0 );
    463 
    464     if( Fs_kHz > 8 ) {
    465         VARDECL( opus_int16, scratch_mem );
    466         /***************************************************************************/
    467         /* Scale input signal down to avoid correlations measures from overflowing */
    468         /***************************************************************************/
    469         /* find scaling as max scaling for each subframe */
    470         silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
    471         ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
    472         if( shift > 0 ) {
    473             /* Move signal to scratch mem because the input signal should be unchanged */
    474             shift = silk_RSHIFT( shift, 1 );
    475             for( i = 0; i < frame_length; i++ ) {
    476                 scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
    477             }
    478             input_frame_ptr = scratch_mem;
    479         } else {
    480             input_frame_ptr = frame;
    481         }
    482 
    483         /* Search in original signal */
    484 
    485         CBimax_old = CBimax;
    486         /* Compensate for decimation */
    487         silk_assert( lag == silk_SAT16( lag ) );
    488         if( Fs_kHz == 12 ) {
    489             lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
    490         } else if( Fs_kHz == 16 ) {
    491             lag = silk_LSHIFT( lag, 1 );
    492         } else {
    493             lag = silk_SMULBB( lag, 3 );
    494         }
    495 
    496         lag = silk_LIMIT_int( lag, min_lag, max_lag );
    497         start_lag = silk_max_int( lag - 2, min_lag );
    498         end_lag   = silk_min_int( lag + 2, max_lag );
    499         lag_new   = lag;                                    /* to avoid undefined lag */
    500         CBimax    = 0;                                      /* to avoid undefined lag */
    501 
    502         CCmax = silk_int32_MIN;
    503         /* pitch lags according to second stage */
    504         for( k = 0; k < nb_subfr; k++ ) {
    505             pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
    506         }
    507 
    508         /* Set up codebook parameters according to complexity setting and frame length */
    509         if( nb_subfr == PE_MAX_NB_SUBFR ) {
    510             nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
    511             cbk_size        = PE_NB_CBKS_STAGE3_MAX;
    512             Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
    513         } else {
    514             nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
    515             cbk_size        = PE_NB_CBKS_STAGE3_10MS;
    516             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    517         }
    518 
    519         /* Calculate the correlations and energies needed in stage 3 */
    520         ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    521         ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
    522         silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
    523         silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
    524 
    525         lag_counter = 0;
    526         silk_assert( lag == silk_SAT16( lag ) );
    527         contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
    528 
    529         target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
    530         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
    531         for( d = start_lag; d <= end_lag; d++ ) {
    532             for( j = 0; j < nb_cbk_search; j++ ) {
    533                 cross_corr = 0;
    534                 energy     = energy_target;
    535                 for( k = 0; k < nb_subfr; k++ ) {
    536                     cross_corr = silk_ADD32( cross_corr,
    537                         matrix_ptr( cross_corr_st3, k, j,
    538                                     nb_cbk_search )[ lag_counter ] );
    539                     energy     = silk_ADD32( energy,
    540                         matrix_ptr( energies_st3, k, j,
    541                                     nb_cbk_search )[ lag_counter ] );
    542                     silk_assert( energy >= 0 );
    543                 }
    544                 if( cross_corr > 0 ) {
    545                     CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
    546                     /* Reduce depending on flatness of contour */
    547                     diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
    548                     silk_assert( diff == silk_SAT16( diff ) );
    549                     CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
    550                 } else {
    551                     CCmax_new = 0;
    552                 }
    553 
    554                 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
    555                     CCmax   = CCmax_new;
    556                     lag_new = d;
    557                     CBimax  = j;
    558                 }
    559             }
    560             lag_counter++;
    561         }
    562 
    563         for( k = 0; k < nb_subfr; k++ ) {
    564             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    565             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
    566         }
    567         *lagIndex = (opus_int16)( lag_new - min_lag);
    568         *contourIndex = (opus_int8)CBimax;
    569     } else {        /* Fs_kHz == 8 */
    570         /* Save Lags */
    571         for( k = 0; k < nb_subfr; k++ ) {
    572             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    573             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
    574         }
    575         *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
    576         *contourIndex = (opus_int8)CBimax;
    577     }
    578     silk_assert( *lagIndex >= 0 );
    579     /* return as voiced */
    580     RESTORE_STACK;
    581     return 0;
    582 }
    583 
    584 /***********************************************************************
    585  * Calculates the correlations used in stage 3 search. In order to cover
    586  * the whole lag codebook for all the searched offset lags (lag +- 2),
    587  * the following correlations are needed in each sub frame:
    588  *
    589  * sf1: lag range [-8,...,7] total 16 correlations
    590  * sf2: lag range [-4,...,4] total 9 correlations
    591  * sf3: lag range [-3,....4] total 8 correltions
    592  * sf4: lag range [-6,....8] total 15 correlations
    593  *
    594  * In total 48 correlations. The direct implementation computed in worst
    595  * case 4*12*5 = 240 correlations, but more likely around 120.
    596  ***********************************************************************/
    597 static void silk_P_Ana_calc_corr_st3(
    598     silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
    599     const opus_int16  frame[],                         /* I vector to correlate         */
    600     opus_int          start_lag,                       /* I lag offset to search around */
    601     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    602     opus_int          nb_subfr,                        /* I number of subframes         */
    603     opus_int          complexity,                      /* I Complexity setting          */
    604     int               arch                             /* I Run-time architecture       */
    605 )
    606 {
    607     const opus_int16 *target_ptr;
    608     opus_int   i, j, k, lag_counter, lag_low, lag_high;
    609     opus_int   nb_cbk_search, delta, idx, cbk_size;
    610     VARDECL( opus_int32, scratch_mem );
    611     VARDECL( opus_int32, xcorr32 );
    612     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    613     SAVE_STACK;
    614 
    615     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    616     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    617 
    618     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    619         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    620         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    621         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    622         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    623     } else {
    624         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    625         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    626         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    627         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    628         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    629     }
    630     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    631     ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
    632 
    633     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
    634     for( k = 0; k < nb_subfr; k++ ) {
    635         lag_counter = 0;
    636 
    637         /* Calculate the correlations for each subframe */
    638         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    639         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
    640         silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
    641         celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
    642         for( j = lag_low; j <= lag_high; j++ ) {
    643             silk_assert( lag_counter < SCRATCH_SIZE );
    644             scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
    645             lag_counter++;
    646         }
    647 
    648         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    649         for( i = 0; i < nb_cbk_search; i++ ) {
    650             /* Fill out the 3 dim array that stores the correlations for */
    651             /* each code_book vector for each start lag */
    652             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    653             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    654                 silk_assert( idx + j < SCRATCH_SIZE );
    655                 silk_assert( idx + j < lag_counter );
    656                 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
    657                     scratch_mem[ idx + j ];
    658             }
    659         }
    660         target_ptr += sf_length;
    661     }
    662     RESTORE_STACK;
    663 }
    664 
    665 /********************************************************************/
    666 /* Calculate the energies for first two subframes. The energies are */
    667 /* calculated recursively.                                          */
    668 /********************************************************************/
    669 static void silk_P_Ana_calc_energy_st3(
    670     silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
    671     const opus_int16  frame[],                          /* I vector to calc energy in    */
    672     opus_int          start_lag,                        /* I lag offset to search around */
    673     opus_int          sf_length,                        /* I length of one 5 ms subframe */
    674     opus_int          nb_subfr,                         /* I number of subframes         */
    675     opus_int          complexity,                       /* I Complexity setting          */
    676     int               arch                              /* I Run-time architecture       */
    677 )
    678 {
    679     const opus_int16 *target_ptr, *basis_ptr;
    680     opus_int32 energy;
    681     opus_int   k, i, j, lag_counter;
    682     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
    683     VARDECL( opus_int32, scratch_mem );
    684     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    685     SAVE_STACK;
    686 
    687     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    688     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    689 
    690     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    691         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    692         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    693         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    694         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    695     } else {
    696         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    697         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    698         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    699         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    700         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    701     }
    702     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
    703 
    704     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
    705     for( k = 0; k < nb_subfr; k++ ) {
    706         lag_counter = 0;
    707 
    708         /* Calculate the energy for first lag */
    709         basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
    710         energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
    711         silk_assert( energy >= 0 );
    712         scratch_mem[ lag_counter ] = energy;
    713         lag_counter++;
    714 
    715         lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
    716         for( i = 1; i < lag_diff; i++ ) {
    717             /* remove part outside new window */
    718             energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
    719             silk_assert( energy >= 0 );
    720 
    721             /* add part that comes into window */
    722             energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
    723             silk_assert( energy >= 0 );
    724             silk_assert( lag_counter < SCRATCH_SIZE );
    725             scratch_mem[ lag_counter ] = energy;
    726             lag_counter++;
    727         }
    728 
    729         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    730         for( i = 0; i < nb_cbk_search; i++ ) {
    731             /* Fill out the 3 dim array that stores the correlations for    */
    732             /* each code_book vector for each start lag                     */
    733             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    734             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    735                 silk_assert( idx + j < SCRATCH_SIZE );
    736                 silk_assert( idx + j < lag_counter );
    737                 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
    738                     scratch_mem[ idx + j ];
    739                 silk_assert(
    740                     matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
    741             }
    742         }
    743         target_ptr += sf_length;
    744     }
    745     RESTORE_STACK;
    746 }
    747