Home | History | Annotate | Download | only in fixed
      1 /***********************************************************************
      2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
      3 Redistribution and use in source and binary forms, with or without
      4 modification, are permitted provided that the following conditions
      5 are met:
      6 - Redistributions of source code must retain the above copyright notice,
      7 this list of conditions and the following disclaimer.
      8 - Redistributions in binary form must reproduce the above copyright
      9 notice, this list of conditions and the following disclaimer in the
     10 documentation and/or other materials provided with the distribution.
     11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
     12 names of specific contributors, may be used to endorse or promote
     13 products derived from this software without specific prior written
     14 permission.
     15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
     16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     25 POSSIBILITY OF SUCH DAMAGE.
     26 ***********************************************************************/
     27 
     28 #ifdef HAVE_CONFIG_H
     29 #include "config.h"
     30 #endif
     31 
     32 /***********************************************************
     33 * Pitch analyser function
     34 ********************************************************** */
     35 #include "SigProc_FIX.h"
     36 #include "pitch_est_defines.h"
     37 #include "debug.h"
     38 
     39 #define SCRATCH_SIZE    22
     40 
     41 /************************************************************/
     42 /* Internally used functions                                */
     43 /************************************************************/
     44 void silk_P_Ana_calc_corr_st3(
     45     opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM correlation array */
     46     const opus_int16  frame[],                         /* I vector to correlate         */
     47     opus_int          start_lag,                       /* I lag offset to search around */
     48     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
     49     opus_int          nb_subfr,                        /* I number of subframes         */
     50     opus_int          complexity                       /* I Complexity setting          */
     51 );
     52 
     53 void silk_P_Ana_calc_energy_st3(
     54     opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM energy array */
     55     const opus_int16  frame[],                         /* I vector to calc energy in    */
     56     opus_int          start_lag,                       /* I lag offset to search around */
     57     opus_int          sf_length,                       /* I length of one 5 ms subframe */
     58     opus_int          nb_subfr,                        /* I number of subframes         */
     59     opus_int          complexity                       /* I Complexity setting          */
     60 );
     61 
     62 opus_int32 silk_P_Ana_find_scaling(
     63     const opus_int16  *frame,
     64     const opus_int    frame_length,
     65     const opus_int    sum_sqr_len
     66 );
     67 
     68 /*************************************************************/
     69 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
     70 /*************************************************************/
     71 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
     72     const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
     73     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
     74     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     75     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
     76     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
     77     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
     78     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
     79     const opus_int              search_thres2_Q15,  /* I    Final threshold for lag candidates 0 - 1                    */
     80     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
     81     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
     82     const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */
     83 )
     84 {
     85     opus_int16 frame_8kHz[ PE_MAX_FRAME_LENGTH_ST_2 ];
     86     opus_int16 frame_4kHz[ PE_MAX_FRAME_LENGTH_ST_1 ];
     87     opus_int32 filt_state[ 6 ];
     88     opus_int32 scratch_mem[ 3 * PE_MAX_FRAME_LENGTH ];
     89     opus_int16 *input_frame_ptr;
     90     opus_int   i, k, d, j;
     91     opus_int16 C[ PE_MAX_NB_SUBFR ][ ( PE_MAX_LAG >> 1 ) + 5 ];
     92     const opus_int16 *target_ptr, *basis_ptr;
     93     opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
     94     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
     95     opus_int16 d_comp[ ( PE_MAX_LAG >> 1 ) + 5 ];
     96     opus_int32 sum, threshold, temp32, lag_counter;
     97     opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
     98     opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
     99     opus_int32 energies_st3[  PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
    100     opus_int32 crosscorr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];
    101     opus_int   frame_length, frame_length_8kHz, frame_length_4kHz, max_sum_sq_length;
    102     opus_int   sf_length, sf_length_8kHz, sf_length_4kHz;
    103     opus_int   min_lag, min_lag_8kHz, min_lag_4kHz;
    104     opus_int   max_lag, max_lag_8kHz, max_lag_4kHz;
    105     opus_int32 contour_bias_Q20, diff, lz, lshift;
    106     opus_int   nb_cbk_search, cbk_size;
    107     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q15, corr_thres_Q15;
    108     const opus_int8 *Lag_CB_ptr;
    109     /* Check for valid sampling frequency */
    110     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
    111 
    112     /* Check for valid complexity setting */
    113     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    114     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    115 
    116     silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
    117     silk_assert( search_thres2_Q15 >= 0 && search_thres2_Q15 <= (1<<15) );
    118 
    119     /* Set up frame lengths max / min lag for the sampling frequency */
    120     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
    121     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
    122     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
    123     sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
    124     sf_length_4kHz    = PE_SUBFR_LENGTH_MS * 4;
    125     sf_length_8kHz    = PE_SUBFR_LENGTH_MS * 8;
    126     min_lag           = PE_MIN_LAG_MS * Fs_kHz;
    127     min_lag_4kHz      = PE_MIN_LAG_MS * 4;
    128     min_lag_8kHz      = PE_MIN_LAG_MS * 8;
    129     max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
    130     max_lag_4kHz      = PE_MAX_LAG_MS * 4;
    131     max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;
    132 
    133     silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );
    134 
    135     /* Resample from input sampled at Fs_kHz to 8 kHz */
    136     if( Fs_kHz == 16 ) {
    137         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
    138         silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
    139     } else if( Fs_kHz == 12 ) {
    140         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
    141         silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
    142     } else {
    143         silk_assert( Fs_kHz == 8 );
    144         silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
    145     }
    146 
    147     /* Decimate again to 4 kHz */
    148     silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
    149     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
    150 
    151     /* Low-pass filter */
    152     for( i = frame_length_4kHz - 1; i > 0; i-- ) {
    153         frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
    154     }
    155 
    156     /*******************************************************************************
    157     ** Scale 4 kHz signal down to prevent correlations measures from overflowing
    158     ** find scaling as max scaling for each 8kHz(?) subframe
    159     *******************************************************************************/
    160 
    161     /* Inner product is calculated with different lengths, so scale for the worst case */
    162     max_sum_sq_length = silk_max_32( sf_length_8kHz, silk_LSHIFT( sf_length_4kHz, 2 ) );
    163     shift = silk_P_Ana_find_scaling( frame_4kHz, frame_length_4kHz, max_sum_sq_length );
    164     if( shift > 0 ) {
    165         for( i = 0; i < frame_length_4kHz; i++ ) {
    166             frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
    167         }
    168     }
    169 
    170     /******************************************************************************
    171     * FIRST STAGE, operating in 4 khz
    172     ******************************************************************************/
    173     target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
    174     for( k = 0; k < nb_subfr >> 1; k++ ) {
    175         /* Check that we are within range of the array */
    176         silk_assert( target_ptr >= frame_4kHz );
    177         silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
    178 
    179         basis_ptr = target_ptr - min_lag_4kHz;
    180 
    181         /* Check that we are within range of the array */
    182         silk_assert( basis_ptr >= frame_4kHz );
    183         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
    184 
    185         /* Calculate first vector products before loop */
    186         cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
    187         normalizer = silk_inner_prod_aligned( basis_ptr,  basis_ptr, sf_length_8kHz );
    188         normalizer = silk_ADD_SAT32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );
    189 
    190         temp32 = silk_DIV32( cross_corr, silk_SQRT_APPROX( normalizer ) + 1 );
    191         C[ k ][ min_lag_4kHz ] = (opus_int16)silk_SAT16( temp32 );        /* Q0 */
    192 
    193         /* From now on normalizer is computed recursively */
    194         for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
    195             basis_ptr--;
    196 
    197             /* Check that we are within range of the array */
    198             silk_assert( basis_ptr >= frame_4kHz );
    199             silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
    200 
    201             cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
    202 
    203             /* Add contribution of new sample and remove contribution from oldest sample */
    204             normalizer +=
    205                 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
    206                 silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] );
    207 
    208             temp32 = silk_DIV32( cross_corr, silk_SQRT_APPROX( normalizer ) + 1 );
    209             C[ k ][ d ] = (opus_int16)silk_SAT16( temp32 );                        /* Q0 */
    210         }
    211         /* Update target pointer */
    212         target_ptr += sf_length_8kHz;
    213     }
    214 
    215     /* Combine two subframes into single correlation measure and apply short-lag bias */
    216     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    217         for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
    218             sum = (opus_int32)C[ 0 ][ i ] + (opus_int32)C[ 1 ][ i ];                /* Q0 */
    219             silk_assert( silk_RSHIFT( sum, 1 ) == silk_SAT16( silk_RSHIFT( sum, 1 ) ) );
    220             sum = silk_RSHIFT( sum, 1 );                                           /* Q-1 */
    221             silk_assert( silk_LSHIFT( (opus_int32)-i, 4 ) == silk_SAT16( silk_LSHIFT( (opus_int32)-i, 4 ) ) );
    222             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                    /* Q-1 */
    223             silk_assert( sum == silk_SAT16( sum ) );
    224             C[ 0 ][ i ] = (opus_int16)sum;                                         /* Q-1 */
    225         }
    226     } else {
    227         /* Only short-lag bias */
    228         for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {
    229             sum = (opus_int32)C[ 0 ][ i ];
    230             sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                    /* Q-1 */
    231             C[ 0 ][ i ] = (opus_int16)sum;                                         /* Q-1 */
    232         }
    233     }
    234 
    235     /* Sort */
    236     length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
    237     silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
    238     silk_insertion_sort_decreasing_int16( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );
    239 
    240     /* Escape if correlation is very low already here */
    241     target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];
    242     energy = silk_inner_prod_aligned( target_ptr, target_ptr, silk_LSHIFT( sf_length_4kHz, 2 ) );
    243     energy = silk_ADD_SAT32( energy, 1000 );                                  /* Q0 */
    244     Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ];                                  /* Q-1 */
    245     threshold = silk_SMULBB( Cmax, Cmax );                                    /* Q-2 */
    246 
    247     /* Compare in Q-2 domain */
    248     if( silk_RSHIFT( energy, 4 + 2 ) > threshold ) {
    249         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    250         *LTPCorr_Q15  = 0;
    251         *lagIndex     = 0;
    252         *contourIndex = 0;
    253         return 1;
    254     }
    255 
    256     threshold = silk_SMULWB( search_thres1_Q16, Cmax );
    257     for( i = 0; i < length_d_srch; i++ ) {
    258         /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
    259         if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) {
    260             d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 );
    261         } else {
    262             length_d_srch = i;
    263             break;
    264         }
    265     }
    266     silk_assert( length_d_srch > 0 );
    267 
    268     for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) {
    269         d_comp[ i ] = 0;
    270     }
    271     for( i = 0; i < length_d_srch; i++ ) {
    272         d_comp[ d_srch[ i ] ] = 1;
    273     }
    274 
    275     /* Convolution */
    276     for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
    277         d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ];
    278     }
    279 
    280     length_d_srch = 0;
    281     for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) {
    282         if( d_comp[ i + 1 ] > 0 ) {
    283             d_srch[ length_d_srch ] = i;
    284             length_d_srch++;
    285         }
    286     }
    287 
    288     /* Convolution */
    289     for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) {
    290         d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ];
    291     }
    292 
    293     length_d_comp = 0;
    294     for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) {
    295         if( d_comp[ i ] > 0 ) {
    296             d_comp[ length_d_comp ] = i - 2;
    297             length_d_comp++;
    298         }
    299     }
    300 
    301     /**********************************************************************************
    302     ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
    303     *************************************************************************************/
    304 
    305     /******************************************************************************
    306     ** Scale signal down to avoid correlations measures from overflowing
    307     *******************************************************************************/
    308     /* find scaling as max scaling for each subframe */
    309     shift = silk_P_Ana_find_scaling( frame_8kHz, frame_length_8kHz, sf_length_8kHz );
    310     if( shift > 0 ) {
    311         for( i = 0; i < frame_length_8kHz; i++ ) {
    312             frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
    313         }
    314     }
    315 
    316     /*********************************************************************************
    317     * Find energy of each subframe projected onto its history, for a range of delays
    318     *********************************************************************************/
    319     silk_memset( C, 0, PE_MAX_NB_SUBFR * ( ( PE_MAX_LAG >> 1 ) + 5 ) * sizeof( opus_int16 ) );
    320 
    321     target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
    322     for( k = 0; k < nb_subfr; k++ ) {
    323 
    324         /* Check that we are within range of the array */
    325         silk_assert( target_ptr >= frame_8kHz );
    326         silk_assert( target_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );
    327 
    328         energy_target = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );
    329         for( j = 0; j < length_d_comp; j++ ) {
    330             d = d_comp[ j ];
    331             basis_ptr = target_ptr - d;
    332 
    333             /* Check that we are within range of the array */
    334             silk_assert( basis_ptr >= frame_8kHz );
    335             silk_assert( basis_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );
    336 
    337             cross_corr   = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );
    338             energy_basis = silk_inner_prod_aligned( basis_ptr,  basis_ptr, sf_length_8kHz );
    339             if( cross_corr > 0 ) {
    340                 energy = silk_max( energy_target, energy_basis ); /* Find max to make sure first division < 1.0 */
    341                 lz = silk_CLZ32( cross_corr );
    342                 lshift = silk_LIMIT_32( lz - 1, 0, 15 );
    343                 temp32 = silk_DIV32( silk_LSHIFT( cross_corr, lshift ), silk_RSHIFT( energy, 15 - lshift ) + 1 ); /* Q15 */
    344                 silk_assert( temp32 == silk_SAT16( temp32 ) );
    345                 temp32 = silk_SMULWB( cross_corr, temp32 ); /* Q(-1), cc * ( cc / max(b, t) ) */
    346                 temp32 = silk_ADD_SAT32( temp32, temp32 );  /* Q(0) */
    347                 lz = silk_CLZ32( temp32 );
    348                 lshift = silk_LIMIT_32( lz - 1, 0, 15 );
    349                 energy = silk_min( energy_target, energy_basis );
    350                 C[ k ][ d ] = silk_DIV32( silk_LSHIFT( temp32, lshift ), silk_RSHIFT( energy, 15 - lshift ) + 1 ); /* Q15*/
    351             } else {
    352                 C[ k ][ d ] = 0;
    353             }
    354         }
    355         target_ptr += sf_length_8kHz;
    356     }
    357 
    358     /* search over lag range and lags codebook */
    359     /* scale factor for lag codebook, as a function of center lag */
    360 
    361     CCmax   = silk_int32_MIN;
    362     CCmax_b = silk_int32_MIN;
    363 
    364     CBimax = 0; /* To avoid returning undefined lag values */
    365     lag = -1;   /* To check if lag with strong enough correlation has been found */
    366 
    367     if( prevLag > 0 ) {
    368         if( Fs_kHz == 12 ) {
    369             prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
    370         } else if( Fs_kHz == 16 ) {
    371             prevLag = silk_RSHIFT( prevLag, 1 );
    372         }
    373         prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
    374     } else {
    375         prevLag_log2_Q7 = 0;
    376     }
    377     silk_assert( search_thres2_Q15 == silk_SAT16( search_thres2_Q15 ) );
    378     /* Set up stage 2 codebook based on number of subframes */
    379     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    380         cbk_size   = PE_NB_CBKS_STAGE2_EXT;
    381         Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
    382         if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
    383             /* If input is 8 khz use a larger codebook here because it is last stage */
    384             nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
    385         } else {
    386             nb_cbk_search = PE_NB_CBKS_STAGE2;
    387         }
    388         corr_thres_Q15 = silk_RSHIFT( silk_SMULBB( search_thres2_Q15, search_thres2_Q15 ), 13 );
    389     } else {
    390         cbk_size       = PE_NB_CBKS_STAGE2_10MS;
    391         Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
    392         nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
    393         corr_thres_Q15 = silk_RSHIFT( silk_SMULBB( search_thres2_Q15, search_thres2_Q15 ), 14 );
    394     }
    395 
    396     for( k = 0; k < length_d_srch; k++ ) {
    397         d = d_srch[ k ];
    398         for( j = 0; j < nb_cbk_search; j++ ) {
    399             CC[ j ] = 0;
    400             for( i = 0; i < nb_subfr; i++ ) {
    401                 /* Try all codebooks */
    402                 CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )];
    403             }
    404         }
    405         /* Find best codebook */
    406         CCmax_new = silk_int32_MIN;
    407         CBimax_new = 0;
    408         for( i = 0; i < nb_cbk_search; i++ ) {
    409             if( CC[ i ] > CCmax_new ) {
    410                 CCmax_new = CC[ i ];
    411                 CBimax_new = i;
    412             }
    413         }
    414 
    415         /* Bias towards shorter lags */
    416         lag_log2_Q7 = silk_lin2log( (opus_int32)d ); /* Q7 */
    417         silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
    418         silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ) ) );
    419         CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ), lag_log2_Q7 ), 7 ); /* Q15 */
    420 
    421         /* Bias towards previous lag */
    422         silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ) ) );
    423         if( prevLag > 0 ) {
    424             delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
    425             silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
    426             delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
    427             prev_lag_bias_Q15 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ), *LTPCorr_Q15 ), 15 ); /* Q15 */
    428             prev_lag_bias_Q15 = silk_DIV32( silk_MUL( prev_lag_bias_Q15, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + ( 1 << 6 ) );
    429             CCmax_new_b -= prev_lag_bias_Q15; /* Q15 */
    430         }
    431 
    432         if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
    433             CCmax_new > corr_thres_Q15                              &&  /* Correlation needs to be high enough to be voiced */
    434             silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz      /* Lag must be in range                             */
    435          ) {
    436             CCmax_b = CCmax_new_b;
    437             CCmax   = CCmax_new;
    438             lag     = d;
    439             CBimax  = CBimax_new;
    440         }
    441     }
    442 
    443     if( lag == -1 ) {
    444         /* No suitable candidate found */
    445         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
    446         *LTPCorr_Q15  = 0;
    447         *lagIndex     = 0;
    448         *contourIndex = 0;
    449         return 1;
    450     }
    451 
    452     if( Fs_kHz > 8 ) {
    453         /***************************************************************************/
    454         /* Scale input signal down to avoid correlations measures from overflowing */
    455         /***************************************************************************/
    456         /* find scaling as max scaling for each subframe */
    457         shift = silk_P_Ana_find_scaling( frame, frame_length, sf_length );
    458         if( shift > 0 ) {
    459             /* Move signal to scratch mem because the input signal should be unchanged */
    460             /* Reuse the 32 bit scratch mem vector, use a 16 bit pointer from now */
    461             input_frame_ptr = (opus_int16*)scratch_mem;
    462             for( i = 0; i < frame_length; i++ ) {
    463                 input_frame_ptr[ i ] = silk_RSHIFT( frame[ i ], shift );
    464             }
    465         } else {
    466             input_frame_ptr = (opus_int16*)frame;
    467         }
    468 
    469         /* Search in original signal */
    470 
    471         CBimax_old = CBimax;
    472         /* Compensate for decimation */
    473         silk_assert( lag == silk_SAT16( lag ) );
    474         if( Fs_kHz == 12 ) {
    475             lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
    476         } else if( Fs_kHz == 16 ) {
    477             lag = silk_LSHIFT( lag, 1 );
    478         } else {
    479             lag = silk_SMULBB( lag, 3 );
    480         }
    481 
    482         lag = silk_LIMIT_int( lag, min_lag, max_lag );
    483         start_lag = silk_max_int( lag - 2, min_lag );
    484         end_lag   = silk_min_int( lag + 2, max_lag );
    485         lag_new   = lag;                                    /* to avoid undefined lag */
    486         CBimax    = 0;                                        /* to avoid undefined lag */
    487         silk_assert( silk_LSHIFT( CCmax, 13 ) >= 0 );
    488         *LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
    489 
    490         CCmax = silk_int32_MIN;
    491         /* pitch lags according to second stage */
    492         for( k = 0; k < nb_subfr; k++ ) {
    493             pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
    494         }
    495         /* Calculate the correlations and energies needed in stage 3 */
    496         silk_P_Ana_calc_corr_st3(  crosscorr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
    497         silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
    498 
    499         lag_counter = 0;
    500         silk_assert( lag == silk_SAT16( lag ) );
    501         contour_bias_Q20 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 20 ), lag );
    502 
    503         /* Set up codebook parameters according to complexity setting and frame length */
    504         if( nb_subfr == PE_MAX_NB_SUBFR ) {
    505             nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
    506             cbk_size        = PE_NB_CBKS_STAGE3_MAX;
    507             Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
    508         } else {
    509             nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
    510             cbk_size        = PE_NB_CBKS_STAGE3_10MS;
    511             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    512         }
    513         for( d = start_lag; d <= end_lag; d++ ) {
    514             for( j = 0; j < nb_cbk_search; j++ ) {
    515                 cross_corr = 0;
    516                 energy     = 0;
    517                 for( k = 0; k < nb_subfr; k++ ) {
    518                     silk_assert( PE_MAX_NB_SUBFR == 4 );
    519                     energy     += silk_RSHIFT( energies_st3[  k ][ j ][ lag_counter ], 2 ); /* use mean, to avoid overflow */
    520                     silk_assert( energy >= 0 );
    521                     cross_corr += silk_RSHIFT( crosscorr_st3[ k ][ j ][ lag_counter ], 2 ); /* use mean, to avoid overflow */
    522                 }
    523                 if( cross_corr > 0 ) {
    524                     /* Divide cross_corr / energy and get result in Q15 */
    525                     lz = silk_CLZ32( cross_corr );
    526                     /* Divide with result in Q13, cross_corr could be larger than energy */
    527                     lshift = silk_LIMIT_32( lz - 1, 0, 13 );
    528                     CCmax_new = silk_DIV32( silk_LSHIFT( cross_corr, lshift ), silk_RSHIFT( energy, 13 - lshift ) + 1 );
    529                     CCmax_new = silk_SAT16( CCmax_new );
    530                     CCmax_new = silk_SMULWB( cross_corr, CCmax_new );
    531                     /* Saturate */
    532                     if( CCmax_new > silk_RSHIFT( silk_int32_MAX, 3 ) ) {
    533                         CCmax_new = silk_int32_MAX;
    534                     } else {
    535                         CCmax_new = silk_LSHIFT( CCmax_new, 3 );
    536                     }
    537                     /* Reduce depending on flatness of contour */
    538                     diff = silk_int16_MAX - silk_RSHIFT( silk_MUL( contour_bias_Q20, j ), 5 ); /* Q20 -> Q15 */
    539                     silk_assert( diff == silk_SAT16( diff ) );
    540                     CCmax_new = silk_LSHIFT( silk_SMULWB( CCmax_new, diff ), 1 );
    541                 } else {
    542                     CCmax_new = 0;
    543                 }
    544 
    545                 if( CCmax_new > CCmax                                               &&
    546                    ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag
    547                    ) {
    548                     CCmax   = CCmax_new;
    549                     lag_new = d;
    550                     CBimax  = j;
    551                 }
    552             }
    553             lag_counter++;
    554         }
    555 
    556         for( k = 0; k < nb_subfr; k++ ) {
    557             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    558             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
    559         }
    560         *lagIndex = (opus_int16)( lag_new - min_lag);
    561         *contourIndex = (opus_int8)CBimax;
    562     } else {        /* Fs_kHz == 8 */
    563         /* Save Lags and correlation */
    564         CCmax = silk_max( CCmax, 0 );
    565         *LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
    566         for( k = 0; k < nb_subfr; k++ ) {
    567             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
    568             pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz );
    569         }
    570         *lagIndex = (opus_int16)( lag - min_lag_8kHz );
    571         *contourIndex = (opus_int8)CBimax;
    572     }
    573     silk_assert( *lagIndex >= 0 );
    574     /* return as voiced */
    575     return 0;
    576 }
    577 
    578 /*************************************************************************/
    579 /* Calculates the correlations used in stage 3 search. In order to cover */
    580 /* the whole lag codebook for all the searched offset lags (lag +- 2),   */
    581 /*************************************************************************/
    582 void silk_P_Ana_calc_corr_st3(
    583     opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM correlation array */
    584     const opus_int16  frame[],                         /* I vector to correlate         */
    585     opus_int          start_lag,                       /* I lag offset to search around */
    586     opus_int          sf_length,                       /* I length of a 5 ms subframe   */
    587     opus_int          nb_subfr,                        /* I number of subframes         */
    588     opus_int          complexity                       /* I Complexity setting          */
    589 )
    590 {
    591     const opus_int16 *target_ptr, *basis_ptr;
    592     opus_int32 cross_corr;
    593     opus_int   i, j, k, lag_counter, lag_low, lag_high;
    594     opus_int   nb_cbk_search, delta, idx, cbk_size;
    595     opus_int32 scratch_mem[ SCRATCH_SIZE ];
    596     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    597 
    598     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    599     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    600 
    601     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    602         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    603         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    604         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    605         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    606     } else {
    607         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    608         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    609         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    610         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    611         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    612     }
    613 
    614     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
    615     for( k = 0; k < nb_subfr; k++ ) {
    616         lag_counter = 0;
    617 
    618         /* Calculate the correlations for each subframe */
    619         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    620         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
    621         for( j = lag_low; j <= lag_high; j++ ) {
    622             basis_ptr = target_ptr - ( start_lag + j );
    623             cross_corr = silk_inner_prod_aligned( (opus_int16*)target_ptr, (opus_int16*)basis_ptr, sf_length );
    624             silk_assert( lag_counter < SCRATCH_SIZE );
    625             scratch_mem[ lag_counter ] = cross_corr;
    626             lag_counter++;
    627         }
    628 
    629         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    630         for( i = 0; i < nb_cbk_search; i++ ) {
    631             /* Fill out the 3 dim array that stores the correlations for */
    632             /* each code_book vector for each start lag */
    633             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    634             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    635                 silk_assert( idx + j < SCRATCH_SIZE );
    636                 silk_assert( idx + j < lag_counter );
    637                 cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
    638             }
    639         }
    640         target_ptr += sf_length;
    641     }
    642 }
    643 
    644 /********************************************************************/
    645 /* Calculate the energies for first two subframes. The energies are */
    646 /* calculated recursively.                                          */
    647 /********************************************************************/
    648 void silk_P_Ana_calc_energy_st3(
    649     opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM energy array */
    650     const opus_int16  frame[],                         /* I vector to calc energy in    */
    651     opus_int          start_lag,                       /* I lag offset to search around */
    652     opus_int          sf_length,                       /* I length of one 5 ms subframe */
    653     opus_int          nb_subfr,                     /* I number of subframes         */
    654     opus_int          complexity                       /* I Complexity setting          */
    655 )
    656 {
    657     const opus_int16 *target_ptr, *basis_ptr;
    658     opus_int32 energy;
    659     opus_int   k, i, j, lag_counter;
    660     opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
    661     opus_int32 scratch_mem[ SCRATCH_SIZE ];
    662     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
    663 
    664     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
    665     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
    666 
    667     if( nb_subfr == PE_MAX_NB_SUBFR ) {
    668         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
    669         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
    670         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
    671         cbk_size      = PE_NB_CBKS_STAGE3_MAX;
    672     } else {
    673         silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
    674         Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
    675         Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
    676         nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
    677         cbk_size      = PE_NB_CBKS_STAGE3_10MS;
    678     }
    679     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
    680     for( k = 0; k < nb_subfr; k++ ) {
    681         lag_counter = 0;
    682 
    683         /* Calculate the energy for first lag */
    684         basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
    685         energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
    686         silk_assert( energy >= 0 );
    687         scratch_mem[ lag_counter ] = energy;
    688         lag_counter++;
    689 
    690         lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
    691         for( i = 1; i < lag_diff; i++ ) {
    692             /* remove part outside new window */
    693             energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
    694             silk_assert( energy >= 0 );
    695 
    696             /* add part that comes into window */
    697             energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
    698             silk_assert( energy >= 0 );
    699             silk_assert( lag_counter < SCRATCH_SIZE );
    700             scratch_mem[ lag_counter ] = energy;
    701             lag_counter++;
    702         }
    703 
    704         delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
    705         for( i = 0; i < nb_cbk_search; i++ ) {
    706             /* Fill out the 3 dim array that stores the correlations for    */
    707             /* each code_book vector for each start lag                     */
    708             idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
    709             for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
    710                 silk_assert( idx + j < SCRATCH_SIZE );
    711                 silk_assert( idx + j < lag_counter );
    712                 energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ];
    713                 silk_assert( energies_st3[ k ][ i ][ j ] >= 0 );
    714             }
    715         }
    716         target_ptr += sf_length;
    717     }
    718 }
    719 
    720 opus_int32 silk_P_Ana_find_scaling(
    721     const opus_int16  *frame,
    722     const opus_int    frame_length,
    723     const opus_int    sum_sqr_len
    724 )
    725 {
    726     opus_int32 nbits, x_max;
    727 
    728     x_max = silk_int16_array_maxabs( frame, frame_length );
    729 
    730     if( x_max < silk_int16_MAX ) {
    731         /* Number of bits needed for the sum of the squares */
    732         nbits = 32 - silk_CLZ32( silk_SMULBB( x_max, x_max ) );
    733     } else {
    734         /* Here we don't know if x_max should have been silk_int16_MAX + 1, so we expect the worst case */
    735         nbits = 30;
    736     }
    737     nbits += 17 - silk_CLZ16( sum_sqr_len );
    738 
    739     /* Without a guarantee of saturation, we need to keep the 31st bit free */
    740     if( nbits < 31 ) {
    741         return 0;
    742     } else {
    743         return( nbits - 30 );
    744     }
    745 }
    746