1 /*********************************************************************** 2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3 Redistribution and use in source and binary forms, with or without 4 modification, are permitted provided that the following conditions 5 are met: 6 - Redistributions of source code must retain the above copyright notice, 7 this list of conditions and the following disclaimer. 8 - Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 12 names of specific contributors, may be used to endorse or promote 13 products derived from this software without specific prior written 14 permission. 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 POSSIBILITY OF SUCH DAMAGE. 26 ***********************************************************************/ 27 28 #ifdef HAVE_CONFIG_H 29 #include "config.h" 30 #endif 31 32 /*********************************************************** 33 * Pitch analyser function 34 ********************************************************** */ 35 #include "SigProc_FIX.h" 36 #include "pitch_est_defines.h" 37 #include "stack_alloc.h" 38 #include "debug.h" 39 #include "pitch.h" 40 41 #define SCRATCH_SIZE 22 42 #define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) 43 #define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 ) 44 #define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 ) 45 #define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 ) 46 #define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 ) 47 #define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 ) 48 #define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ ) 49 #define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) ) 50 #define D_COMP_MIN ( MIN_LAG_8KHZ - 3 ) 51 #define D_COMP_MAX ( MAX_LAG_8KHZ + 4 ) 52 #define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN ) 53 54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ]; 55 56 /************************************************************/ 57 /* Internally used functions */ 58 /************************************************************/ 59 static void silk_P_Ana_calc_corr_st3( 60 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ 61 const opus_int16 frame[], /* I vector to correlate */ 62 opus_int start_lag, /* I lag offset to search around */ 63 opus_int sf_length, /* I length of a 5 ms subframe */ 64 opus_int nb_subfr, /* I number of subframes */ 65 opus_int complexity, /* I Complexity setting */ 66 int arch /* I Run-time architecture */ 67 ); 68 69 static void silk_P_Ana_calc_energy_st3( 70 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ 71 const opus_int16 frame[], /* I vector to calc energy in */ 72 opus_int start_lag, /* I lag offset to search around */ 73 opus_int sf_length, /* I length of one 5 ms subframe */ 74 opus_int nb_subfr, /* I number of subframes */ 75 opus_int complexity, /* I Complexity setting */ 76 int arch /* I Run-time architecture */ 77 ); 78 79 /*************************************************************/ 80 /* FIXED POINT CORE PITCH ANALYSIS FUNCTION */ 81 /*************************************************************/ 82 opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ 83 const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ 84 opus_int *pitch_out, /* O 4 pitch lag values */ 85 opus_int16 *lagIndex, /* O Lag Index */ 86 opus_int8 *contourIndex, /* O Pitch contour Index */ 87 opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ 88 opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ 89 const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ 90 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ 91 const opus_int Fs_kHz, /* I Sample frequency (kHz) */ 92 const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ 93 const opus_int nb_subfr, /* I number of 5 ms subframes */ 94 int arch /* I Run-time architecture */ 95 ) 96 { 97 VARDECL( opus_int16, frame_8kHz ); 98 VARDECL( opus_int16, frame_4kHz ); 99 opus_int32 filt_state[ 6 ]; 100 const opus_int16 *input_frame_ptr; 101 opus_int i, k, d, j; 102 VARDECL( opus_int16, C ); 103 VARDECL( opus_int32, xcorr32 ); 104 const opus_int16 *target_ptr, *basis_ptr; 105 opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target; 106 opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; 107 VARDECL( opus_int16, d_comp ); 108 opus_int32 sum, threshold, lag_counter; 109 opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new; 110 opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new; 111 VARDECL( silk_pe_stage3_vals, energies_st3 ); 112 VARDECL( silk_pe_stage3_vals, cross_corr_st3 ); 113 opus_int frame_length, frame_length_8kHz, frame_length_4kHz; 114 opus_int sf_length; 115 opus_int min_lag; 116 opus_int max_lag; 117 opus_int32 contour_bias_Q15, diff; 118 opus_int nb_cbk_search, cbk_size; 119 opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13; 120 const opus_int8 *Lag_CB_ptr; 121 SAVE_STACK; 122 /* Check for valid sampling frequency */ 123 silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); 124 125 /* Check for valid complexity setting */ 126 silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); 127 silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); 128 129 silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) ); 130 silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) ); 131 132 /* Set up frame lengths max / min lag for the sampling frequency */ 133 frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; 134 frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; 135 frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; 136 sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; 137 min_lag = PE_MIN_LAG_MS * Fs_kHz; 138 max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; 139 140 /* Resample from input sampled at Fs_kHz to 8 kHz */ 141 ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 ); 142 if( Fs_kHz == 16 ) { 143 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); 144 silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length ); 145 } else if( Fs_kHz == 12 ) { 146 silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); 147 silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length ); 148 } else { 149 silk_assert( Fs_kHz == 8 ); 150 silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) ); 151 } 152 153 /* Decimate again to 4 kHz */ 154 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */ 155 ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 ); 156 silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz ); 157 158 /* Low-pass filter */ 159 for( i = frame_length_4kHz - 1; i > 0; i-- ) { 160 frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); 161 } 162 163 /******************************************************************************* 164 ** Scale 4 kHz signal down to prevent correlations measures from overflowing 165 ** find scaling as max scaling for each 8kHz(?) subframe 166 *******************************************************************************/ 167 168 /* Inner product is calculated with different lengths, so scale for the worst case */ 169 silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz ); 170 if( shift > 0 ) { 171 shift = silk_RSHIFT( shift, 1 ); 172 for( i = 0; i < frame_length_4kHz; i++ ) { 173 frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift ); 174 } 175 } 176 177 /****************************************************************************** 178 * FIRST STAGE, operating in 4 khz 179 ******************************************************************************/ 180 ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 ); 181 ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 ); 182 silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) ); 183 target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; 184 for( k = 0; k < nb_subfr >> 1; k++ ) { 185 /* Check that we are within range of the array */ 186 silk_assert( target_ptr >= frame_4kHz ); 187 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); 188 189 basis_ptr = target_ptr - MIN_LAG_4KHZ; 190 191 /* Check that we are within range of the array */ 192 silk_assert( basis_ptr >= frame_4kHz ); 193 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); 194 195 celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); 196 197 /* Calculate first vector products before loop */ 198 cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; 199 normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ); 200 normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) ); 201 normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); 202 203 matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) = 204 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ 205 206 /* From now on normalizer is computed recursively */ 207 for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) { 208 basis_ptr--; 209 210 /* Check that we are within range of the array */ 211 silk_assert( basis_ptr >= frame_4kHz ); 212 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); 213 214 cross_corr = xcorr32[ MAX_LAG_4KHZ - d ]; 215 216 /* Add contribution of new sample and remove contribution from oldest sample */ 217 normalizer = silk_ADD32( normalizer, 218 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) - 219 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) ); 220 221 matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) = 222 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ 223 } 224 /* Update target pointer */ 225 target_ptr += SF_LENGTH_8KHZ; 226 } 227 228 /* Combine two subframes into single correlation measure and apply short-lag bias */ 229 if( nb_subfr == PE_MAX_NB_SUBFR ) { 230 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { 231 sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ) 232 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */ 233 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ 234 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ 235 } 236 } else { 237 /* Only short-lag bias */ 238 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { 239 sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */ 240 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ 241 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ 242 } 243 } 244 245 /* Sort */ 246 length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 ); 247 silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); 248 silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ, 249 length_d_srch ); 250 251 /* Escape if correlation is very low already here */ 252 Cmax = (opus_int)C[ 0 ]; /* Q14 */ 253 if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) { 254 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); 255 *LTPCorr_Q15 = 0; 256 *lagIndex = 0; 257 *contourIndex = 0; 258 RESTORE_STACK; 259 return 1; 260 } 261 262 threshold = silk_SMULWB( search_thres1_Q16, Cmax ); 263 for( i = 0; i < length_d_srch; i++ ) { 264 /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ 265 if( C[ i ] > threshold ) { 266 d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 ); 267 } else { 268 length_d_srch = i; 269 break; 270 } 271 } 272 silk_assert( length_d_srch > 0 ); 273 274 ALLOC( d_comp, D_COMP_STRIDE, opus_int16 ); 275 for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) { 276 d_comp[ i - D_COMP_MIN ] = 0; 277 } 278 for( i = 0; i < length_d_srch; i++ ) { 279 d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1; 280 } 281 282 /* Convolution */ 283 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { 284 d_comp[ i - D_COMP_MIN ] += 285 d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ]; 286 } 287 288 length_d_srch = 0; 289 for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) { 290 if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) { 291 d_srch[ length_d_srch ] = i; 292 length_d_srch++; 293 } 294 } 295 296 /* Convolution */ 297 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { 298 d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ] 299 + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ]; 300 } 301 302 length_d_comp = 0; 303 for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) { 304 if( d_comp[ i - D_COMP_MIN ] > 0 ) { 305 d_comp[ length_d_comp ] = i - 2; 306 length_d_comp++; 307 } 308 } 309 310 /********************************************************************************** 311 ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation 312 *************************************************************************************/ 313 314 /****************************************************************************** 315 ** Scale signal down to avoid correlations measures from overflowing 316 *******************************************************************************/ 317 /* find scaling as max scaling for each subframe */ 318 silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz ); 319 if( shift > 0 ) { 320 shift = silk_RSHIFT( shift, 1 ); 321 for( i = 0; i < frame_length_8kHz; i++ ) { 322 frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift ); 323 } 324 } 325 326 /********************************************************************************* 327 * Find energy of each subframe projected onto its history, for a range of delays 328 *********************************************************************************/ 329 silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) ); 330 331 target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; 332 for( k = 0; k < nb_subfr; k++ ) { 333 334 /* Check that we are within range of the array */ 335 silk_assert( target_ptr >= frame_8kHz ); 336 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); 337 338 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 ); 339 for( j = 0; j < length_d_comp; j++ ) { 340 d = d_comp[ j ]; 341 basis_ptr = target_ptr - d; 342 343 /* Check that we are within range of the array */ 344 silk_assert( basis_ptr >= frame_8kHz ); 345 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); 346 347 cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); 348 if( cross_corr > 0 ) { 349 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ); 350 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 351 (opus_int16)silk_DIV32_varQ( cross_corr, 352 silk_ADD32( energy_target, 353 energy_basis ), 354 13 + 1 ); /* Q13 */ 355 } else { 356 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0; 357 } 358 } 359 target_ptr += SF_LENGTH_8KHZ; 360 } 361 362 /* search over lag range and lags codebook */ 363 /* scale factor for lag codebook, as a function of center lag */ 364 365 CCmax = silk_int32_MIN; 366 CCmax_b = silk_int32_MIN; 367 368 CBimax = 0; /* To avoid returning undefined lag values */ 369 lag = -1; /* To check if lag with strong enough correlation has been found */ 370 371 if( prevLag > 0 ) { 372 if( Fs_kHz == 12 ) { 373 prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 ); 374 } else if( Fs_kHz == 16 ) { 375 prevLag = silk_RSHIFT( prevLag, 1 ); 376 } 377 prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag ); 378 } else { 379 prevLag_log2_Q7 = 0; 380 } 381 silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) ); 382 /* Set up stage 2 codebook based on number of subframes */ 383 if( nb_subfr == PE_MAX_NB_SUBFR ) { 384 cbk_size = PE_NB_CBKS_STAGE2_EXT; 385 Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; 386 if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { 387 /* If input is 8 khz use a larger codebook here because it is last stage */ 388 nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; 389 } else { 390 nb_cbk_search = PE_NB_CBKS_STAGE2; 391 } 392 } else { 393 cbk_size = PE_NB_CBKS_STAGE2_10MS; 394 Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; 395 nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; 396 } 397 398 for( k = 0; k < length_d_srch; k++ ) { 399 d = d_srch[ k ]; 400 for( j = 0; j < nb_cbk_search; j++ ) { 401 CC[ j ] = 0; 402 for( i = 0; i < nb_subfr; i++ ) { 403 opus_int d_subfr; 404 /* Try all codebooks */ 405 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ); 406 CC[ j ] = CC[ j ] 407 + (opus_int32)matrix_ptr( C, i, 408 d_subfr - ( MIN_LAG_8KHZ - 2 ), 409 CSTRIDE_8KHZ ); 410 } 411 } 412 /* Find best codebook */ 413 CCmax_new = silk_int32_MIN; 414 CBimax_new = 0; 415 for( i = 0; i < nb_cbk_search; i++ ) { 416 if( CC[ i ] > CCmax_new ) { 417 CCmax_new = CC[ i ]; 418 CBimax_new = i; 419 } 420 } 421 422 /* Bias towards shorter lags */ 423 lag_log2_Q7 = silk_lin2log( d ); /* Q7 */ 424 silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) ); 425 silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) ); 426 CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */ 427 428 /* Bias towards previous lag */ 429 silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) ); 430 if( prevLag > 0 ) { 431 delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7; 432 silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) ); 433 delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 ); 434 prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */ 435 prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) ); 436 CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */ 437 } 438 439 if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ 440 CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */ 441 silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */ 442 ) { 443 CCmax_b = CCmax_new_b; 444 CCmax = CCmax_new; 445 lag = d; 446 CBimax = CBimax_new; 447 } 448 } 449 450 if( lag == -1 ) { 451 /* No suitable candidate found */ 452 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); 453 *LTPCorr_Q15 = 0; 454 *lagIndex = 0; 455 *contourIndex = 0; 456 RESTORE_STACK; 457 return 1; 458 } 459 460 /* Output normalized correlation */ 461 *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 ); 462 silk_assert( *LTPCorr_Q15 >= 0 ); 463 464 if( Fs_kHz > 8 ) { 465 VARDECL( opus_int16, scratch_mem ); 466 /***************************************************************************/ 467 /* Scale input signal down to avoid correlations measures from overflowing */ 468 /***************************************************************************/ 469 /* find scaling as max scaling for each subframe */ 470 silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); 471 ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 ); 472 if( shift > 0 ) { 473 /* Move signal to scratch mem because the input signal should be unchanged */ 474 shift = silk_RSHIFT( shift, 1 ); 475 for( i = 0; i < frame_length; i++ ) { 476 scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); 477 } 478 input_frame_ptr = scratch_mem; 479 } else { 480 input_frame_ptr = frame; 481 } 482 483 /* Search in original signal */ 484 485 CBimax_old = CBimax; 486 /* Compensate for decimation */ 487 silk_assert( lag == silk_SAT16( lag ) ); 488 if( Fs_kHz == 12 ) { 489 lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 ); 490 } else if( Fs_kHz == 16 ) { 491 lag = silk_LSHIFT( lag, 1 ); 492 } else { 493 lag = silk_SMULBB( lag, 3 ); 494 } 495 496 lag = silk_LIMIT_int( lag, min_lag, max_lag ); 497 start_lag = silk_max_int( lag - 2, min_lag ); 498 end_lag = silk_min_int( lag + 2, max_lag ); 499 lag_new = lag; /* to avoid undefined lag */ 500 CBimax = 0; /* to avoid undefined lag */ 501 502 CCmax = silk_int32_MIN; 503 /* pitch lags according to second stage */ 504 for( k = 0; k < nb_subfr; k++ ) { 505 pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ]; 506 } 507 508 /* Set up codebook parameters according to complexity setting and frame length */ 509 if( nb_subfr == PE_MAX_NB_SUBFR ) { 510 nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; 511 cbk_size = PE_NB_CBKS_STAGE3_MAX; 512 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; 513 } else { 514 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; 515 cbk_size = PE_NB_CBKS_STAGE3_10MS; 516 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; 517 } 518 519 /* Calculate the correlations and energies needed in stage 3 */ 520 ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); 521 ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); 522 silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); 523 silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); 524 525 lag_counter = 0; 526 silk_assert( lag == silk_SAT16( lag ) ); 527 contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); 528 529 target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; 530 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 ); 531 for( d = start_lag; d <= end_lag; d++ ) { 532 for( j = 0; j < nb_cbk_search; j++ ) { 533 cross_corr = 0; 534 energy = energy_target; 535 for( k = 0; k < nb_subfr; k++ ) { 536 cross_corr = silk_ADD32( cross_corr, 537 matrix_ptr( cross_corr_st3, k, j, 538 nb_cbk_search )[ lag_counter ] ); 539 energy = silk_ADD32( energy, 540 matrix_ptr( energies_st3, k, j, 541 nb_cbk_search )[ lag_counter ] ); 542 silk_assert( energy >= 0 ); 543 } 544 if( cross_corr > 0 ) { 545 CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */ 546 /* Reduce depending on flatness of contour */ 547 diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */ 548 silk_assert( diff == silk_SAT16( diff ) ); 549 CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */ 550 } else { 551 CCmax_new = 0; 552 } 553 554 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { 555 CCmax = CCmax_new; 556 lag_new = d; 557 CBimax = j; 558 } 559 } 560 lag_counter++; 561 } 562 563 for( k = 0; k < nb_subfr; k++ ) { 564 pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); 565 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); 566 } 567 *lagIndex = (opus_int16)( lag_new - min_lag); 568 *contourIndex = (opus_int8)CBimax; 569 } else { /* Fs_kHz == 8 */ 570 /* Save Lags */ 571 for( k = 0; k < nb_subfr; k++ ) { 572 pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); 573 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 ); 574 } 575 *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ ); 576 *contourIndex = (opus_int8)CBimax; 577 } 578 silk_assert( *lagIndex >= 0 ); 579 /* return as voiced */ 580 RESTORE_STACK; 581 return 0; 582 } 583 584 /*********************************************************************** 585 * Calculates the correlations used in stage 3 search. In order to cover 586 * the whole lag codebook for all the searched offset lags (lag +- 2), 587 * the following correlations are needed in each sub frame: 588 * 589 * sf1: lag range [-8,...,7] total 16 correlations 590 * sf2: lag range [-4,...,4] total 9 correlations 591 * sf3: lag range [-3,....4] total 8 correltions 592 * sf4: lag range [-6,....8] total 15 correlations 593 * 594 * In total 48 correlations. The direct implementation computed in worst 595 * case 4*12*5 = 240 correlations, but more likely around 120. 596 ***********************************************************************/ 597 static void silk_P_Ana_calc_corr_st3( 598 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ 599 const opus_int16 frame[], /* I vector to correlate */ 600 opus_int start_lag, /* I lag offset to search around */ 601 opus_int sf_length, /* I length of a 5 ms subframe */ 602 opus_int nb_subfr, /* I number of subframes */ 603 opus_int complexity, /* I Complexity setting */ 604 int arch /* I Run-time architecture */ 605 ) 606 { 607 const opus_int16 *target_ptr; 608 opus_int i, j, k, lag_counter, lag_low, lag_high; 609 opus_int nb_cbk_search, delta, idx, cbk_size; 610 VARDECL( opus_int32, scratch_mem ); 611 VARDECL( opus_int32, xcorr32 ); 612 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; 613 SAVE_STACK; 614 615 silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); 616 silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); 617 618 if( nb_subfr == PE_MAX_NB_SUBFR ) { 619 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; 620 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; 621 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; 622 cbk_size = PE_NB_CBKS_STAGE3_MAX; 623 } else { 624 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); 625 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; 626 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; 627 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; 628 cbk_size = PE_NB_CBKS_STAGE3_10MS; 629 } 630 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); 631 ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 ); 632 633 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ 634 for( k = 0; k < nb_subfr; k++ ) { 635 lag_counter = 0; 636 637 /* Calculate the correlations for each subframe */ 638 lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); 639 lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); 640 silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); 641 celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); 642 for( j = lag_low; j <= lag_high; j++ ) { 643 silk_assert( lag_counter < SCRATCH_SIZE ); 644 scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; 645 lag_counter++; 646 } 647 648 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); 649 for( i = 0; i < nb_cbk_search; i++ ) { 650 /* Fill out the 3 dim array that stores the correlations for */ 651 /* each code_book vector for each start lag */ 652 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; 653 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { 654 silk_assert( idx + j < SCRATCH_SIZE ); 655 silk_assert( idx + j < lag_counter ); 656 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] = 657 scratch_mem[ idx + j ]; 658 } 659 } 660 target_ptr += sf_length; 661 } 662 RESTORE_STACK; 663 } 664 665 /********************************************************************/ 666 /* Calculate the energies for first two subframes. The energies are */ 667 /* calculated recursively. */ 668 /********************************************************************/ 669 static void silk_P_Ana_calc_energy_st3( 670 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ 671 const opus_int16 frame[], /* I vector to calc energy in */ 672 opus_int start_lag, /* I lag offset to search around */ 673 opus_int sf_length, /* I length of one 5 ms subframe */ 674 opus_int nb_subfr, /* I number of subframes */ 675 opus_int complexity, /* I Complexity setting */ 676 int arch /* I Run-time architecture */ 677 ) 678 { 679 const opus_int16 *target_ptr, *basis_ptr; 680 opus_int32 energy; 681 opus_int k, i, j, lag_counter; 682 opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; 683 VARDECL( opus_int32, scratch_mem ); 684 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; 685 SAVE_STACK; 686 687 silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); 688 silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); 689 690 if( nb_subfr == PE_MAX_NB_SUBFR ) { 691 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; 692 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; 693 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; 694 cbk_size = PE_NB_CBKS_STAGE3_MAX; 695 } else { 696 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); 697 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; 698 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; 699 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; 700 cbk_size = PE_NB_CBKS_STAGE3_10MS; 701 } 702 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); 703 704 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; 705 for( k = 0; k < nb_subfr; k++ ) { 706 lag_counter = 0; 707 708 /* Calculate the energy for first lag */ 709 basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); 710 energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch ); 711 silk_assert( energy >= 0 ); 712 scratch_mem[ lag_counter ] = energy; 713 lag_counter++; 714 715 lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); 716 for( i = 1; i < lag_diff; i++ ) { 717 /* remove part outside new window */ 718 energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] ); 719 silk_assert( energy >= 0 ); 720 721 /* add part that comes into window */ 722 energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) ); 723 silk_assert( energy >= 0 ); 724 silk_assert( lag_counter < SCRATCH_SIZE ); 725 scratch_mem[ lag_counter ] = energy; 726 lag_counter++; 727 } 728 729 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); 730 for( i = 0; i < nb_cbk_search; i++ ) { 731 /* Fill out the 3 dim array that stores the correlations for */ 732 /* each code_book vector for each start lag */ 733 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; 734 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { 735 silk_assert( idx + j < SCRATCH_SIZE ); 736 silk_assert( idx + j < lag_counter ); 737 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] = 738 scratch_mem[ idx + j ]; 739 silk_assert( 740 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 ); 741 } 742 } 743 target_ptr += sf_length; 744 } 745 RESTORE_STACK; 746 } 747