1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "pitch_estimator.h" 12 13 #include <math.h> 14 #include <memory.h> 15 #ifdef WEBRTC_ANDROID 16 #include <stdlib.h> 17 #endif 18 19 static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, 20 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; 21 22 /* interpolation filter */ 23 __inline static void IntrepolFilter(double *data_ptr, double *intrp) 24 { 25 *intrp = kInterpolWin[0] * data_ptr[-3]; 26 *intrp += kInterpolWin[1] * data_ptr[-2]; 27 *intrp += kInterpolWin[2] * data_ptr[-1]; 28 *intrp += kInterpolWin[3] * data_ptr[0]; 29 *intrp += kInterpolWin[4] * data_ptr[1]; 30 *intrp += kInterpolWin[5] * data_ptr[2]; 31 *intrp += kInterpolWin[6] * data_ptr[3]; 32 *intrp += kInterpolWin[7] * data_ptr[4]; 33 } 34 35 36 /* 2D parabolic interpolation */ 37 /* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */ 38 __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val) 39 { 40 double c, b[2], A[2][2]; 41 double t1, t2, d; 42 double delta1, delta2; 43 44 45 // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}}; 46 // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0 47 48 c = T[1][1]; 49 b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]); 50 b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]); 51 A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]); 52 t1 = 0.5 * (T[0][0] + T[2][2]) - c; 53 t2 = 0.5 * (T[2][0] + T[0][2]) - c; 54 d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2; 55 A[0][0] = -t1 - 0.5 * d; 56 A[1][1] = -t2 - 0.5 * d; 57 58 /* deal with singularities or ill-conditioned cases */ 59 if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) { 60 *peak_val = T[1][1]; 61 return; 62 } 63 64 /* Cholesky decomposition: replace A by upper-triangular factor */ 65 A[0][0] = sqrt(A[0][0]); 66 A[0][1] = A[0][1] / A[0][0]; 67 A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]); 68 69 /* compute [x; y] = -0.5 * inv(A) * b */ 70 t1 = b[0] / A[0][0]; 71 t2 = (b[1] - t1 * A[0][1]) / A[1][1]; 72 delta2 = t2 / A[1][1]; 73 delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0]; 74 delta2 *= 0.5; 75 76 /* limit norm */ 77 t1 = delta1 * delta1 + delta2 * delta2; 78 if (t1 > 1.0) { 79 delta1 /= t1; 80 delta2 /= t1; 81 } 82 83 *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c; 84 85 *x += delta1; 86 *y += delta2; 87 } 88 89 90 static void PCorr(const double *in, double *outcorr) 91 { 92 double sum, ysum, prod; 93 const double *x, *inptr; 94 int k, n; 95 96 //ysum = 1e-6; /* use this with float (i.s.o. double)! */ 97 ysum = 1e-13; 98 sum = 0.0; 99 x = in + PITCH_MAX_LAG/2 + 2; 100 for (n = 0; n < PITCH_CORR_LEN2; n++) { 101 ysum += in[n] * in[n]; 102 sum += x[n] * in[n]; 103 } 104 105 outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */ 106 *outcorr = sum / sqrt(ysum); 107 108 for (k = 1; k < PITCH_LAG_SPAN2; k++) { 109 ysum -= in[k-1] * in[k-1]; 110 ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1]; 111 sum = 0.0; 112 inptr = &in[k]; 113 prod = x[0] * inptr[0]; 114 for (n = 1; n < PITCH_CORR_LEN2; n++) { 115 sum += prod; 116 prod = x[n] * inptr[n]; 117 } 118 sum += prod; 119 outcorr--; 120 *outcorr = sum / sqrt(ysum); 121 } 122 } 123 124 125 void WebRtcIsac_InitializePitch(const double *in, 126 const double old_lag, 127 const double old_gain, 128 PitchAnalysisStruct *State, 129 double *lags) 130 { 131 double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2]; 132 double ratio, log_lag, gain_bias; 133 double bias; 134 double corrvec1[PITCH_LAG_SPAN2]; 135 double corrvec2[PITCH_LAG_SPAN2]; 136 int m, k; 137 // Allocating 10 extra entries at the begining of the CorrSurf 138 double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)]; 139 double* CorrSurf[2*PITCH_BW+3]; 140 double *CorrSurfPtr1, *CorrSurfPtr2; 141 double LagWin[3] = {0.2, 0.5, 0.98}; 142 int ind1, ind2, peaks_ind, peak, max_ind; 143 int peaks[PITCH_MAX_NUM_PEAKS]; 144 double adj, gain_tmp; 145 double corr, corr_max; 146 double intrp_a, intrp_b, intrp_c, intrp_d; 147 double peak_vals[PITCH_MAX_NUM_PEAKS]; 148 double lags1[PITCH_MAX_NUM_PEAKS]; 149 double lags2[PITCH_MAX_NUM_PEAKS]; 150 double T[3][3]; 151 int row; 152 153 for(k = 0; k < 2*PITCH_BW+3; k++) 154 { 155 CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)]; 156 } 157 /* reset CorrSurf matrix */ 158 memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4))); 159 160 //warnings -DH 161 max_ind = 0; 162 peak = 0; 163 164 /* copy old values from state buffer */ 165 memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); 166 167 /* decimation; put result after the old values */ 168 WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN, 169 &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); 170 171 /* low-pass filtering */ 172 for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) 173 buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2]; 174 175 /* copy end part back into state buffer */ 176 memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); 177 178 /* compute correlation for first and second half of the frame */ 179 PCorr(buf_dec, corrvec1); 180 PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2); 181 182 /* bias towards pitch lag of previous frame */ 183 log_lag = log(0.5 * old_lag); 184 gain_bias = 4.0 * old_gain * old_gain; 185 if (gain_bias > 0.8) gain_bias = 0.8; 186 for (k = 0; k < PITCH_LAG_SPAN2; k++) 187 { 188 ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag; 189 bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); 190 corrvec1[k] *= bias; 191 } 192 193 /* taper correlation functions */ 194 for (k = 0; k < 3; k++) { 195 gain_tmp = LagWin[k]; 196 corrvec1[k] *= gain_tmp; 197 corrvec2[k] *= gain_tmp; 198 corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp; 199 corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp; 200 } 201 202 corr_max = 0.0; 203 /* fill middle row of correlation surface */ 204 ind1 = 0; 205 ind2 = 0; 206 CorrSurfPtr1 = &CorrSurf[PITCH_BW][2]; 207 for (k = 0; k < PITCH_LAG_SPAN2; k++) { 208 corr = corrvec1[ind1++] + corrvec2[ind2++]; 209 CorrSurfPtr1[k] = corr; 210 if (corr > corr_max) { 211 corr_max = corr; /* update maximum */ 212 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 213 } 214 } 215 /* fill first and last rows of correlation surface */ 216 ind1 = 0; 217 ind2 = PITCH_BW; 218 CorrSurfPtr1 = &CorrSurf[0][2]; 219 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2]; 220 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) { 221 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); 222 adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ 223 corr = adj * (corrvec1[ind1] + corrvec2[ind2]); 224 CorrSurfPtr1[k] = corr; 225 if (corr > corr_max) { 226 corr_max = corr; /* update maximum */ 227 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 228 } 229 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); 230 CorrSurfPtr2[k] = corr; 231 if (corr > corr_max) { 232 corr_max = corr; /* update maximum */ 233 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); 234 } 235 } 236 /* fill second and next to last rows of correlation surface */ 237 ind1 = 0; 238 ind2 = PITCH_BW-1; 239 CorrSurfPtr1 = &CorrSurf[1][2]; 240 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1]; 241 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) { 242 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); 243 adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ 244 corr = adj * (corrvec1[ind1] + corrvec2[ind2]); 245 CorrSurfPtr1[k] = corr; 246 if (corr > corr_max) { 247 corr_max = corr; /* update maximum */ 248 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 249 } 250 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); 251 CorrSurfPtr2[k] = corr; 252 if (corr > corr_max) { 253 corr_max = corr; /* update maximum */ 254 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); 255 } 256 } 257 /* fill remainder of correlation surface */ 258 for (m = 2; m < PITCH_BW; m++) { 259 ind1 = 0; 260 ind2 = PITCH_BW - m; /* always larger than ind1 */ 261 CorrSurfPtr1 = &CorrSurf[m][2]; 262 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m]; 263 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) { 264 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); 265 adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ 266 corr = adj * (corrvec1[ind1] + corrvec2[ind2]); 267 CorrSurfPtr1[k] = corr; 268 if (corr > corr_max) { 269 corr_max = corr; /* update maximum */ 270 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 271 } 272 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); 273 CorrSurfPtr2[k] = corr; 274 if (corr > corr_max) { 275 corr_max = corr; /* update maximum */ 276 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); 277 } 278 } 279 } 280 281 /* threshold value to qualify as a peak */ 282 corr_max *= 0.6; 283 284 peaks_ind = 0; 285 /* find peaks */ 286 for (m = 1; m < PITCH_BW+1; m++) { 287 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 288 CorrSurfPtr1 = &CorrSurf[m][2]; 289 for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) { 290 corr = CorrSurfPtr1[k]; 291 if (corr > corr_max) { 292 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { 293 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { 294 /* found a peak; store index into matrix */ 295 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 296 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 297 } 298 } 299 } 300 } 301 } 302 for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) { 303 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 304 CorrSurfPtr1 = &CorrSurf[m][2]; 305 for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) { 306 corr = CorrSurfPtr1[k]; 307 if (corr > corr_max) { 308 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { 309 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { 310 /* found a peak; store index into matrix */ 311 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 312 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 313 } 314 } 315 } 316 } 317 } 318 319 if (peaks_ind > 0) { 320 /* examine each peak */ 321 CorrSurfPtr1 = &CorrSurf[0][0]; 322 for (k = 0; k < peaks_ind; k++) { 323 peak = peaks[k]; 324 325 /* compute four interpolated values around current peak */ 326 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a); 327 IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b); 328 IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c); 329 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d); 330 331 /* determine maximum of the interpolated values */ 332 corr = CorrSurfPtr1[peak]; 333 corr_max = intrp_a; 334 if (intrp_b > corr_max) corr_max = intrp_b; 335 if (intrp_c > corr_max) corr_max = intrp_c; 336 if (intrp_d > corr_max) corr_max = intrp_d; 337 338 /* determine where the peak sits and fill a 3x3 matrix around it */ 339 row = peak / (PITCH_LAG_SPAN2+4); 340 lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); 341 lags2[k] = (double) (lags1[k] + PITCH_BW - row); 342 if ( corr > corr_max ) { 343 T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; 344 T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; 345 T[1][1] = corr; 346 T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; 347 T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; 348 T[1][0] = intrp_a; 349 T[0][1] = intrp_b; 350 T[2][1] = intrp_c; 351 T[1][2] = intrp_d; 352 } else { 353 if (intrp_a == corr_max) { 354 lags1[k] -= 0.5; 355 lags2[k] += 0.5; 356 IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); 357 IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]); 358 T[1][1] = intrp_a; 359 T[0][2] = intrp_b; 360 T[2][2] = intrp_c; 361 T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)]; 362 T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; 363 T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; 364 T[1][2] = corr; 365 } else if (intrp_b == corr_max) { 366 lags1[k] -= 0.5; 367 lags2[k] -= 0.5; 368 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]); 369 T[2][0] = intrp_a; 370 T[1][1] = intrp_b; 371 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]); 372 T[2][2] = intrp_d; 373 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; 374 T[0][1] = CorrSurfPtr1[peak - 1]; 375 T[2][1] = corr; 376 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; 377 } else if (intrp_c == corr_max) { 378 lags1[k] += 0.5; 379 lags2[k] += 0.5; 380 T[0][0] = intrp_a; 381 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]); 382 T[1][1] = intrp_c; 383 T[0][2] = intrp_d; 384 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]); 385 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; 386 T[0][1] = corr; 387 T[2][1] = CorrSurfPtr1[peak + 1]; 388 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; 389 } else { 390 lags1[k] += 0.5; 391 lags2[k] -= 0.5; 392 T[0][0] = intrp_b; 393 T[2][0] = intrp_c; 394 T[1][1] = intrp_d; 395 IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); 396 IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); 397 T[1][0] = corr; 398 T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; 399 T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; 400 T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)]; 401 } 402 } 403 404 /* 2D parabolic interpolation gives more accurate lags and peak value */ 405 Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]); 406 } 407 408 /* determine the highest peak, after applying a bias towards short lags */ 409 corr_max = 0.0; 410 for (k = 0; k < peaks_ind; k++) { 411 corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k])); 412 if (corr > corr_max) { 413 corr_max = corr; 414 peak = k; 415 } 416 } 417 418 lags1[peak] *= 2.0; 419 lags2[peak] *= 2.0; 420 421 if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; 422 if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; 423 if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; 424 if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; 425 426 /* store lags of highest peak in output array */ 427 lags[0] = lags1[peak]; 428 lags[1] = lags1[peak]; 429 lags[2] = lags2[peak]; 430 lags[3] = lags2[peak]; 431 } 432 else 433 { 434 row = max_ind / (PITCH_LAG_SPAN2+4); 435 lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); 436 lags2[0] = (double) (lags1[0] + PITCH_BW - row); 437 438 if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; 439 if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; 440 if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; 441 if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; 442 443 /* store lags of highest peak in output array */ 444 lags[0] = lags1[0]; 445 lags[1] = lags1[0]; 446 lags[2] = lags2[0]; 447 lags[3] = lags2[0]; 448 } 449 } 450 451 452 453 /* create weighting matrix by orthogonalizing a basis of polynomials of increasing order 454 * t = (0:4)'; 455 * A = [t.^0, t.^1, t.^2, t.^3, t.^4]; 456 * [Q, dummy] = qr(A); 457 * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */ 458 static const double kWeight[5][5] = { 459 { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, 460 {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857}, 461 {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286}, 462 { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143}, 463 { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286} 464 }; 465 466 467 void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */ 468 double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ 469 PitchAnalysisStruct *State, 470 double *lags, 471 double *gains) 472 { 473 double HPin[PITCH_FRAME_LEN]; 474 double Weighted[PITCH_FRAME_LEN]; 475 double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD]; 476 double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD]; 477 double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead 478 double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD]; 479 double old_lag, old_gain; 480 double nrg_wht, tmp; 481 double Wnrg, Wfluct, Wgain; 482 double H[4][4]; 483 double grad[4]; 484 double dG[4]; 485 int k, m, n, iter; 486 487 /* high pass filtering using second order pole-zero filter */ 488 WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN); 489 490 /* copy from state into buffer */ 491 memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD); 492 493 /* compute weighted and whitened signals */ 494 WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr)); 495 496 /* copy from buffer into state */ 497 memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD); 498 499 old_lag = State->PFstr_wght.oldlagp[0]; 500 old_gain = State->PFstr_wght.oldgainp[0]; 501 502 /* inital pitch estimate */ 503 WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags); 504 505 506 /* Iterative optimization of lags - to be done */ 507 508 /* compute energy of whitened signal */ 509 nrg_wht = 0.0; 510 for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++) 511 nrg_wht += Whitened[k] * Whitened[k]; 512 513 514 /* Iterative optimization of gains */ 515 516 /* set weights for energy, gain fluctiation, and spectral gain penalty functions */ 517 Wnrg = 1.0 / nrg_wht; 518 Wgain = 0.005; 519 Wfluct = 3.0; 520 521 /* set initial gains */ 522 for (k = 0; k < 4; k++) 523 gains[k] = PITCH_MAX_GAIN_06; 524 525 /* two iterations should be enough */ 526 for (iter = 0; iter < 2; iter++) { 527 /* compute Jacobian of pre-filter output towards gains */ 528 WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains); 529 530 /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */ 531 for (k = 0; k < 4; k++) { 532 tmp = 0.0; 533 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) 534 tmp += out_G[n] * out_dG[k][n]; 535 grad[k] = tmp * Wnrg; 536 } 537 for (k = 0; k < 4; k++) { 538 for (m = 0; m <= k; m++) { 539 tmp = 0.0; 540 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) 541 tmp += out_dG[m][n] * out_dG[k][n]; 542 H[k][m] = tmp * Wnrg; 543 } 544 } 545 546 /* add gradient and Hessian (lower triangle) for dampening fast gain changes */ 547 for (k = 0; k < 4; k++) { 548 tmp = kWeight[k+1][0] * old_gain; 549 for (m = 0; m < 4; m++) 550 tmp += kWeight[k+1][m+1] * gains[m]; 551 grad[k] += tmp * Wfluct; 552 } 553 for (k = 0; k < 4; k++) { 554 for (m = 0; m <= k; m++) { 555 H[k][m] += kWeight[k+1][m+1] * Wfluct; 556 } 557 } 558 559 /* add gradient and Hessian for dampening gain */ 560 for (k = 0; k < 3; k++) { 561 tmp = 1.0 / (1 - gains[k]); 562 grad[k] += tmp * tmp * Wgain; 563 H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain); 564 } 565 tmp = 1.0 / (1 - gains[3]); 566 grad[3] += 1.33 * (tmp * tmp * Wgain); 567 H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain); 568 569 570 /* compute Cholesky factorization of Hessian 571 * by overwritting the upper triangle; scale factors on diagonal 572 * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */ 573 H[0][1] = H[1][0] / H[0][0]; 574 H[0][2] = H[2][0] / H[0][0]; 575 H[0][3] = H[3][0] / H[0][0]; 576 H[1][1] -= H[0][0] * H[0][1] * H[0][1]; 577 H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1]; 578 H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1]; 579 H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2]; 580 H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2]; 581 H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3]; 582 583 /* Compute update as delta_gains = -inv(H) * grad */ 584 /* copy and negate */ 585 for (k = 0; k < 4; k++) 586 dG[k] = -grad[k]; 587 /* back substitution */ 588 dG[1] -= dG[0] * H[0][1]; 589 dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2]; 590 dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3]; 591 /* scale */ 592 for (k = 0; k < 4; k++) 593 dG[k] /= H[k][k]; 594 /* back substitution */ 595 dG[2] -= dG[3] * H[2][3]; 596 dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2]; 597 dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1]; 598 599 /* update gains and check range */ 600 for (k = 0; k < 4; k++) { 601 gains[k] += dG[k]; 602 if (gains[k] > PITCH_MAX_GAIN) 603 gains[k] = PITCH_MAX_GAIN; 604 else if (gains[k] < 0.0) 605 gains[k] = 0.0; 606 } 607 } 608 609 /* update state for next frame */ 610 WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains); 611 612 /* concatenate previous input's end and current input */ 613 memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD); 614 memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN); 615 616 /* lookahead pitch filtering for masking analysis */ 617 WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains); 618 619 /* store last part of input */ 620 for (k = 0; k < QLOOKAHEAD; k++) 621 State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN]; 622 } 623