1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "pitch_estimator.h" 12 13 #include <math.h> 14 #include <memory.h> 15 #include <string.h> 16 #ifdef WEBRTC_ANDROID 17 #include <stdlib.h> 18 #endif 19 20 static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, 21 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; 22 23 /* interpolation filter */ 24 __inline static void IntrepolFilter(double *data_ptr, double *intrp) 25 { 26 *intrp = kInterpolWin[0] * data_ptr[-3]; 27 *intrp += kInterpolWin[1] * data_ptr[-2]; 28 *intrp += kInterpolWin[2] * data_ptr[-1]; 29 *intrp += kInterpolWin[3] * data_ptr[0]; 30 *intrp += kInterpolWin[4] * data_ptr[1]; 31 *intrp += kInterpolWin[5] * data_ptr[2]; 32 *intrp += kInterpolWin[6] * data_ptr[3]; 33 *intrp += kInterpolWin[7] * data_ptr[4]; 34 } 35 36 37 /* 2D parabolic interpolation */ 38 /* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */ 39 __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val) 40 { 41 double c, b[2], A[2][2]; 42 double t1, t2, d; 43 double delta1, delta2; 44 45 46 // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}}; 47 // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0 48 49 c = T[1][1]; 50 b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]); 51 b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]); 52 A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]); 53 t1 = 0.5 * (T[0][0] + T[2][2]) - c; 54 t2 = 0.5 * (T[2][0] + T[0][2]) - c; 55 d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2; 56 A[0][0] = -t1 - 0.5 * d; 57 A[1][1] = -t2 - 0.5 * d; 58 59 /* deal with singularities or ill-conditioned cases */ 60 if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) { 61 *peak_val = T[1][1]; 62 return; 63 } 64 65 /* Cholesky decomposition: replace A by upper-triangular factor */ 66 A[0][0] = sqrt(A[0][0]); 67 A[0][1] = A[0][1] / A[0][0]; 68 A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]); 69 70 /* compute [x; y] = -0.5 * inv(A) * b */ 71 t1 = b[0] / A[0][0]; 72 t2 = (b[1] - t1 * A[0][1]) / A[1][1]; 73 delta2 = t2 / A[1][1]; 74 delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0]; 75 delta2 *= 0.5; 76 77 /* limit norm */ 78 t1 = delta1 * delta1 + delta2 * delta2; 79 if (t1 > 1.0) { 80 delta1 /= t1; 81 delta2 /= t1; 82 } 83 84 *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c; 85 86 *x += delta1; 87 *y += delta2; 88 } 89 90 91 static void PCorr(const double *in, double *outcorr) 92 { 93 double sum, ysum, prod; 94 const double *x, *inptr; 95 int k, n; 96 97 //ysum = 1e-6; /* use this with float (i.s.o. double)! */ 98 ysum = 1e-13; 99 sum = 0.0; 100 x = in + PITCH_MAX_LAG/2 + 2; 101 for (n = 0; n < PITCH_CORR_LEN2; n++) { 102 ysum += in[n] * in[n]; 103 sum += x[n] * in[n]; 104 } 105 106 outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */ 107 *outcorr = sum / sqrt(ysum); 108 109 for (k = 1; k < PITCH_LAG_SPAN2; k++) { 110 ysum -= in[k-1] * in[k-1]; 111 ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1]; 112 sum = 0.0; 113 inptr = &in[k]; 114 prod = x[0] * inptr[0]; 115 for (n = 1; n < PITCH_CORR_LEN2; n++) { 116 sum += prod; 117 prod = x[n] * inptr[n]; 118 } 119 sum += prod; 120 outcorr--; 121 *outcorr = sum / sqrt(ysum); 122 } 123 } 124 125 126 void WebRtcIsac_InitializePitch(const double *in, 127 const double old_lag, 128 const double old_gain, 129 PitchAnalysisStruct *State, 130 double *lags) 131 { 132 double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2]; 133 double ratio, log_lag, gain_bias; 134 double bias; 135 double corrvec1[PITCH_LAG_SPAN2]; 136 double corrvec2[PITCH_LAG_SPAN2]; 137 int m, k; 138 // Allocating 10 extra entries at the begining of the CorrSurf 139 double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)]; 140 double* CorrSurf[2*PITCH_BW+3]; 141 double *CorrSurfPtr1, *CorrSurfPtr2; 142 double LagWin[3] = {0.2, 0.5, 0.98}; 143 int ind1, ind2, peaks_ind, peak, max_ind; 144 int peaks[PITCH_MAX_NUM_PEAKS]; 145 double adj, gain_tmp; 146 double corr, corr_max; 147 double intrp_a, intrp_b, intrp_c, intrp_d; 148 double peak_vals[PITCH_MAX_NUM_PEAKS]; 149 double lags1[PITCH_MAX_NUM_PEAKS]; 150 double lags2[PITCH_MAX_NUM_PEAKS]; 151 double T[3][3]; 152 int row; 153 154 for(k = 0; k < 2*PITCH_BW+3; k++) 155 { 156 CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)]; 157 } 158 /* reset CorrSurf matrix */ 159 memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4))); 160 161 //warnings -DH 162 max_ind = 0; 163 peak = 0; 164 165 /* copy old values from state buffer */ 166 memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); 167 168 /* decimation; put result after the old values */ 169 WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN, 170 &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); 171 172 /* low-pass filtering */ 173 for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) 174 buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2]; 175 176 /* copy end part back into state buffer */ 177 memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); 178 179 /* compute correlation for first and second half of the frame */ 180 PCorr(buf_dec, corrvec1); 181 PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2); 182 183 /* bias towards pitch lag of previous frame */ 184 log_lag = log(0.5 * old_lag); 185 gain_bias = 4.0 * old_gain * old_gain; 186 if (gain_bias > 0.8) gain_bias = 0.8; 187 for (k = 0; k < PITCH_LAG_SPAN2; k++) 188 { 189 ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag; 190 bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); 191 corrvec1[k] *= bias; 192 } 193 194 /* taper correlation functions */ 195 for (k = 0; k < 3; k++) { 196 gain_tmp = LagWin[k]; 197 corrvec1[k] *= gain_tmp; 198 corrvec2[k] *= gain_tmp; 199 corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp; 200 corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp; 201 } 202 203 corr_max = 0.0; 204 /* fill middle row of correlation surface */ 205 ind1 = 0; 206 ind2 = 0; 207 CorrSurfPtr1 = &CorrSurf[PITCH_BW][2]; 208 for (k = 0; k < PITCH_LAG_SPAN2; k++) { 209 corr = corrvec1[ind1++] + corrvec2[ind2++]; 210 CorrSurfPtr1[k] = corr; 211 if (corr > corr_max) { 212 corr_max = corr; /* update maximum */ 213 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 214 } 215 } 216 /* fill first and last rows of correlation surface */ 217 ind1 = 0; 218 ind2 = PITCH_BW; 219 CorrSurfPtr1 = &CorrSurf[0][2]; 220 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2]; 221 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) { 222 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); 223 adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ 224 corr = adj * (corrvec1[ind1] + corrvec2[ind2]); 225 CorrSurfPtr1[k] = corr; 226 if (corr > corr_max) { 227 corr_max = corr; /* update maximum */ 228 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 229 } 230 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); 231 CorrSurfPtr2[k] = corr; 232 if (corr > corr_max) { 233 corr_max = corr; /* update maximum */ 234 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); 235 } 236 } 237 /* fill second and next to last rows of correlation surface */ 238 ind1 = 0; 239 ind2 = PITCH_BW-1; 240 CorrSurfPtr1 = &CorrSurf[1][2]; 241 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1]; 242 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) { 243 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); 244 adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ 245 corr = adj * (corrvec1[ind1] + corrvec2[ind2]); 246 CorrSurfPtr1[k] = corr; 247 if (corr > corr_max) { 248 corr_max = corr; /* update maximum */ 249 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 250 } 251 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); 252 CorrSurfPtr2[k] = corr; 253 if (corr > corr_max) { 254 corr_max = corr; /* update maximum */ 255 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); 256 } 257 } 258 /* fill remainder of correlation surface */ 259 for (m = 2; m < PITCH_BW; m++) { 260 ind1 = 0; 261 ind2 = PITCH_BW - m; /* always larger than ind1 */ 262 CorrSurfPtr1 = &CorrSurf[m][2]; 263 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m]; 264 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) { 265 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); 266 adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ 267 corr = adj * (corrvec1[ind1] + corrvec2[ind2]); 268 CorrSurfPtr1[k] = corr; 269 if (corr > corr_max) { 270 corr_max = corr; /* update maximum */ 271 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 272 } 273 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); 274 CorrSurfPtr2[k] = corr; 275 if (corr > corr_max) { 276 corr_max = corr; /* update maximum */ 277 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); 278 } 279 } 280 } 281 282 /* threshold value to qualify as a peak */ 283 corr_max *= 0.6; 284 285 peaks_ind = 0; 286 /* find peaks */ 287 for (m = 1; m < PITCH_BW+1; m++) { 288 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 289 CorrSurfPtr1 = &CorrSurf[m][2]; 290 for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) { 291 corr = CorrSurfPtr1[k]; 292 if (corr > corr_max) { 293 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { 294 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { 295 /* found a peak; store index into matrix */ 296 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 297 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 298 } 299 } 300 } 301 } 302 } 303 for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) { 304 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 305 CorrSurfPtr1 = &CorrSurf[m][2]; 306 for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) { 307 corr = CorrSurfPtr1[k]; 308 if (corr > corr_max) { 309 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { 310 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { 311 /* found a peak; store index into matrix */ 312 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); 313 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; 314 } 315 } 316 } 317 } 318 } 319 320 if (peaks_ind > 0) { 321 /* examine each peak */ 322 CorrSurfPtr1 = &CorrSurf[0][0]; 323 for (k = 0; k < peaks_ind; k++) { 324 peak = peaks[k]; 325 326 /* compute four interpolated values around current peak */ 327 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a); 328 IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b); 329 IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c); 330 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d); 331 332 /* determine maximum of the interpolated values */ 333 corr = CorrSurfPtr1[peak]; 334 corr_max = intrp_a; 335 if (intrp_b > corr_max) corr_max = intrp_b; 336 if (intrp_c > corr_max) corr_max = intrp_c; 337 if (intrp_d > corr_max) corr_max = intrp_d; 338 339 /* determine where the peak sits and fill a 3x3 matrix around it */ 340 row = peak / (PITCH_LAG_SPAN2+4); 341 lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); 342 lags2[k] = (double) (lags1[k] + PITCH_BW - row); 343 if ( corr > corr_max ) { 344 T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; 345 T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; 346 T[1][1] = corr; 347 T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; 348 T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; 349 T[1][0] = intrp_a; 350 T[0][1] = intrp_b; 351 T[2][1] = intrp_c; 352 T[1][2] = intrp_d; 353 } else { 354 if (intrp_a == corr_max) { 355 lags1[k] -= 0.5; 356 lags2[k] += 0.5; 357 IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); 358 IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]); 359 T[1][1] = intrp_a; 360 T[0][2] = intrp_b; 361 T[2][2] = intrp_c; 362 T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)]; 363 T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; 364 T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; 365 T[1][2] = corr; 366 } else if (intrp_b == corr_max) { 367 lags1[k] -= 0.5; 368 lags2[k] -= 0.5; 369 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]); 370 T[2][0] = intrp_a; 371 T[1][1] = intrp_b; 372 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]); 373 T[2][2] = intrp_d; 374 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; 375 T[0][1] = CorrSurfPtr1[peak - 1]; 376 T[2][1] = corr; 377 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; 378 } else if (intrp_c == corr_max) { 379 lags1[k] += 0.5; 380 lags2[k] += 0.5; 381 T[0][0] = intrp_a; 382 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]); 383 T[1][1] = intrp_c; 384 T[0][2] = intrp_d; 385 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]); 386 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; 387 T[0][1] = corr; 388 T[2][1] = CorrSurfPtr1[peak + 1]; 389 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; 390 } else { 391 lags1[k] += 0.5; 392 lags2[k] -= 0.5; 393 T[0][0] = intrp_b; 394 T[2][0] = intrp_c; 395 T[1][1] = intrp_d; 396 IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); 397 IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); 398 T[1][0] = corr; 399 T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; 400 T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; 401 T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)]; 402 } 403 } 404 405 /* 2D parabolic interpolation gives more accurate lags and peak value */ 406 Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]); 407 } 408 409 /* determine the highest peak, after applying a bias towards short lags */ 410 corr_max = 0.0; 411 for (k = 0; k < peaks_ind; k++) { 412 corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k])); 413 if (corr > corr_max) { 414 corr_max = corr; 415 peak = k; 416 } 417 } 418 419 lags1[peak] *= 2.0; 420 lags2[peak] *= 2.0; 421 422 if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; 423 if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; 424 if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; 425 if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; 426 427 /* store lags of highest peak in output array */ 428 lags[0] = lags1[peak]; 429 lags[1] = lags1[peak]; 430 lags[2] = lags2[peak]; 431 lags[3] = lags2[peak]; 432 } 433 else 434 { 435 row = max_ind / (PITCH_LAG_SPAN2+4); 436 lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); 437 lags2[0] = (double) (lags1[0] + PITCH_BW - row); 438 439 if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; 440 if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; 441 if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; 442 if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; 443 444 /* store lags of highest peak in output array */ 445 lags[0] = lags1[0]; 446 lags[1] = lags1[0]; 447 lags[2] = lags2[0]; 448 lags[3] = lags2[0]; 449 } 450 } 451 452 453 454 /* create weighting matrix by orthogonalizing a basis of polynomials of increasing order 455 * t = (0:4)'; 456 * A = [t.^0, t.^1, t.^2, t.^3, t.^4]; 457 * [Q, dummy] = qr(A); 458 * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */ 459 static const double kWeight[5][5] = { 460 { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, 461 {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857}, 462 {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286}, 463 { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143}, 464 { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286} 465 }; 466 467 468 void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */ 469 double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ 470 PitchAnalysisStruct *State, 471 double *lags, 472 double *gains) 473 { 474 double HPin[PITCH_FRAME_LEN]; 475 double Weighted[PITCH_FRAME_LEN]; 476 double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD]; 477 double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD]; 478 double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead 479 double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD]; 480 double old_lag, old_gain; 481 double nrg_wht, tmp; 482 double Wnrg, Wfluct, Wgain; 483 double H[4][4]; 484 double grad[4]; 485 double dG[4]; 486 int k, m, n, iter; 487 488 /* high pass filtering using second order pole-zero filter */ 489 WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN); 490 491 /* copy from state into buffer */ 492 memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD); 493 494 /* compute weighted and whitened signals */ 495 WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr)); 496 497 /* copy from buffer into state */ 498 memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD); 499 500 old_lag = State->PFstr_wght.oldlagp[0]; 501 old_gain = State->PFstr_wght.oldgainp[0]; 502 503 /* inital pitch estimate */ 504 WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags); 505 506 507 /* Iterative optimization of lags - to be done */ 508 509 /* compute energy of whitened signal */ 510 nrg_wht = 0.0; 511 for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++) 512 nrg_wht += Whitened[k] * Whitened[k]; 513 514 515 /* Iterative optimization of gains */ 516 517 /* set weights for energy, gain fluctiation, and spectral gain penalty functions */ 518 Wnrg = 1.0 / nrg_wht; 519 Wgain = 0.005; 520 Wfluct = 3.0; 521 522 /* set initial gains */ 523 for (k = 0; k < 4; k++) 524 gains[k] = PITCH_MAX_GAIN_06; 525 526 /* two iterations should be enough */ 527 for (iter = 0; iter < 2; iter++) { 528 /* compute Jacobian of pre-filter output towards gains */ 529 WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains); 530 531 /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */ 532 for (k = 0; k < 4; k++) { 533 tmp = 0.0; 534 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) 535 tmp += out_G[n] * out_dG[k][n]; 536 grad[k] = tmp * Wnrg; 537 } 538 for (k = 0; k < 4; k++) { 539 for (m = 0; m <= k; m++) { 540 tmp = 0.0; 541 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) 542 tmp += out_dG[m][n] * out_dG[k][n]; 543 H[k][m] = tmp * Wnrg; 544 } 545 } 546 547 /* add gradient and Hessian (lower triangle) for dampening fast gain changes */ 548 for (k = 0; k < 4; k++) { 549 tmp = kWeight[k+1][0] * old_gain; 550 for (m = 0; m < 4; m++) 551 tmp += kWeight[k+1][m+1] * gains[m]; 552 grad[k] += tmp * Wfluct; 553 } 554 for (k = 0; k < 4; k++) { 555 for (m = 0; m <= k; m++) { 556 H[k][m] += kWeight[k+1][m+1] * Wfluct; 557 } 558 } 559 560 /* add gradient and Hessian for dampening gain */ 561 for (k = 0; k < 3; k++) { 562 tmp = 1.0 / (1 - gains[k]); 563 grad[k] += tmp * tmp * Wgain; 564 H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain); 565 } 566 tmp = 1.0 / (1 - gains[3]); 567 grad[3] += 1.33 * (tmp * tmp * Wgain); 568 H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain); 569 570 571 /* compute Cholesky factorization of Hessian 572 * by overwritting the upper triangle; scale factors on diagonal 573 * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */ 574 H[0][1] = H[1][0] / H[0][0]; 575 H[0][2] = H[2][0] / H[0][0]; 576 H[0][3] = H[3][0] / H[0][0]; 577 H[1][1] -= H[0][0] * H[0][1] * H[0][1]; 578 H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1]; 579 H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1]; 580 H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2]; 581 H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2]; 582 H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3]; 583 584 /* Compute update as delta_gains = -inv(H) * grad */ 585 /* copy and negate */ 586 for (k = 0; k < 4; k++) 587 dG[k] = -grad[k]; 588 /* back substitution */ 589 dG[1] -= dG[0] * H[0][1]; 590 dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2]; 591 dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3]; 592 /* scale */ 593 for (k = 0; k < 4; k++) 594 dG[k] /= H[k][k]; 595 /* back substitution */ 596 dG[2] -= dG[3] * H[2][3]; 597 dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2]; 598 dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1]; 599 600 /* update gains and check range */ 601 for (k = 0; k < 4; k++) { 602 gains[k] += dG[k]; 603 if (gains[k] > PITCH_MAX_GAIN) 604 gains[k] = PITCH_MAX_GAIN; 605 else if (gains[k] < 0.0) 606 gains[k] = 0.0; 607 } 608 } 609 610 /* update state for next frame */ 611 WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains); 612 613 /* concatenate previous input's end and current input */ 614 memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD); 615 memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN); 616 617 /* lookahead pitch filtering for masking analysis */ 618 WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains); 619 620 /* store last part of input */ 621 for (k = 0; k < QLOOKAHEAD; k++) 622 State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN]; 623 } 624