1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "pitch_estimator.h" 12 13 #include <math.h> 14 #include <memory.h> 15 #include <stdlib.h> 16 17 #include "os_specific_inline.h" 18 19 /* 20 * We are implementing the following filters; 21 * 22 * Pre-filtering: 23 * y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag); 24 * 25 * Post-filtering: 26 * y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag); 27 * 28 * Note that |lag| is a floating number so we perform an interpolation to 29 * obtain the correct |lag|. 30 * 31 */ 32 33 static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25, 34 -0.07}; 35 36 /* interpolation coefficients; generated by design_pitch_filter.m */ 37 static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = { 38 {-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125, 39 0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109, 40 0.01754159521746}, 41 {-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643, 42 0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084, 43 0.01654127246314}, 44 {-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787, 45 0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311, 46 0.01295781500709}, 47 {-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393, 48 0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166, 49 0.00719783432422}, 50 {-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001, 51 0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000, 52 -0.00000000000000}, 53 {0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562, 54 0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377, 55 -0.00764851320885}, 56 {0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650, 57 0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059, 58 -0.01463300534216}, 59 {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190, 60 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865, 61 -0.01985640750433} 62 }; 63 64 /* 65 * Enumerating the operation of the filter. 66 * iSAC has 4 different pitch-filter which are very similar in their structure. 67 * 68 * kPitchFilterPre : In this mode the filter is operating as pitch 69 * pre-filter. This is used at the encoder. 70 * kPitchFilterPost : In this mode the filter is operating as pitch 71 * post-filter. This is the inverse of pre-filter and used 72 * in the decoder. 73 * kPitchFilterPreLa : This is, in structure, similar to pre-filtering but 74 * utilizing 3 millisecond lookahead. It is used to 75 * obtain the signal for LPC analysis. 76 * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but 77 * differential changes in gain is considered. This is 78 * used to find the optimal gain. 79 */ 80 typedef enum { 81 kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain 82 } PitchFilterOperation; 83 84 /* 85 * Structure with parameters used for pitch-filtering. 86 * buffer : a buffer where the sum of previous inputs and outputs 87 * are stored. 88 * damper_state : the state of the damping filter. The filter is defined by 89 * |kDampFilter|. 90 * interpol_coeff : pointer to a set of coefficient which are used to utilize 91 * fractional pitch by interpolation. 92 * gain : pitch-gain to be applied to the current segment of input. 93 * lag : pitch-lag for the current segment of input. 94 * lag_offset : the offset of lag w.r.t. current sample. 95 * sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC 96 * frame. 97 * This specifies the usage of the filter. See 98 * 'PitchFilterOperation' for operational modes. 99 * num_samples : number of samples to be processed in each segment. 100 * index : index of the input and output sample. 101 * damper_state_dg : state of damping filter for different trial gains. 102 * gain_mult : differential changes to gain. 103 */ 104 typedef struct { 105 double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD]; 106 double damper_state[PITCH_DAMPORDER]; 107 const double *interpol_coeff; 108 double gain; 109 double lag; 110 int lag_offset; 111 112 int sub_frame; 113 PitchFilterOperation mode; 114 int num_samples; 115 int index; 116 117 double damper_state_dg[4][PITCH_DAMPORDER]; 118 double gain_mult[4]; 119 } PitchFilterParam; 120 121 /********************************************************************** 122 * FilterSegment() 123 * Filter one segment, a quarter of a frame. 124 * 125 * Inputs 126 * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate. 127 * filter_param : pitch filter parameters. 128 * 129 * Outputs 130 * out_data : pointer to a buffer where the filtered signal is written to. 131 * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer 132 * where the output of different gain values (differential 133 * change to gain) is written. 134 */ 135 static void FilterSegment(const double* in_data, PitchFilterParam* parameters, 136 double* out_data, 137 double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { 138 int n; 139 int m; 140 int j; 141 double sum; 142 double sum2; 143 /* Index of |parameters->buffer| where the output is written to. */ 144 int pos = parameters->index + PITCH_BUFFSIZE; 145 /* Index of |parameters->buffer| where samples are read for fractional-lag 146 * computation. */ 147 int pos_lag = pos - parameters->lag_offset; 148 149 for (n = 0; n < parameters->num_samples; ++n) { 150 /* Shift low pass filter states. */ 151 for (m = PITCH_DAMPORDER - 1; m > 0; --m) { 152 parameters->damper_state[m] = parameters->damper_state[m - 1]; 153 } 154 /* Filter to get fractional pitch. */ 155 sum = 0.0; 156 for (m = 0; m < PITCH_FRACORDER; ++m) { 157 sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m]; 158 } 159 /* Multiply with gain. */ 160 parameters->damper_state[0] = parameters->gain * sum; 161 162 if (parameters->mode == kPitchFilterPreGain) { 163 int lag_index = parameters->index - parameters->lag_offset; 164 int m_tmp = (lag_index < 0) ? -lag_index : 0; 165 /* Update the damper state for the new sample. */ 166 for (m = PITCH_DAMPORDER - 1; m > 0; --m) { 167 for (j = 0; j < 4; ++j) { 168 parameters->damper_state_dg[j][m] = 169 parameters->damper_state_dg[j][m - 1]; 170 } 171 } 172 173 for (j = 0; j < parameters->sub_frame + 1; ++j) { 174 /* Filter for fractional pitch. */ 175 sum2 = 0.0; 176 for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) { 177 /* |lag_index + m| is always larger than or equal to zero, see how 178 * m_tmp is computed. This is equivalent to assume samples outside 179 * |out_dg[j]| are zero. */ 180 sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m]; 181 } 182 /* Add the contribution of differential gain change. */ 183 parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum + 184 parameters->gain * sum2; 185 } 186 187 /* Filter with damping filter, and store the results. */ 188 for (j = 0; j < parameters->sub_frame + 1; ++j) { 189 sum = 0.0; 190 for (m = 0; m < PITCH_DAMPORDER; ++m) { 191 sum -= parameters->damper_state_dg[j][m] * kDampFilter[m]; 192 } 193 out_dg[j][parameters->index] = sum; 194 } 195 } 196 /* Filter with damping filter. */ 197 sum = 0.0; 198 for (m = 0; m < PITCH_DAMPORDER; ++m) { 199 sum += parameters->damper_state[m] * kDampFilter[m]; 200 } 201 202 /* Subtract from input and update buffer. */ 203 out_data[parameters->index] = in_data[parameters->index] - sum; 204 parameters->buffer[pos] = in_data[parameters->index] + 205 out_data[parameters->index]; 206 207 ++parameters->index; 208 ++pos; 209 ++pos_lag; 210 } 211 return; 212 } 213 214 /* Update filter parameters based on the pitch-gains and pitch-lags. */ 215 static void Update(PitchFilterParam* parameters) { 216 double fraction; 217 int fraction_index; 218 /* Compute integer lag-offset. */ 219 parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY + 220 0.5); 221 /* Find correct set of coefficients for computing fractional pitch. */ 222 fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY); 223 fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5); 224 parameters->interpol_coeff = kIntrpCoef[fraction_index]; 225 226 if (parameters->mode == kPitchFilterPreGain) { 227 /* If in this mode make a differential change to pitch gain. */ 228 parameters->gain_mult[parameters->sub_frame] += 0.2; 229 if (parameters->gain_mult[parameters->sub_frame] > 1.0) { 230 parameters->gain_mult[parameters->sub_frame] = 1.0; 231 } 232 if (parameters->sub_frame > 0) { 233 parameters->gain_mult[parameters->sub_frame - 1] -= 0.2; 234 } 235 } 236 } 237 238 /****************************************************************************** 239 * FilterFrame() 240 * Filter a frame of 30 millisecond, given pitch-lags and pitch-gains. 241 * 242 * Inputs 243 * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate. 244 * lags : pointer to pitch-lags, 4 lags per frame. 245 * gains : pointer to pitch-gians, 4 gains per frame. 246 * mode : defining the functionality of the filter. It takes the 247 * following values. 248 * kPitchFilterPre: Pitch pre-filter, used at encoder. 249 * kPitchFilterPost: Pitch post-filter, used at decoder. 250 * kPitchFilterPreLa: Pitch pre-filter with lookahead. 251 * kPitchFilterPreGain: Pitch pre-filter used to otain optimal 252 * pitch-gains. 253 * 254 * Outputs 255 * out_data : pointer to a buffer where the filtered signal is written to. 256 * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer 257 * where the output of different gain values (differential 258 * change to gain) is written. 259 */ 260 static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, 261 double* lags, double* gains, PitchFilterOperation mode, 262 double* out_data, 263 double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { 264 PitchFilterParam filter_parameters; 265 double gain_delta, lag_delta; 266 double old_lag, old_gain; 267 int n; 268 int m; 269 const double kEnhancer = 1.3; 270 271 /* Set up buffer and states. */ 272 filter_parameters.index = 0; 273 filter_parameters.lag_offset = 0; 274 filter_parameters.mode = mode; 275 /* Copy states to local variables. */ 276 memcpy(filter_parameters.buffer, filter_state->ubuf, 277 sizeof(filter_state->ubuf)); 278 memcpy(filter_parameters.damper_state, filter_state->ystate, 279 sizeof(filter_state->ystate)); 280 281 if (mode == kPitchFilterPreGain) { 282 /* Clear buffers. */ 283 memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult)); 284 memset(filter_parameters.damper_state_dg, 0, 285 sizeof(filter_parameters.damper_state_dg)); 286 for (n = 0; n < PITCH_SUBFRAMES; ++n) { 287 //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD)); 288 memset(out_dg[n], 0, sizeof(out_dg[n])); 289 } 290 } else if (mode == kPitchFilterPost) { 291 /* Make output more periodic. Negative sign is to change the structure 292 * of the filter. */ 293 for (n = 0; n < PITCH_SUBFRAMES; ++n) { 294 gains[n] *= -kEnhancer; 295 } 296 } 297 298 old_lag = *filter_state->oldlagp; 299 old_gain = *filter_state->oldgainp; 300 301 /* No interpolation if pitch lag step is big. */ 302 if ((lags[0] > (PITCH_UPSTEP * old_lag)) || 303 (lags[0] < (PITCH_DOWNSTEP * old_lag))) { 304 old_lag = lags[0]; 305 old_gain = gains[0]; 306 307 if (mode == kPitchFilterPreGain) { 308 filter_parameters.gain_mult[0] = 1.0; 309 } 310 } 311 312 filter_parameters.num_samples = PITCH_UPDATE; 313 for (m = 0; m < PITCH_SUBFRAMES; ++m) { 314 /* Set the sub-frame value. */ 315 filter_parameters.sub_frame = m; 316 /* Calculate interpolation steps for pitch-lag and pitch-gain. */ 317 lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME; 318 filter_parameters.lag = old_lag; 319 gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME; 320 filter_parameters.gain = old_gain; 321 /* Store for the next sub-frame. */ 322 old_lag = lags[m]; 323 old_gain = gains[m]; 324 325 for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) { 326 /* Step-wise interpolation of pitch gains and lags. As pitch-lag changes, 327 * some parameters of filter need to be update. */ 328 filter_parameters.gain += gain_delta; 329 filter_parameters.lag += lag_delta; 330 /* Update parameters according to new lag value. */ 331 Update(&filter_parameters); 332 /* Filter a segment of input. */ 333 FilterSegment(in_data, &filter_parameters, out_data, out_dg); 334 } 335 } 336 337 if (mode != kPitchFilterPreGain) { 338 /* Export buffer and states. */ 339 memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN], 340 sizeof(filter_state->ubuf)); 341 memcpy(filter_state->ystate, filter_parameters.damper_state, 342 sizeof(filter_state->ystate)); 343 344 /* Store for the next frame. */ 345 *filter_state->oldlagp = old_lag; 346 *filter_state->oldgainp = old_gain; 347 } 348 349 if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) { 350 /* Filter the lookahead segment, this is treated as the last sub-frame. So 351 * set |pf_param| to last sub-frame. */ 352 filter_parameters.sub_frame = PITCH_SUBFRAMES - 1; 353 filter_parameters.num_samples = QLOOKAHEAD; 354 FilterSegment(in_data, &filter_parameters, out_data, out_dg); 355 } 356 } 357 358 void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data, 359 PitchFiltstr* pf_state, double* lags, 360 double* gains) { 361 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL); 362 } 363 364 void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data, 365 PitchFiltstr* pf_state, double* lags, 366 double* gains) { 367 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data, 368 NULL); 369 } 370 371 void WebRtcIsac_PitchfilterPre_gains( 372 double* in_data, double* out_data, 373 double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state, 374 double* lags, double* gains) { 375 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data, 376 out_dg); 377 } 378 379 void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data, 380 PitchFiltstr* pf_state, double* lags, 381 double* gains) { 382 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL); 383 } 384