Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "pitch_estimator.h"
     12 
     13 #include <math.h>
     14 #include <memory.h>
     15 #include <stdlib.h>
     16 
     17 #include "os_specific_inline.h"
     18 
     19 /*
     20  * We are implementing the following filters;
     21  *
     22  * Pre-filtering:
     23  *   y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
     24  *
     25  * Post-filtering:
     26  *   y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
     27  *
     28  * Note that |lag| is a floating number so we perform an interpolation to
     29  * obtain the correct |lag|.
     30  *
     31  */
     32 
     33 static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
     34     -0.07};
     35 
     36 /* interpolation coefficients; generated by design_pitch_filter.m */
     37 static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
     38     {-0.02239172458614,  0.06653315052934, -0.16515880017569,  0.60701333734125,
     39      0.64671399919202, -0.20249000396417,  0.09926548334755, -0.04765933793109,
     40      0.01754159521746},
     41     {-0.01985640750434,  0.05816126837866, -0.13991265473714,  0.44560418147643,
     42      0.79117042386876, -0.20266133815188,  0.09585268418555, -0.04533310458084,
     43      0.01654127246314},
     44     {-0.01463300534216,  0.04229888475060, -0.09897034715253,  0.28284326017787,
     45      0.90385267956632, -0.16976950138649,  0.07704272393639, -0.03584218578311,
     46      0.01295781500709},
     47     {-0.00764851320885,  0.02184035544377, -0.04985561057281,  0.13083306574393,
     48      0.97545011664662, -0.10177807997561,  0.04400901776474, -0.02010737175166,
     49      0.00719783432422},
     50     {-0.00000000000000,  0.00000000000000, -0.00000000000001,  0.00000000000001,
     51      0.99999999999999,  0.00000000000001, -0.00000000000001,  0.00000000000000,
     52      -0.00000000000000},
     53     {0.00719783432422, -0.02010737175166,  0.04400901776474, -0.10177807997562,
     54      0.97545011664663,  0.13083306574393, -0.04985561057280,  0.02184035544377,
     55      -0.00764851320885},
     56     {0.01295781500710, -0.03584218578312,  0.07704272393640, -0.16976950138650,
     57      0.90385267956634,  0.28284326017785, -0.09897034715252,  0.04229888475059,
     58      -0.01463300534216},
     59     {0.01654127246315, -0.04533310458085,  0.09585268418557, -0.20266133815190,
     60      0.79117042386878,  0.44560418147640, -0.13991265473712,  0.05816126837865,
     61      -0.01985640750433}
     62 };
     63 
     64 /*
     65  * Enumerating the operation of the filter.
     66  * iSAC has 4 different pitch-filter which are very similar in their structure.
     67  *
     68  * kPitchFilterPre     : In this mode the filter is operating as pitch
     69  *                       pre-filter. This is used at the encoder.
     70  * kPitchFilterPost    : In this mode the filter is operating as pitch
     71  *                       post-filter. This is the inverse of pre-filter and used
     72  *                       in the decoder.
     73  * kPitchFilterPreLa   : This is, in structure, similar to pre-filtering but
     74  *                       utilizing 3 millisecond lookahead. It is used to
     75  *                       obtain the signal for LPC analysis.
     76  * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
     77  *                       differential changes in gain is considered. This is
     78  *                       used to find the optimal gain.
     79  */
     80 typedef enum {
     81   kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
     82 } PitchFilterOperation;
     83 
     84 /*
     85  * Structure with parameters used for pitch-filtering.
     86  * buffer           : a buffer where the sum of previous inputs and outputs
     87  *                    are stored.
     88  * damper_state     : the state of the damping filter. The filter is defined by
     89  *                    |kDampFilter|.
     90  * interpol_coeff   : pointer to a set of coefficient which are used to utilize
     91  *                    fractional pitch by interpolation.
     92  * gain             : pitch-gain to be applied to the current segment of input.
     93  * lag              : pitch-lag for the current segment of input.
     94  * lag_offset       : the offset of lag w.r.t. current sample.
     95  * sub_frame        : sub-frame index, there are 4 pitch sub-frames in an iSAC
     96  *                    frame.
     97  *                    This specifies the usage of the filter. See
     98  *                    'PitchFilterOperation' for operational modes.
     99  * num_samples      : number of samples to be processed in each segment.
    100  * index            : index of the input and output sample.
    101  * damper_state_dg  : state of damping filter for different trial gains.
    102  * gain_mult        : differential changes to gain.
    103  */
    104 typedef struct {
    105   double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
    106   double damper_state[PITCH_DAMPORDER];
    107   const double *interpol_coeff;
    108   double gain;
    109   double lag;
    110   int lag_offset;
    111 
    112   int sub_frame;
    113   PitchFilterOperation mode;
    114   int num_samples;
    115   int index;
    116 
    117   double damper_state_dg[4][PITCH_DAMPORDER];
    118   double gain_mult[4];
    119 } PitchFilterParam;
    120 
    121 /**********************************************************************
    122  * FilterSegment()
    123  * Filter one segment, a quarter of a frame.
    124  *
    125  * Inputs
    126  *   in_data      : pointer to the input signal of 30 ms at 8 kHz sample-rate.
    127  *   filter_param : pitch filter parameters.
    128  *
    129  * Outputs
    130  *   out_data     : pointer to a buffer where the filtered signal is written to.
    131  *   out_dg       : [only used in kPitchFilterPreGain] pointer to a buffer
    132  *                  where the output of different gain values (differential
    133  *                  change to gain) is written.
    134  */
    135 static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
    136                           double* out_data,
    137                           double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
    138   int n;
    139   int m;
    140   int j;
    141   double sum;
    142   double sum2;
    143   /* Index of |parameters->buffer| where the output is written to. */
    144   int pos = parameters->index + PITCH_BUFFSIZE;
    145   /* Index of |parameters->buffer| where samples are read for fractional-lag
    146    * computation. */
    147   int pos_lag = pos - parameters->lag_offset;
    148 
    149   for (n = 0; n < parameters->num_samples; ++n) {
    150     /* Shift low pass filter states. */
    151     for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
    152       parameters->damper_state[m] = parameters->damper_state[m - 1];
    153     }
    154     /* Filter to get fractional pitch. */
    155     sum = 0.0;
    156     for (m = 0; m < PITCH_FRACORDER; ++m) {
    157       sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
    158     }
    159     /* Multiply with gain. */
    160     parameters->damper_state[0] = parameters->gain * sum;
    161 
    162     if (parameters->mode == kPitchFilterPreGain) {
    163       int lag_index = parameters->index - parameters->lag_offset;
    164       int m_tmp = (lag_index < 0) ? -lag_index : 0;
    165       /* Update the damper state for the new sample. */
    166       for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
    167         for (j = 0; j < 4; ++j) {
    168           parameters->damper_state_dg[j][m] =
    169               parameters->damper_state_dg[j][m - 1];
    170         }
    171       }
    172 
    173       for (j = 0; j < parameters->sub_frame + 1; ++j) {
    174         /* Filter for fractional pitch. */
    175         sum2 = 0.0;
    176         for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
    177           /* |lag_index + m| is always larger than or equal to zero, see how
    178            * m_tmp is computed. This is equivalent to assume samples outside
    179            * |out_dg[j]| are zero. */
    180           sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
    181         }
    182         /* Add the contribution of differential gain change. */
    183         parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
    184             parameters->gain * sum2;
    185       }
    186 
    187       /* Filter with damping filter, and store the results. */
    188       for (j = 0; j < parameters->sub_frame + 1; ++j) {
    189         sum = 0.0;
    190         for (m = 0; m < PITCH_DAMPORDER; ++m) {
    191           sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
    192         }
    193         out_dg[j][parameters->index] = sum;
    194       }
    195     }
    196     /* Filter with damping filter. */
    197     sum = 0.0;
    198     for (m = 0; m < PITCH_DAMPORDER; ++m) {
    199       sum += parameters->damper_state[m] * kDampFilter[m];
    200     }
    201 
    202     /* Subtract from input and update buffer. */
    203     out_data[parameters->index] = in_data[parameters->index] - sum;
    204     parameters->buffer[pos] = in_data[parameters->index] +
    205         out_data[parameters->index];
    206 
    207     ++parameters->index;
    208     ++pos;
    209     ++pos_lag;
    210   }
    211   return;
    212 }
    213 
    214 /* Update filter parameters based on the pitch-gains and pitch-lags. */
    215 static void Update(PitchFilterParam* parameters) {
    216   double fraction;
    217   int fraction_index;
    218   /* Compute integer lag-offset. */
    219   parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
    220                                             0.5);
    221   /* Find correct set of coefficients for computing fractional pitch. */
    222   fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
    223   fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
    224   parameters->interpol_coeff = kIntrpCoef[fraction_index];
    225 
    226   if (parameters->mode == kPitchFilterPreGain) {
    227     /* If in this mode make a differential change to pitch gain. */
    228     parameters->gain_mult[parameters->sub_frame] += 0.2;
    229     if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
    230       parameters->gain_mult[parameters->sub_frame] = 1.0;
    231     }
    232     if (parameters->sub_frame > 0) {
    233       parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
    234     }
    235   }
    236 }
    237 
    238 /******************************************************************************
    239  * FilterFrame()
    240  * Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
    241  *
    242  * Inputs
    243  *   in_data     : pointer to the input signal of 30 ms at 8 kHz sample-rate.
    244  *   lags        : pointer to pitch-lags, 4 lags per frame.
    245  *   gains       : pointer to pitch-gians, 4 gains per frame.
    246  *   mode        : defining the functionality of the filter. It takes the
    247  *                 following values.
    248  *                 kPitchFilterPre:     Pitch pre-filter, used at encoder.
    249  *                 kPitchFilterPost:    Pitch post-filter, used at decoder.
    250  *                 kPitchFilterPreLa:   Pitch pre-filter with lookahead.
    251  *                 kPitchFilterPreGain: Pitch pre-filter used to otain optimal
    252  *                                      pitch-gains.
    253  *
    254  * Outputs
    255  *   out_data    : pointer to a buffer where the filtered signal is written to.
    256  *   out_dg      : [only used in kPitchFilterPreGain] pointer to a buffer
    257  *                 where the output of different gain values (differential
    258  *                 change to gain) is written.
    259  */
    260 static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
    261                         double* lags, double* gains, PitchFilterOperation mode,
    262                         double* out_data,
    263                         double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
    264   PitchFilterParam filter_parameters;
    265   double gain_delta, lag_delta;
    266   double old_lag, old_gain;
    267   int n;
    268   int m;
    269   const double kEnhancer = 1.3;
    270 
    271   /* Set up buffer and states. */
    272   filter_parameters.index = 0;
    273   filter_parameters.lag_offset = 0;
    274   filter_parameters.mode = mode;
    275   /* Copy states to local variables. */
    276   memcpy(filter_parameters.buffer, filter_state->ubuf,
    277          sizeof(filter_state->ubuf));
    278   memcpy(filter_parameters.damper_state, filter_state->ystate,
    279          sizeof(filter_state->ystate));
    280 
    281   if (mode == kPitchFilterPreGain) {
    282     /* Clear buffers. */
    283     memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
    284     memset(filter_parameters.damper_state_dg, 0,
    285            sizeof(filter_parameters.damper_state_dg));
    286     for (n = 0; n < PITCH_SUBFRAMES; ++n) {
    287       //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
    288       memset(out_dg[n], 0, sizeof(out_dg[n]));
    289     }
    290   } else if (mode == kPitchFilterPost) {
    291     /* Make output more periodic. Negative sign is to change the structure
    292      * of the filter. */
    293     for (n = 0; n < PITCH_SUBFRAMES; ++n) {
    294       gains[n] *= -kEnhancer;
    295     }
    296   }
    297 
    298   old_lag = *filter_state->oldlagp;
    299   old_gain = *filter_state->oldgainp;
    300 
    301   /* No interpolation if pitch lag step is big. */
    302   if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
    303       (lags[0] < (PITCH_DOWNSTEP * old_lag))) {
    304     old_lag = lags[0];
    305     old_gain = gains[0];
    306 
    307     if (mode == kPitchFilterPreGain) {
    308       filter_parameters.gain_mult[0] = 1.0;
    309     }
    310   }
    311 
    312   filter_parameters.num_samples = PITCH_UPDATE;
    313   for (m = 0; m < PITCH_SUBFRAMES; ++m) {
    314     /* Set the sub-frame value. */
    315     filter_parameters.sub_frame = m;
    316     /* Calculate interpolation steps for pitch-lag and pitch-gain. */
    317     lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
    318     filter_parameters.lag = old_lag;
    319     gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
    320     filter_parameters.gain = old_gain;
    321     /* Store for the next sub-frame. */
    322     old_lag = lags[m];
    323     old_gain = gains[m];
    324 
    325     for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
    326       /* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
    327        * some parameters of filter need to be update. */
    328       filter_parameters.gain += gain_delta;
    329       filter_parameters.lag += lag_delta;
    330       /* Update parameters according to new lag value. */
    331       Update(&filter_parameters);
    332       /* Filter a segment of input. */
    333       FilterSegment(in_data, &filter_parameters, out_data, out_dg);
    334     }
    335   }
    336 
    337   if (mode != kPitchFilterPreGain) {
    338     /* Export buffer and states. */
    339     memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
    340            sizeof(filter_state->ubuf));
    341     memcpy(filter_state->ystate, filter_parameters.damper_state,
    342            sizeof(filter_state->ystate));
    343 
    344     /* Store for the next frame. */
    345     *filter_state->oldlagp = old_lag;
    346     *filter_state->oldgainp = old_gain;
    347   }
    348 
    349   if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
    350     /* Filter the lookahead segment, this is treated as the last sub-frame. So
    351      * set |pf_param| to last sub-frame. */
    352     filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
    353     filter_parameters.num_samples = QLOOKAHEAD;
    354     FilterSegment(in_data, &filter_parameters, out_data, out_dg);
    355   }
    356 }
    357 
    358 void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
    359                                PitchFiltstr* pf_state, double* lags,
    360                                double* gains) {
    361   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
    362 }
    363 
    364 void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
    365                                   PitchFiltstr* pf_state, double* lags,
    366                                   double* gains) {
    367   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
    368               NULL);
    369 }
    370 
    371 void WebRtcIsac_PitchfilterPre_gains(
    372     double* in_data, double* out_data,
    373     double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
    374     double* lags, double* gains) {
    375   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
    376               out_dg);
    377 }
    378 
    379 void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
    380                                 PitchFiltstr* pf_state, double* lags,
    381                                 double* gains) {
    382   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
    383 }
    384