Home | History | Annotate | Download | only in cfront
      1 /*---------------------------------------------------------------------------*
      2  *  spec_anl.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 
     22 #include <stdlib.h>
     23 #ifndef _RTT
     24 #include <stdio.h>
     25 #endif
     26 #include <string.h>
     27 #include <math.h>
     28 #include <limits.h>
     29 #include <assert.h>
     30 
     31 #include "hmm_desc.h"
     32 #include "front.h"
     33 #include "pendian.h"
     34 #include "portable.h"
     35 #include "LCHAR.h"
     36 
     37 #include "../clib/memmove.h"
     38 
     39 #define DEBUG           0
     40 
     41 #include "sh_down.h"
     42 
     43 static int sort_ints_unique(int *list, int *num);
     44 //static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj);
     45 
     46 void peakpick(front_freq *freqobj, fftdata *density, int num_freq);
     47 void magsq(fftdata *x, fftdata *y, fftdata *z, int ns);
     48 
     49 void preemph(fftdata *data, int window_len, samdata *wav_data,
     50              int num_samples, coefdata pre_mel,
     51              bigdata *last_sample);
     52 void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo);
     53 
     54 
     55 
     56 void filterbank_emulation(front_channel * channel, front_wave *waveobj,
     57                           front_freq *freqobj, front_cep *cepobj, samdata *income,
     58                           samdata *outgo, int num_samples)
     59 {
     60   /*  Part II. Mel cepstrum coefficients
     61   **
     62   **  Maintain parameter queue */
     63   MEMMOVE(channel->cep + (channel->mel_dim + 1), channel->cep,
     64           (Q2 - 1) *(channel->mel_dim + 1), sizeof(cepdata));
     65   channel->shift = 0;
     66 
     67   /*  2.01 Pre-emphasize waveform
     68   Only the new samples are preemphasized.  To carry on from the previous call,
     69   the last sample value is stored in lastx.
     70   */
     71   preemph(channel->prebuff, freqobj->window_length, income, num_samples,
     72           waveobj->pre_mel, &channel->lastx);
     73 
     74 #if DEBUG
     75   log_report("preemphasized data\n");
     76   write_scaled_frames(freqobj->window_length, 1, channel->prebuff, D_FIXED, (float) 1 / (0x01 << WAVE_SHIFT));
     77 #endif
     78   /******************************************************************************
     79   **  The "NEW" fft performs shifting operations in fixed point, to maximise
     80   **  precision.
     81   **
     82   *******************************************************************************/
     83   channel->shift += place_sample_data(&freqobj->fft, channel->prebuff,
     84                                       freqobj->ham, freqobj->window_length);
     85 #if DEBUG
     86   log_report("windowed data\n");
     87   if (channel->shift >= 0)
     88   {
     89     write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift));
     90     write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)(0x01 << channel->shift));
     91   }
     92   else
     93   {
     94     write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift));
     95     write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)1 / (0x01 << -channel->shift));
     96   }
     97 #endif
     98   channel->shift *= 2;
     99   channel->shift += fft_perform_and_magsq(&freqobj->fft);
    100 
    101 #if DEBUG
    102   log_report("After magnitude squared (%d)\n", channel->frame_count);
    103   if (channel->shift >= 0)
    104     write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << (channel->shift)));
    105   else
    106     write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << (- channel->shift)));
    107 #endif
    108 
    109 #if DEBUG
    110   log_report("After magnitude squared: ");
    111   if (channel->shift >= 0)
    112     write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 <<  channel->shift));
    113   else
    114     write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 <<  -channel->shift));
    115 #endif
    116 
    117   if (freqobj->do_nonlinear_filter)
    118     peakpick(freqobj, freqobj->fft.real, freqobj->fft.size + 1);
    119 
    120 #if DEBUG
    121   log_report("After peakpick: ");
    122   if (channel->shift >= 0)
    123     write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift));
    124   else
    125     write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift));
    126 #endif
    127 
    128   /*  2.23 Apply filterbank emulation */
    129   channel->shift += RAMP_SHIFT;
    130   filtbank(freqobj, freqobj->fft.real, channel->filterbank);
    131 #if DEBUG
    132   log_report("After filterbanked: ");
    133   if (channel->shift >= 0)
    134     write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)(0x01 << channel->shift));
    135   else
    136     write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)1 / (0x01 << -channel->shift));
    137 #endif
    138 
    139   return;
    140 }
    141 
    142 
    143 void preemph(fftdata *data, int window_len, samdata *wav_data,
    144              int num_samples, coefdata pre_mel,
    145              bigdata *last_sample)
    146 /*
    147 **  pre-emphasize on speech data, check for end of data */
    148 /*  SCALE: In this stage we're introducing a scale factor of 2 */
    149 {
    150   int i;
    151   bigdata temp;
    152 
    153   ASSERT(data);
    154   ASSERT(last_sample);
    155   ASSERT(wav_data);
    156   ASSERT(num_samples >= 0);
    157   if (num_samples > window_len)
    158     num_samples = window_len;
    159 
    160   if (num_samples < window_len)
    161     MEMMOVE(data, data + num_samples, (window_len - num_samples),
    162             sizeof(fftdata));
    163   data += window_len - num_samples;
    164 
    165   /*  If no preemphasis to do
    166   */
    167   if (pre_mel == 0)
    168   { /* dont't shift */
    169     for (i = 0; i < num_samples; i++)
    170       data[i] = (fftdata) wav_data[i];
    171     return;
    172   }
    173 
    174   /*  Otherwise do the preemphasis
    175   */
    176   for (i = 0; i < num_samples; i++)
    177   {
    178     temp = SHIFT_UP((bigdata)wav_data[i], COEFDATA_SHIFT);
    179     data[i] = (fftdata)(SHIFT_DOWN(temp - (*last_sample), COEFDATA_SHIFT));
    180     *last_sample = (bigdata)pre_mel * wav_data[i];
    181 
    182   }
    183   return;
    184 }
    185 
    186 void magsq(fftdata *x, fftdata *y, fftdata *z, int ns)
    187 /*
    188 **  magnitude squared, tailored for TI FFT routines
    189 **  The dynamic range should fit 32 - RAMP_SHIFT */
    190 {
    191   int i;
    192 
    193   ASSERT((float)x[0] *(float)x[0] < LONG_MAX);
    194   ASSERT((float)x[0] *(float)x[0] > LONG_MIN);
    195   z[0] = (fftdata)((bigdata)x[0] * (bigdata)x[0]);
    196   for (i = 1; i < ns; i++)
    197   {
    198     ASSERT(((fftdata)x[i] *(fftdata)x[i]) >= 0);
    199     ASSERT(((fftdata)y[i] *(fftdata)y[i]) >= 0);
    200     ASSERT((float)x[i] *(float)x[i] < LONG_MAX);
    201     ASSERT((float)x[i] *(float)x[i] > LONG_MIN);
    202     ASSERT((float)y[i] *(float)y[i] < LONG_MAX);
    203     ASSERT((float)y[i] *(float)y[i] > LONG_MIN);
    204     /*    z[i]= (fftdata) SHIFT_DOWN ((bigdata)x[i] * (bigdata)x[i] + (bigdata)y[i] * (bigdata)y[i], RAMP_SHIFT);
    205     */
    206     z[i] = (fftdata)(((bigdata)x[i] * (bigdata)x[i])
    207                      + ((bigdata)y[i] * (bigdata)y[i]));
    208     if (z[i] <= 0)
    209       z[i] = (fftdata) 1;
    210   }
    211   return;
    212 }
    213 
    214 void peakpick(front_freq *freqobj, fftdata *density, int num_freq)
    215 {
    216   int i;
    217   fftdata peak;
    218   fftdata bdecay;
    219   fftdata fdecay;
    220   int first;
    221   int last;
    222 
    223   ASSERT(freqobj);
    224   /* Fixed pt requires scale up of COEFDATA_SHIFT on these pars (coefdata) */
    225   bdecay = freqobj->peakpickdown;
    226   fdecay = freqobj->peakpickup;
    227 
    228   if ((bdecay <= (fftdata) 0.0) && (fdecay <= (fftdata) 0.0))
    229     return;
    230 
    231   first = freqobj->cut_off_below;
    232   last  = freqobj->cut_off_above;
    233   /* this filters from cut_off_below to       */
    234   /* cut_off_above inclusive          */
    235 
    236   if (last >= num_freq)
    237     last = num_freq - 1;
    238   /* as most routines seem to check both      */
    239   /* limits                           */
    240 
    241   if (bdecay > 0.0)
    242   {
    243     ASSERT(density[last] >= 0);
    244     peak = density[last];
    245     for (i = last - 1; i >= first; i--)
    246     {
    247       peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)bdecay);
    248       ASSERT(peak >= 0);
    249       if (density[i] > peak)
    250         peak = density[i];
    251       else
    252         density[i] = peak;
    253     }
    254   }
    255   if (fdecay > 0.0)
    256   {
    257     peak = density[first];
    258     for (i = first + 1; i <= last; i++)
    259     {
    260       peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)fdecay);
    261       if (density[i] > peak)
    262         peak = density[i];
    263       else
    264         density[i] = peak;
    265     }
    266   }
    267   return;
    268 }
    269 
    270 void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo)
    271 /*
    272 **  pwr spect -> filter bank output (linear) */
    273 {
    274   int i, j, k;
    275   bigdata t, sum, mom, nxt;
    276 
    277   /*  Scale down before starting mel-filterbank operations
    278   */
    279   for (i = 0; i < freqobj->cut_off_above; i++)
    280     density[i] = SHIFT_DOWN(density[i], RAMP_SHIFT);
    281 
    282   j = MAX(freqobj->fcmid[0], freqobj->cut_off_below);
    283   nxt = 0;
    284   for (; j < freqobj->fcmid[1]; j++)
    285   {
    286     ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) < LONG_MAX);
    287     ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) > -LONG_MAX);
    288     nxt += (bigdata) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT);
    289   }
    290   for (i = 0, k = 2; i < freqobj->nf; i++, k++)
    291   {
    292     sum = mom = 0;
    293     for (; j < freqobj->fcmid[k]; j++)
    294     {
    295       /* TODO: Tidy up this fixed pt shifting. BP */
    296 
    297       ASSERT((float) freqobj->framp[j] *(float) density[j] < LONG_MAX);
    298       ASSERT((float) freqobj->framp[j] *(float) density[j] > LONG_MIN);
    299       ASSERT((float) sum + (float)density[j] < LONG_MAX);
    300       ASSERT((float) sum + (float)density[j] > LONG_MIN);
    301       sum += (bigdata) density[j];
    302       ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] < LONG_MAX);
    303       ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] > LONG_MIN);
    304 
    305       mom += (bigdata)(long) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT);
    306     }
    307 
    308     ASSERT(((float)nxt + (float)sum - (float)mom) < LONG_MAX);
    309     ASSERT(((float)nxt + (float)sum - (float)mom) > LONG_MIN);
    310 
    311     /* TODO: refine this expression. Shift down fcscl in advance.  */
    312     t = (bigdata)((SHIFT_UP(nxt + sum - mom, HALF_RAMP_SHIFT)
    313                    + SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT + 1))
    314                   / SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT));
    315     /* TODO: cleanup and also check for division by zero */
    316     nxt = mom;
    317     fbo[i] = (cepdata) t;
    318   }
    319   return;
    320 }
    321 
    322 int create_spectrum_filter(front_freq *freqobj, int *freq, int *spread)
    323 {
    324   int ii, jj, freq_step;
    325   int lo, hi;
    326   ASSERT(freqobj);
    327   ASSERT(freqobj->spectrum_filter_num == 0);
    328   ASSERT(freqobj->samplerate > 0);
    329   /* Convert to FFT taps. Mark adjacent taps as well as taps within spread */
    330   freq_step = (freqobj->samplerate << 12) / (2 * freqobj->fft.size);
    331   freqobj->spectrum_filter = (int *) CALLOC_CLR(freqobj->fft.size + 1, sizeof(int), "cfront.spectrum_filter");
    332   freqobj->spectrum_filter_num = 0;
    333   for (ii = 0 ; ii < MAX_FILTER_NUM; ii++)
    334   {
    335     if (freq[ii] == 0)
    336       continue;
    337     lo = (((freq[ii] - spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate;
    338     hi = (((freq[ii] + spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate;
    339 
    340 
    341     for (jj = lo; jj <= hi;jj++)
    342     {
    343       if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size)
    344         SERVICE_ERROR(MAX_FILTER_POINTS_EXCEEDED);
    345       freqobj->spectrum_filter[freqobj->spectrum_filter_num++] = jj;
    346     }
    347     /* jj=0;
    348      while (((jj+1)*freq_step)>>12 <= freq[ii]-spread[ii])
    349          jj++;
    350      while (((jj-1)*freq_step>>12) < freq[ii]+spread[ii]){
    351          if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size)
    352       SERVICE_ERROR (MAX_FILTER_POINTS_EXCEEDED);
    353          freqobj->spectrum_filter[freqobj->spectrum_filter_num++]= jj;
    354          jj++;
    355      }
    356     */
    357   }
    358   sort_ints_unique(freqobj->spectrum_filter, &freqobj->spectrum_filter_num);
    359   return (freqobj->spectrum_filter_num);
    360 }
    361 
    362 void clear_spectrum_filter(front_freq *freqobj)
    363 {
    364   ASSERT(freqobj->spectrum_filter);
    365   if (freqobj->spectrum_filter)
    366     FREE((char *) freqobj->spectrum_filter);
    367   freqobj->spectrum_filter = NULL;
    368   freqobj->spectrum_filter_num = 0;
    369   return;
    370 }
    371 
    372 static int sort_ints_unique(int *list, int *num)
    373 {
    374   /*  Sort a list of ints and make unique */
    375   int ii, jj, temp;
    376   for (ii = 1; ii < *num; ii++)
    377   {
    378     for (jj = 0; jj < ii; jj++)
    379     {
    380       temp = list[ii];
    381       if (temp < list[jj])
    382       {
    383         MEMMOVE(&list[jj+1], &list[jj], (ii - jj), sizeof(int));
    384         list[jj] = temp;
    385         break;
    386       }
    387       if (temp == list[jj])
    388       {
    389         MEMMOVE(&list[ii], &list[ii+1], (*num - ii), sizeof(int));
    390 
    391 
    392 
    393         (*num)--;
    394       }
    395     }
    396   }
    397   return *num;
    398 }
    399 
    400 //static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj)
    401 //{
    402 //  for (int i = 0; i < freqobj->spectrum_filter_num; ++i)
    403 //  {
    404 //    ASSERT(freqobj->spectrum_filter[i] < num);
    405 //    data[freqobj->spectrum_filter[i]] = 0;
    406 //  }
    407 //}
    408 
    409 /* --------------------------------------------------
    410  freq_warp will do pure linear warping if the warp
    411  scale > 1.0. Otherwise it will do piecewise warp
    412  which means warping the second part, from xstart
    413  to the bandwidth with another scale which is
    414  determined by b and c in the formulation.
    415  In general, 0.7 < wscale < 1.4, and xstart <= 1
    416  08/15/01, Puming Zhan
    417  --------------------------------------------------- */
    418 void freq_warp(front_freq *freqobj, fftdata *inbuf, int ns)
    419 {
    420   int i;
    421   int nsE;
    422   float x1, y1, b, c, wscale;
    423   fftdata *tmpbuf;
    424 
    425   ASSERT(freqobj && inbuf);
    426 
    427   ASSERT(freqobj->warp_scale != 0);
    428 
    429   wscale = freqobj->warp_scale;
    430   x1     = freqobj->piecewise_start;
    431   tmpbuf = (fftdata *) CALLOC(ns, sizeof(fftdata), "cfront.tmpbuf");
    432 
    433   if (wscale < MIN_WARP_SCALE || wscale > MAX_WARP_SCALE)
    434   {
    435     SERVICE_ERROR(WARP_SCALE);
    436   }
    437   if (x1 > 1.0 || x1 < 0.5)
    438   {
    439     SERVICE_ERROR(PIECEWISE_START);
    440   }
    441 
    442   y1 = x1 < wscale ? (float)x1 / wscale : (float)1.0;
    443 
    444   b = y1 < 1.0 ? (float)((1.0 - x1) / (1.0 - y1)) : (float)0.0;
    445 
    446   c = (float)((1.0 - b) * (ns - 1));
    447 
    448   nsE = (int)(y1 * (ns - 1));
    449 
    450   for (i = 0; i < ns; i++)
    451   {
    452     float x = i > nsE ? b * i + c : wscale * i;
    453     int   u = (int)ceil((double)x);
    454     int   l = (int)floor((double)x);
    455     float w1 = x - l;
    456     float w2 = 1 - w1;
    457 
    458     if (u < ns)
    459     {
    460       tmpbuf[i] = (int)(w1 * inbuf[u] + w2 * inbuf[l]);
    461     }
    462     else
    463     {
    464       tmpbuf[i] = inbuf[ns-1];
    465     }
    466   }
    467 
    468   /* need to copy the warped fft into inbuf    */
    469   /* because the following function filtbank() */
    470   /* will take inbuf as input                  */
    471   /* considering that this function will be    */
    472   /* for every frame, it may not be a good idea*/
    473   /* to do malloc here                         */
    474 
    475   for (i = 0; i < ns; i++)
    476     inbuf[i] = tmpbuf[i];
    477 
    478   FREE((char *) tmpbuf);
    479 }
    480