Home | History | Annotate | Download | only in crec
      1 /*---------------------------------------------------------------------------*
      2  *  get_fram.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 #include <stdlib.h>
     22 #ifndef _RTT
     23 #include "pstdio.h"
     24 #endif
     25 #include <limits.h>
     26 #include <math.h>
     27 #include <string.h>
     28 #include "passert.h"
     29 
     30 #include "c42mul.h"
     31 #include "portable.h"
     32 
     33 #include "../clib/fpi_tgt.inl"
     34 
     35 #define DEBUG   0
     36 #define FUDGE_FACTOR 1.2f
     37 
     38 const float root_pi_over_2 = (float) 1.2533141;
     39 
     40 static const char get_fram[] = "$Id: get_fram.c,v 1.7.6.13 2007/10/15 18:06:24 dahan Exp $";
     41 
     42 static void create_cepstrum_offsets(preprocessed *prep);
     43 static void destroy_cepstrum_offsets(preprocessed *prep);
     44 static void apply_channel_offset(preprocessed *prep);
     45 static int compare_cached_frame(preprocessed *prep, utterance_info *utt);
     46 
     47 void init_utterance(utterance_info *utt, int utt_type, int dimen,
     48                     int buffer_size, int keep_frames, int num_chan, int do_voicing)
     49 /*
     50 **  To setup the utterance structure
     51 */
     52 {
     53   /*  Construct frame buffer  and voice buffer here
     54   */
     55   ASSERT(utt);
     56   ASSERT(dimen > 0);
     57   if (buffer_size < keep_frames)
     58     SERVICE_ERROR(BAD_ARGUMENT);
     59   utt->utt_type = utt_type;
     60   utt->gen_utt.dim = dimen;
     61   utt->gen_utt.frame = createFrameBuffer(buffer_size,
     62                                          dimen, keep_frames, do_voicing);
     63   utt->gen_utt.num_chan = num_chan;
     64 
     65   setup_ambient_estimation(utt->gen_utt.backchan,
     66                            utt->gen_utt.num_chan, 100);
     67   return;
     68 }
     69 
     70 void set_voicing_durations(utterance_info *utt, int voice_duration,
     71                            int quiet_duration, int unsure_duration,
     72                            int start_windback)
     73 {
     74   utt->gen_utt.voice_duration = voice_duration;
     75   utt->gen_utt.quiet_duration = quiet_duration;
     76   utt->gen_utt.unsure_duration = unsure_duration;
     77   utt->gen_utt.start_windback = start_windback;
     78   return;
     79 }
     80 
     81 void free_utterance(utterance_info *utt)
     82 /*
     83 **  To close data file pointers etc.
     84 */
     85 {
     86   /*  Destroy frame buffer
     87   */
     88   ASSERT(utt);
     89 
     90   clear_ambient_estimation(utt->gen_utt.backchan, utt->gen_utt.dim);
     91   if (utt->gen_utt.frame)
     92   {
     93     destroyFrameBuffer(utt->gen_utt.frame);
     94     utt->gen_utt.frame = NULL;
     95   }
     96   return;
     97 }
     98 
     99 void init_preprocessed(preprocessed *prep, int dimen, float imelda_scale)
    100 /*
    101 **  To setup the preprocessed structure
    102 */
    103 {
    104 
    105   ASSERT(prep);
    106   ASSERT(dimen > 0);
    107   prep->dim = dimen;
    108   prep->seq = (imeldata *) CALLOC(prep->dim, sizeof(imeldata),
    109                                         "srec.prep->seq");
    110   prep->seq_unnorm = (imeldata *) CALLOC(prep->dim, sizeof(imeldata),
    111                      "srec.prep->seq_unnorm");
    112   prep->last_frame = (featdata *) CALLOC(prep->dim, sizeof(featdata),
    113                      "srec.prep->last_frame");
    114 
    115   /*  Setup constants for distance calculation
    116   */
    117   /* TODO: check numbers for non-zero */
    118   prep->add.scale = (prdata)((2 * imelda_scale * imelda_scale) / MUL_SCALE
    119                              + 0.5) - (prdata)0.5;
    120   prep->add.inv_scale = (prdata)(((float)(0x01 << 12) * MUL_SCALE) /
    121                                  (2 * imelda_scale * imelda_scale) + 0.5) -
    122                         (prdata)0.5;
    123   prep->mul.multable_factor_gaussian = 1;
    124   prep->mul.multable_factor = (prdata)(((MUL_SCALE * (0x01 << EUCLID_SHIFT)
    125                                          * prep->uni_score_scale)
    126                                         / (2 * (imelda_scale * imelda_scale
    127                                                 * FUDGE_FACTOR * FUDGE_FACTOR))) / 128 + 0.5)
    128                               - (prdata)0.5;
    129   prep->mul.grand_mod_cov = (prdata)((MUL_SCALE * prep->uni_score_scale *
    130                                       prep->whole_dim *
    131                                       log((imelda_scale * FUDGE_FACTOR) /
    132                                           (SIGMA_BIAS * root_pi_over_2))) / 128 + 0.5)
    133                             - (prdata)0.5 - prep->uni_score_offset;
    134   prep->mul.grand_mod_cov_gaussian = (prdata)(2 * imelda_scale * imelda_scale *
    135                                      prep->use_dim *
    136                                      log(imelda_scale /
    137                                          (SIGMA_BIAS * root_pi_over_2)) + 0.5)
    138                                      - (prdata)0.5;
    139 #if DEBUG
    140   log_report("grand_mod_cov %.1f, grand_mod_cov_gaussian %.1f\n",
    141              (float)prep->mul.grand_mod_cov,
    142              (float)prep->mul.grand_mod_cov_gaussian);
    143   log_report("multable_factor %f, multable_factor_gaussian %f\n",
    144              (float)prep->mul.multable_factor,
    145              (float)prep->mul.multable_factor_gaussian);
    146 #endif
    147 
    148 
    149   create_cepstrum_offsets(prep);
    150   return;
    151 }
    152 
    153 void clear_preprocessed(preprocessed *prep)
    154 /*
    155 **  To setup the preprocessed structure
    156 */
    157 {
    158   ASSERT(prep);
    159   destroy_cepstrum_offsets(prep);
    160   prep->dim = 0;
    161   FREE((char *)prep->last_frame);
    162   FREE((char *)prep->seq);
    163   FREE((char *)prep->seq_unnorm);
    164   return;
    165 }
    166 
    167 int get_data_frame(preprocessed *prep, utterance_info *utt)
    168 /*
    169 **  To get a frame amount of data and perform preprocessing functions
    170 */
    171 {
    172   int status_code;
    173 
    174   ASSERT(prep);
    175   ASSERT(utt);
    176   if (utt->gen_utt.channorm && !utt->gen_utt.channorm->adj_valid)
    177     convert_adjustment_to_imelda(utt->gen_utt.channorm, prep);
    178   if (utt->gen_utt.dim != prep->dim)
    179     SERVICE_ERROR(UTTERANCE_DIMEN_MISMATCH);
    180 
    181   if (prep->post_proc & VFR)
    182   {
    183     if ((status_code = get_utterance_frame(prep, utt)) <= 0)
    184       return (status_code);
    185 
    186     log_report("get_data_frame vfr not supported\n");
    187     SERVICE_ERROR(FEATURE_NOT_SUPPORTED);
    188   }
    189   else
    190   {
    191     status_code = get_utterance_frame(prep, utt);
    192     if (status_code == 0) return(status_code);
    193     else if (status_code == -1) return(1);
    194   }
    195 
    196   if (prep->chan_offset)
    197     apply_channel_offset(prep);
    198 
    199   /*  Apply linear transformation if necessary
    200   */
    201   if (prep->post_proc & LIN_TRAN)
    202     linear_transform_frame(prep, prep->seq, True);
    203 
    204   memcpy(prep->seq_unnorm, prep->seq, prep->dim * sizeof(imeldata));
    205   if (utt->gen_utt.channorm)
    206     apply_channel_normalization_in_imelda(utt->gen_utt.channorm,
    207                                           prep->seq, prep->seq_unnorm,
    208                                           utt->gen_utt.channorm->dim);
    209   return (1);
    210 }
    211 
    212 int get_utterance_frame(preprocessed *prep, utterance_info *utt)
    213 /*
    214 **  To get a frame amount of data
    215 **  Maintains a single data buffer and passes the pointers to frame of data.
    216 **  Post-increments after copying
    217 */
    218 {
    219   featdata  *frame_ptr;
    220   int ii;
    221 
    222   ASSERT(prep);
    223   ASSERT(utt);
    224 
    225   /*  Get the next data frame in
    226   */
    227   if (getFrameGap(utt->gen_utt.frame) > 0)
    228   {
    229     /*  is it a cloned object */
    230     if (prep->ref_count > 1 && compare_cached_frame(prep, utt))
    231       return (-1);
    232 
    233     frame_ptr = currentRECframePtr(utt->gen_utt.frame);
    234     if (frame_ptr == NULL)
    235       return (0);
    236     if (prep->ref_count > 1)
    237     {
    238       ASSERT(prep->last_frame);
    239       memcpy(prep->last_frame, frame_ptr,
    240              prep->dim* sizeof(featdata));
    241     }
    242     for (ii = 0; ii < utt->gen_utt.dim; ii++)
    243       prep->seq[ii] = (imeldata)frame_ptr[ii];
    244     /*  Apply fast-voice corrections if necessary */
    245     if (utt->gen_utt.frame->haveVoiced)
    246     {
    247       utterance_detection_fixup(utt->gen_utt.frame,
    248                                 &utt->gen_utt.last_push, utt->gen_utt.voice_duration,
    249                                 utt->gen_utt.quiet_duration, utt->gen_utt.unsure_duration);
    250       /*     if (isFrameBufferActive (utt->gen_utt.frame)
    251         && getFrameGap (utt->gen_utt.frame) <= utt->gen_utt.quiet_duration)
    252             SERVICE_ERROR (INTERNAL_ERROR); */
    253       prep->voicing_status =
    254         rec_frame_voicing_status(utt->gen_utt.frame);
    255     }
    256     return (1);
    257   }
    258   return (0);
    259 }
    260 
    261 
    262 int advance_utterance_frame(utterance_info *utt)
    263 /*
    264 **  To get a frame amount of data
    265 */
    266 {
    267   ASSERT(utt);
    268   /*  if more samples are needed then read from file if the type matched
    269   */
    270   /*  Get the next data frame in
    271   */
    272   if (getFrameGap(utt->gen_utt.frame) > 0)
    273   {
    274     if (incRECframePtr(utt->gen_utt.frame) != False)
    275       return (0);
    276     return (1);
    277   }
    278   return (0);
    279 }
    280 
    281 int retreat_utterance_frame(utterance_info *utt)
    282 /*
    283 **  To get a frame amount of data
    284 */
    285 {
    286   ASSERT(utt);
    287 
    288   if (getBlockGap(utt->gen_utt.frame) > 0)
    289   {
    290     if (decRECframePtr(utt->gen_utt.frame) != False)
    291       return (0);
    292     return (1);
    293   }
    294   return (0);
    295 }
    296 
    297 void prepare_data_frame(preprocessed *prep)
    298 {
    299   int ii;
    300   prdata sum_sq;
    301 
    302   sum_sq = 0;
    303 
    304   for (ii = 0; ii < prep->whole_dim; ii++)
    305     sum_sq += (prdata) SQR((prdata)prep->seq[ii]);
    306   prep->seq_sq_sum_whole = -sum_sq;
    307 
    308   ASSERT(prep->whole_dim <= prep->use_dim);
    309   for (ii = 0; ii < prep->use_dim; ii++)
    310     sum_sq += (prdata) SQR((prdata)prep->seq[ii]);
    311   prep->seq_sq_sum = -sum_sq;
    312 
    313   sum_sq = 0;
    314 
    315   for (ii = 0; ii < prep->whole_dim; ii++)
    316     sum_sq += (prdata) SQR((prdata)prep->seq_unnorm[ii]);
    317   prep->seq_unnorm_sq_sum_whole = -sum_sq;
    318 
    319   return;
    320 }
    321 
    322 int utterance_started(utterance_info *utt)
    323 {
    324   ASSERT(utt);
    325   if (utt->gen_utt.frame->haveVoiced
    326       && utt->gen_utt.frame->voicingDetected)
    327     return (True);
    328   else
    329     return (False);
    330 }
    331 
    332 int utterance_ended(utterance_info *utt)
    333 {
    334   ASSERT(utt);
    335   return (utt->gen_utt.frame->utt_ended);
    336 }
    337 
    338 int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing)
    339 {
    340   featdata framdata[MAX_DIMEN];
    341   int      ii;
    342 
    343   ASSERT(utt);
    344   ASSERT(pUttFrame);
    345 
    346   for (ii = 0; ii < utt->gen_utt.frame->uttDim; ii++)
    347     framdata[ii] = (featdata) pUttFrame[ii];
    348 
    349   if (pushSingleFEPframe(utt->gen_utt.frame, framdata, voicing) != False)
    350     return (0);
    351 
    352   return (1);
    353 }
    354 
    355 int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt)
    356 {
    357   int      voicedata;
    358   featdata *framdata;
    359 
    360   ASSERT(oututt);
    361   ASSERT(inutt);
    362 
    363   if ((framdata = currentRECframePtr(inutt->gen_utt.frame)) == NULL)
    364     return (0);
    365 
    366   voicedata = getVoicingCode(inutt->gen_utt.frame, framdata);
    367 
    368   if (pushSingleFEPframe(oututt->gen_utt.frame, framdata, voicedata) != False)
    369     return (0);
    370 
    371   return (1);
    372 }
    373 
    374 int copy_pattern_frame(utterance_info *oututt, preprocessed *prep)
    375 {
    376   int      ii;
    377   featdata frame_ptr[MAX_DIMEN];
    378 
    379   ASSERT(oututt);
    380   ASSERT(prep);
    381   ASSERT(oututt->gen_utt.dim < MAX_DIMEN);
    382   for (ii = 0; ii < oututt->gen_utt.dim; ii++)
    383     frame_ptr[ii] = (featdata) RANGE(prep->seq[ii], 0, 255);
    384   if (pushSingleFEPframe(oututt->gen_utt.frame, frame_ptr,
    385                          prep->voicing_status)
    386       != False) return(0);
    387   return (1);
    388 }
    389 
    390 static void create_cepstrum_offsets(preprocessed *prep)
    391 {
    392   ASSERT(prep);
    393   prep->chan_offset = (imeldata *) CALLOC_CLR(prep->dim,
    394                       sizeof(imeldata), "srec.chan_offset");
    395   return;
    396 }
    397 
    398 void set_cepstrum_offset(preprocessed *prep, int index, int value)
    399 {
    400   ASSERT(prep);
    401   ASSERT(prep->chan_offset);
    402   ASSERT(index >= 0 && index < prep->dim);
    403   prep->chan_offset[index] = (imeldata) value;
    404   return;
    405 }
    406 
    407 static void destroy_cepstrum_offsets(preprocessed *prep)
    408 {
    409   ASSERT(prep);
    410   FREE((char *)prep->chan_offset);
    411   prep->chan_offset = 0;
    412   return;
    413 }
    414 
    415 static void apply_channel_offset(preprocessed *prep)
    416 {
    417   int ii;
    418 
    419   for (ii = 0; ii < prep->dim; ii++)
    420     prep->seq[ii] += prep->chan_offset[ii];
    421   return;
    422 }
    423 
    424 static int compare_cached_frame(preprocessed *prep, utterance_info *utt)
    425 {
    426   int      ii;
    427   featdata *frame_ptr;
    428 
    429   frame_ptr = currentRECframePtr(utt->gen_utt.frame);
    430   if (frame_ptr == NULL)
    431     return (False);
    432   for (ii = 0; ii < utt->gen_utt.dim; ii++)
    433     if (prep->last_frame[ii] != frame_ptr[ii])
    434       return (False);
    435   return (True);
    436 }
    437 
    438 void convert_adjustment_to_imelda(norm_info *norm, preprocessed *prep)
    439 {
    440   int      ii;
    441   imeldata fram[MAX_DIMEN];
    442 
    443   ASSERT(prep);
    444   ASSERT(norm);
    445   for (ii = 0; ii < 12; ii++)      /* TODO: fix dimension properly, and sort out rouding/type */
    446     fram[ii] = (imeldata) norm->adjust[ii]; /* TODO: review types */
    447   for (; ii < prep->dim; ii++)
    448     fram[ii] = 0;
    449 
    450   linear_transform_frame(prep, fram, False);
    451 
    452   for (ii = 0; ii < prep->dim; ii++)
    453     norm->imelda_adjust[ii] = fram[ii];
    454 #if DEBUG
    455   log_report("NORM AUX: ");
    456   for (ii = 0; ii < norm->dim; ii++)
    457     log_report("%d ", (int)norm->imelda_adjust[ii]);
    458   log_report("\n");
    459 #endif
    460   norm->adj_valid = True;
    461   return;
    462 }
    463