Home | History | Annotate | Download | only in include
      1 /*---------------------------------------------------------------------------*
      2  *  pre_desc.h  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 
     22 #ifndef _h_pre_desc_
     23 #define _h_pre_desc_
     24 
     25 #ifdef SET_RCSID
     26 static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $";
     27 #endif
     28 
     29 
     30 #include "all_defs.h"
     31 #include "hmm_type.h"
     32 #include "specnorm.h"
     33 #ifndef _RTT
     34 #include "duk_io.h"
     35 #endif
     36 
     37 #define DO_SUBTRACTED_SEGMENTATION  0
     38 
     39 #ifndef NONE
     40 #define NONE   0
     41 #endif
     42 #define SCALE   1 /* Scaling the channels */
     43 #define LIN_TRAN  2 /* Linear Transformation */
     44 #define VFR   4 /* Variable frame rate */
     45 #define USE_MULTAB      8 /* Set up multable distance calculations */
     46 
     47 /**
     48  * @todo document
     49  */
     50 typedef struct
     51 {  /* mul-table data types */
     52   unsigned short sigma;
     53   int   num;
     54   short *pdf;
     55 }
     56 mul_table;
     57 
     58 /**
     59  * @todo document
     60  */
     61 typedef struct
     62 {
     63   unsigned short num_dev8_index;
     64   unsigned char  *dev8_index;
     65   unsigned short *wt_index;
     66   short    *gauss_dist_table;
     67   short    **dist_ptr;
     68   prdata    multable_factor; /* euclidean to multable */
     69   prdata    multable_factor_gaussian; /* euclidean to multable */
     70   prdata    grand_mod_cov; /* grand covariance modulus */
     71   prdata    grand_mod_cov_gaussian; /* grand covariance modulus */
     72 }
     73 mul_table_info;
     74 
     75 /**
     76  * @todo document
     77  */
     78 typedef struct
     79 {
     80   const prdata *table;
     81   prdata add_log_limit;
     82   prdata scale;   /* X - scale to log function */
     83   prdata inv_scale;
     84   float logscale;  /* Y - scale to log function */
     85 }
     86 logadd_table_info;
     87 
     88 /**
     89  * @todo document
     90  */
     91 typedef struct
     92 {
     93   unsigned long num;
     94   accdata **between;
     95   accdata *bmean;
     96   accdata **within;
     97   accdata *wmean;
     98 }
     99 transform_info;
    100 
    101 /**
    102  * @todo document
    103  */
    104 typedef struct
    105 {   /* Segmentation parameters */
    106   int  rel_low;
    107   int  rel_high;
    108   int  gap_period;
    109   int  click_period;
    110   int  breath_period;
    111   int  extend_annotation;
    112   int  param;
    113   int         min_initial_quiet_frames;    /* num silence frames needed before input */
    114   int         min_annotation_frames;          /* minimum overall length */
    115   int         max_annotation_frames;          /* maximum overall length */
    116   int         delete_leading_segments;        /* num segments to delete. 0=no action */
    117   int         leading_segment_accept_if_not_found; /* Do not reject segmentation if not found */
    118   int         leading_segment_min_frames;   /* remove unless shorter */
    119   int         leading_segment_max_frames;   /* remove unless exceeded */
    120   int         leading_segment_min_silence_gap_frames;/* remove if good silence gap to next segment */
    121   int  beep_size;  /*X201 beep filter */
    122   int  beep_threshold;  /*X201 beep filter */
    123   int  min_segment_rel_c0; /* Any segment gets deleted whose peak c0 is < max - min_segment_rel_c0 */
    124 
    125 #if DO_SUBTRACTED_SEGMENTATION
    126   int         snr_holdoff;    /* Ignore first n frames when estimating speech level for SNR measure */
    127   int         min_acceptable_snr; /* for an acceptable segmentation */
    128 #endif
    129 }
    130 endpoint_info;
    131 
    132 
    133 /**
    134  * @todo document
    135  */
    136 typedef struct
    137 {  /* processed speech data/front end output */
    138   int  ref_count; /* reference counts */
    139   /* Pattern vector section */
    140   int  dim;  /* dimension of frame vector */
    141   int  use_dim; /* dimension used for recognition */
    142   int  whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */
    143   int  use_from; /* first channel used for recognition */
    144   featdata *last_frame; /* last frame processed in frame buffer */
    145   imeldata *seq;  /* current valid frame */
    146   imeldata *seq_unnorm; /* current valid frame, for whole-word models */
    147   prdata seq_sq_sum; /* sum of the squared of frames */
    148   prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */
    149   prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword */
    150   int  voicing_status; /* voicing code */
    151   int  post_proc; /* post processing functions */
    152   imeldata *offset; /* offset vector with transformation */
    153   imeldata **matrix; /* linear transformation matrix */
    154   int  imel_shift; /* Imelda scale factor (in shifts) */
    155   covdata **imelda; /* linear transformation matrix, PMC or RN */
    156   imeldata **invmat; /* inverse transformation matrix */
    157   int  inv_shift; /* inverse Imelda scale factor (in shifts) */
    158   covdata **inverse; /* inverse linear transformation matrix, PMC or RN */
    159 #if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */
    160   int  partial_distance_calc_dim;  /* number of params to calc distance over, before approximating if beyond threshold */
    161   scodata partial_distance_threshold;
    162   prdata partial_distance_calc_threshold;
    163   prdata partial_distance_offset;
    164   prdata global_distance_over_n_params;
    165   int  global_model_means[MAX_DIMEN];
    166   prdata partial_mean_sq_sum;
    167   prdata partial_seq_sq_sum;
    168   prdata partial_seq_unnorm_sq_sum;
    169 #endif
    170   imeldata *chan_offset;
    171   /* Channel Normalization etc */
    172 
    173   /* Tables */
    174   prdata exp_wt[MAX_WTS]; /* weights exp lookup table */
    175   mul_table_info mul;  /* Mul-table */
    176   logadd_table_info add; /* logadd-table */
    177   /* ENC */
    178   booldata is_setup_for_noise;
    179   booldata do_whole_enc; /* to enable ENC */
    180   booldata do_sub_enc; /* to enable ENC */
    181   booldata enc_count;
    182   booldata ambient_valid; /* ambient estimates valid */
    183   imeldata **pmc_fixmat; /* ENC matrix */
    184   imeldata **pmc_fixinv; /* inverse ENC matrix */
    185   covdata **pmc_matrix; /* ENC matrix in float */
    186   covdata **pmc_inverse; /* inverse ENC matrix in float */
    187   int  pmc_matshift; /* scaling */
    188   int  pmc_invshift; /* scaling */
    189   imeldata    *ambient_mean; /* ambient mean vector */
    190   imeldata    *ambient_prof; /* ambient estimates, pseudo space */
    191   imeldata    *ambient_prof_unnorm; /* ambient estimates, unnormalised */
    192   logadd_table_info fbadd; /* logadd-table for ENC */
    193 #if DO_SUBTRACTED_SEGMENTATION
    194   int  mel_dim;
    195   covdata **spec_inverse;
    196   imeldata **spec_fixinv;
    197   int  spec_invshift;
    198   int  *cep_offset;
    199 #endif
    200   /* Parameters */
    201   prdata mix_score_scale; /* Mixture score scaling constant */
    202   prdata uni_score_scale; /* Unimodal score scaling constant */
    203   prdata uni_score_offset; /* Unimodal score offset constant */
    204   prdata imelda_scale;  /* Imelda grand variance */
    205   /* Endpoint data */
    206   endpoint_info end;
    207 
    208 }
    209 preprocessed;
    210 
    211 /**
    212  * @todo document
    213  */
    214 typedef struct
    215 {
    216   preprocessed    *prep; /* The preprocessed data structure */
    217   /* The following stuff cannot be cloned */
    218   booldata do_imelda; /* Alignment based accumulation */
    219   transform_info  imelda_acc;
    220 }
    221 pattern_info;
    222 
    223 #endif /* _h_pre_desc_ */
    224