1 /*---------------------------------------------------------------------------* 2 * pre_desc.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #ifndef _h_pre_desc_ 23 #define _h_pre_desc_ 24 25 #ifdef SET_RCSID 26 static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $"; 27 #endif 28 29 30 #include "all_defs.h" 31 #include "hmm_type.h" 32 #include "specnorm.h" 33 #ifndef _RTT 34 #include "duk_io.h" 35 #endif 36 37 #define DO_SUBTRACTED_SEGMENTATION 0 38 39 #ifndef NONE 40 #define NONE 0 41 #endif 42 #define SCALE 1 /* Scaling the channels */ 43 #define LIN_TRAN 2 /* Linear Transformation */ 44 #define VFR 4 /* Variable frame rate */ 45 #define USE_MULTAB 8 /* Set up multable distance calculations */ 46 47 /** 48 * @todo document 49 */ 50 typedef struct 51 { /* mul-table data types */ 52 unsigned short sigma; 53 int num; 54 short *pdf; 55 } 56 mul_table; 57 58 /** 59 * @todo document 60 */ 61 typedef struct 62 { 63 unsigned short num_dev8_index; 64 unsigned char *dev8_index; 65 unsigned short *wt_index; 66 short *gauss_dist_table; 67 short **dist_ptr; 68 prdata multable_factor; /* euclidean to multable */ 69 prdata multable_factor_gaussian; /* euclidean to multable */ 70 prdata grand_mod_cov; /* grand covariance modulus */ 71 prdata grand_mod_cov_gaussian; /* grand covariance modulus */ 72 } 73 mul_table_info; 74 75 /** 76 * @todo document 77 */ 78 typedef struct 79 { 80 const prdata *table; 81 prdata add_log_limit; 82 prdata scale; /* X - scale to log function */ 83 prdata inv_scale; 84 float logscale; /* Y - scale to log function */ 85 } 86 logadd_table_info; 87 88 /** 89 * @todo document 90 */ 91 typedef struct 92 { 93 unsigned long num; 94 accdata **between; 95 accdata *bmean; 96 accdata **within; 97 accdata *wmean; 98 } 99 transform_info; 100 101 /** 102 * @todo document 103 */ 104 typedef struct 105 { /* Segmentation parameters */ 106 int rel_low; 107 int rel_high; 108 int gap_period; 109 int click_period; 110 int breath_period; 111 int extend_annotation; 112 int param; 113 int min_initial_quiet_frames; /* num silence frames needed before input */ 114 int min_annotation_frames; /* minimum overall length */ 115 int max_annotation_frames; /* maximum overall length */ 116 int delete_leading_segments; /* num segments to delete. 0=no action */ 117 int leading_segment_accept_if_not_found; /* Do not reject segmentation if not found */ 118 int leading_segment_min_frames; /* remove unless shorter */ 119 int leading_segment_max_frames; /* remove unless exceeded */ 120 int leading_segment_min_silence_gap_frames;/* remove if good silence gap to next segment */ 121 int beep_size; /*X201 beep filter */ 122 int beep_threshold; /*X201 beep filter */ 123 int min_segment_rel_c0; /* Any segment gets deleted whose peak c0 is < max - min_segment_rel_c0 */ 124 125 #if DO_SUBTRACTED_SEGMENTATION 126 int snr_holdoff; /* Ignore first n frames when estimating speech level for SNR measure */ 127 int min_acceptable_snr; /* for an acceptable segmentation */ 128 #endif 129 } 130 endpoint_info; 131 132 133 /** 134 * @todo document 135 */ 136 typedef struct 137 { /* processed speech data/front end output */ 138 int ref_count; /* reference counts */ 139 /* Pattern vector section */ 140 int dim; /* dimension of frame vector */ 141 int use_dim; /* dimension used for recognition */ 142 int whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */ 143 int use_from; /* first channel used for recognition */ 144 featdata *last_frame; /* last frame processed in frame buffer */ 145 imeldata *seq; /* current valid frame */ 146 imeldata *seq_unnorm; /* current valid frame, for whole-word models */ 147 prdata seq_sq_sum; /* sum of the squared of frames */ 148 prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */ 149 prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword */ 150 int voicing_status; /* voicing code */ 151 int post_proc; /* post processing functions */ 152 imeldata *offset; /* offset vector with transformation */ 153 imeldata **matrix; /* linear transformation matrix */ 154 int imel_shift; /* Imelda scale factor (in shifts) */ 155 covdata **imelda; /* linear transformation matrix, PMC or RN */ 156 imeldata **invmat; /* inverse transformation matrix */ 157 int inv_shift; /* inverse Imelda scale factor (in shifts) */ 158 covdata **inverse; /* inverse linear transformation matrix, PMC or RN */ 159 #if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */ 160 int partial_distance_calc_dim; /* number of params to calc distance over, before approximating if beyond threshold */ 161 scodata partial_distance_threshold; 162 prdata partial_distance_calc_threshold; 163 prdata partial_distance_offset; 164 prdata global_distance_over_n_params; 165 int global_model_means[MAX_DIMEN]; 166 prdata partial_mean_sq_sum; 167 prdata partial_seq_sq_sum; 168 prdata partial_seq_unnorm_sq_sum; 169 #endif 170 imeldata *chan_offset; 171 /* Channel Normalization etc */ 172 173 /* Tables */ 174 prdata exp_wt[MAX_WTS]; /* weights exp lookup table */ 175 mul_table_info mul; /* Mul-table */ 176 logadd_table_info add; /* logadd-table */ 177 /* ENC */ 178 booldata is_setup_for_noise; 179 booldata do_whole_enc; /* to enable ENC */ 180 booldata do_sub_enc; /* to enable ENC */ 181 booldata enc_count; 182 booldata ambient_valid; /* ambient estimates valid */ 183 imeldata **pmc_fixmat; /* ENC matrix */ 184 imeldata **pmc_fixinv; /* inverse ENC matrix */ 185 covdata **pmc_matrix; /* ENC matrix in float */ 186 covdata **pmc_inverse; /* inverse ENC matrix in float */ 187 int pmc_matshift; /* scaling */ 188 int pmc_invshift; /* scaling */ 189 imeldata *ambient_mean; /* ambient mean vector */ 190 imeldata *ambient_prof; /* ambient estimates, pseudo space */ 191 imeldata *ambient_prof_unnorm; /* ambient estimates, unnormalised */ 192 logadd_table_info fbadd; /* logadd-table for ENC */ 193 #if DO_SUBTRACTED_SEGMENTATION 194 int mel_dim; 195 covdata **spec_inverse; 196 imeldata **spec_fixinv; 197 int spec_invshift; 198 int *cep_offset; 199 #endif 200 /* Parameters */ 201 prdata mix_score_scale; /* Mixture score scaling constant */ 202 prdata uni_score_scale; /* Unimodal score scaling constant */ 203 prdata uni_score_offset; /* Unimodal score offset constant */ 204 prdata imelda_scale; /* Imelda grand variance */ 205 /* Endpoint data */ 206 endpoint_info end; 207 208 } 209 preprocessed; 210 211 /** 212 * @todo document 213 */ 214 typedef struct 215 { 216 preprocessed *prep; /* The preprocessed data structure */ 217 /* The following stuff cannot be cloned */ 218 booldata do_imelda; /* Alignment based accumulation */ 219 transform_info imelda_acc; 220 } 221 pattern_info; 222 223 #endif /* _h_pre_desc_ */ 224