1 /*---------------------------------------------------------------------------* 2 * front.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #ifndef _h_front_ 23 #define _h_front_ 24 25 #include "all_defs.h" 26 #include "fronttyp.h" 27 #include "log_tabl.h" 28 #include "duk_err.h" 29 #include "voicing.h" 30 #include "specnorm.h" 31 #include "channorm.h" 32 #include "swicms.h" 33 #ifndef _RTT 34 #include "duk_io.h" 35 #endif 36 #include "fft.h" 37 #include "frontpar.h" /* Shared front end parameters structure. Native data types only. */ 38 39 40 #define SPEC_SUB 0 41 #define SPEC_CORRECT 0 42 #define BIGGER_WINDOW 0 43 #define MIN_WARP_SCALE 0.5 44 #define MAX_WARP_SCALE 1.5 45 46 #define D_FIXED D_LONG 47 48 49 50 #define FRAMERATE 100 51 #define NUM_MEL_FREQS 30 /* up to 3750 Hz. Now >5512 -BP */ 52 #define DELTA 3 53 #define Q2 7 54 #define NP 1025 55 #define NF 40 56 #define NC 40 57 #define MEL_FREQ_ARRAY_SIZE 30 58 59 60 /* Spectral sub def moved from spec_sub.c BP */ 61 #ifdef SET_RCSID 62 static const char spec_sub_h[] = "$Id: front.h,v 1.2.10.9 2007/08/31 17:44:53 dahan Exp $"; 63 #endif 64 65 /** 66 * @todo document 67 */ 68 typedef struct 69 { 70 cepdata *sub_vector; 71 int is_valid; 72 unsigned int frame_dur; 73 cepdata scale; 74 unsigned int count; 75 } 76 spectral_sub_info; 77 78 79 /** 80 * Contains the data storage points associated with a channel. 81 */ 82 typedef struct 83 { 84 size_t mel_dim; 85 86 /* WAVE data */ 87 int buff_size; 88 samdata *outbuff; /* incoming samples buffer */ 89 samdata *refbuff; /* outgoing samples buffer */ 90 fftdata *prebuff; /* buffer for preemphasised data */ 91 fftdata *prerefbuff; /* buffer for preemphasised data outgoing */ 92 int forget_factor; 93 norm_info *channorm; 94 swicms_norm_info *swicms; 95 spect_dist_info *spchchan[MAX_CHAN_DIM]; 96 97 /* FREQ data */ 98 int shift; 99 int num_freq; 100 cepdata *filterbank; 101 cepdata *filterbankref; 102 spectral_sub_info *spectral_sub; 103 104 /* CEP data */ 105 int frame_valid; /* whether frame is valid */ 106 long frame_count; /* frame count */ 107 int frame_delay; /* ignore the first few frames */ 108 cepdata *cep; /* cepstrum coefs. of prev. frames */ 109 cepdata *rasta; 110 featdata *framdata; 111 bigdata lastx; 112 } 113 front_channel; 114 115 116 117 /* This is where the front end objects are defined 118 WAVE (front_wave) 119 FREQ (front_freq) 120 CEP (front_cep) 121 */ 122 123 /** 124 * @todo document 125 */ 126 typedef struct 127 { 128 size_t samdim; 129 int samtyp; 130 int samplerate; 131 coefdata pre_mel; 132 int high_clip; 133 int low_clip; 134 int max_per10000_clip; 135 int max_dc_offset; 136 int high_noise_level_bit; 137 int low_speech_level_bit; 138 int min_samples; 139 } 140 front_wave; 141 142 143 /** 144 * FREQ object. 145 */ 146 typedef struct 147 { 148 int window_length; 149 int samplerate; 150 int framerate; 151 int frame_period; /* the following 3 are private */ 152 ESR_BOOL do_spectral_sub; 153 int do_nonlinear_filter; 154 ESR_BOOL do_filterbank_input; 155 ESR_BOOL do_filterbank_dump; 156 float warp_scale; /*## */ 157 float piecewise_start; /*## */ 158 int low_cut; 159 int high_cut; 160 int num_fb_to_use; 161 int *spectrum_filter; /* List of FFT taps to filter */ 162 int spectrum_filter_num; 163 fftdata peakpickup; 164 fftdata peakpickdown; 165 int cut_off_below, cut_off_above; 166 int np, ns, nf, lognp; 167 fftdata fcb[NF]; 168 fftdata *fc; 169 int fcmid[NF+2]; 170 fftdata fcscl[NF+1], framp[NP+1]; 171 fftdata *ham; 172 fft_info fft; 173 } 174 front_freq; 175 176 177 /** 178 * CEP object. 179 */ 180 typedef struct 181 { 182 ESR_BOOL do_dd_mel; 183 ESR_BOOL do_rasta; 184 int do_scales; 185 ESR_BOOL do_plp; 186 size_t mel_dim; 187 int lpc_order; 188 ESR_BOOL do_skip_even_frames; 189 ESR_BOOL do_smooth_c0; 190 int spectral_sub_frame_dur; 191 coefdata spec_sub_scale; 192 int forget_factor; /* preserve % of previous hist */ 193 int sv6_margin; 194 cepdata *melA_scale; 195 cepdata *melB_scale; 196 cepdata *dmelA_scale; 197 cepdata *dmelB_scale; 198 cepdata *ddmelA_scale; 199 cepdata *ddmelB_scale; 200 cepdata *rastaA_scale; 201 cepdata *rastaB_scale; 202 cepdata *mel_offset; 203 cepdata *mel_loop; 204 cepdata *cs; 205 log_table_info logtab; 206 } 207 front_cep; 208 209 210 /** 211 * @todo document 212 */ 213 typedef struct 214 { 215 front_wave *waveobj; 216 front_freq *freqobj; 217 front_cep *cepobj; 218 /* Internal memberrs that may need to be configurable. Currently constants 219 size_t mel_dim; 220 */ 221 } 222 front_config; 223 224 225 /* Front end function declarations follow */ 226 227 228 front_config *config_frontend(void); 229 int make_frame(front_channel *channel, front_wave *waveobj, 230 front_freq *freqobj, front_cep *cepobj, 231 voicing_info *voice, 232 samdata *inFramesWorth, samdata *refFramesWorth, 233 int num_samples, 234 featdata *framdata, featdata *voicedata); 235 void standard_front_init(front_config *config, front_freq *freqobj); 236 void init_cepstrum_analysis(front_config *config, front_freq *freqobj); 237 238 239 void load_samples(front_channel *channel, int window_length, 240 samdata *incom, samdata *outgo, int nsam); 241 void filterbank_emulation(front_channel * channel, front_wave *waveobj, 242 front_freq *freqobj, front_cep *cepobj, samdata *income, samdata *outgo, 243 int num_samples); 244 void cepstrum_params(front_channel *channel, front_wave *waveobj, 245 front_freq *freqobj, front_cep *cepobj); 246 int make_std_frame(front_channel *channel, front_cep *cepobj, 247 featdata *hFrame); 248 int purge_std_frames(front_channel *channel, front_cep *cepobj, 249 featdata *hFrame, int frame); 250 251 void init_spectral_sub(front_config *config, front_freq *freqobj); 252 void close_spectral_sub(front_freq *freqobj); 253 void reset_spectral_sub(front_freq *freqobj); 254 void do_spectral_subtraction(cepdata *fbo, spectral_sub_info* spectral_sub, 255 int num_freqs); 256 257 int create_spectrum_filter(front_freq *freqobj, int *freq, int *spread); 258 259 void clear_spectrum_filter(front_freq *freqobj); 260 261 front_config *create_config_object(void) ; 262 void setup_config_object(front_config *config, front_parameters *parameters); 263 void clear_config_object(front_config *config); 264 void delete_config_object(front_config *config); 265 266 267 front_channel *create_channel_object(void) ; 268 void delete_channel_object(front_channel *channel); 269 void setup_channel_object(front_channel *channel, front_wave *waveobj, 270 front_freq *freqobj, front_cep *cepobj); 271 void clear_channel_object(front_channel *channel); 272 void reset_channel_object(front_channel *channel); 273 274 /* 275 ** Fixed pont front end 276 ** 277 ** Function: Scaling (in bit shifts) 278 ** 279 ** preemphasis -1 280 ** hamming-window 0 281 ** fft 0 282 ** magnitude HFN 283 ** filterbank 0 284 ** 285 ** log 286 ** cosine HFN- 287 ** regression 0 288 ** scaling 289 */ 290 291 292 #endif /* _h_front_ */ 293