1 /*---------------------------------------------------------------------------* 2 * spec_anl.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #include <stdlib.h> 23 #ifndef _RTT 24 #include <stdio.h> 25 #endif 26 #include <string.h> 27 #include <math.h> 28 #include <limits.h> 29 #include <assert.h> 30 31 #include "hmm_desc.h" 32 #include "front.h" 33 #include "pendian.h" 34 #include "portable.h" 35 #include "LCHAR.h" 36 37 #include "../clib/memmove.h" 38 39 #define DEBUG 0 40 41 #include "sh_down.h" 42 43 static int sort_ints_unique(int *list, int *num); 44 //static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj); 45 46 void peakpick(front_freq *freqobj, fftdata *density, int num_freq); 47 void magsq(fftdata *x, fftdata *y, fftdata *z, int ns); 48 49 void preemph(fftdata *data, int window_len, samdata *wav_data, 50 int num_samples, coefdata pre_mel, 51 bigdata *last_sample); 52 void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo); 53 54 55 56 void filterbank_emulation(front_channel * channel, front_wave *waveobj, 57 front_freq *freqobj, front_cep *cepobj, samdata *income, 58 samdata *outgo, int num_samples) 59 { 60 /* Part II. Mel cepstrum coefficients 61 ** 62 ** Maintain parameter queue */ 63 MEMMOVE(channel->cep + (channel->mel_dim + 1), channel->cep, 64 (Q2 - 1) *(channel->mel_dim + 1), sizeof(cepdata)); 65 channel->shift = 0; 66 67 /* 2.01 Pre-emphasize waveform 68 Only the new samples are preemphasized. To carry on from the previous call, 69 the last sample value is stored in lastx. 70 */ 71 preemph(channel->prebuff, freqobj->window_length, income, num_samples, 72 waveobj->pre_mel, &channel->lastx); 73 74 #if DEBUG 75 log_report("preemphasized data\n"); 76 write_scaled_frames(freqobj->window_length, 1, channel->prebuff, D_FIXED, (float) 1 / (0x01 << WAVE_SHIFT)); 77 #endif 78 /****************************************************************************** 79 ** The "NEW" fft performs shifting operations in fixed point, to maximise 80 ** precision. 81 ** 82 *******************************************************************************/ 83 channel->shift += place_sample_data(&freqobj->fft, channel->prebuff, 84 freqobj->ham, freqobj->window_length); 85 #if DEBUG 86 log_report("windowed data\n"); 87 if (channel->shift >= 0) 88 { 89 write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift)); 90 write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)(0x01 << channel->shift)); 91 } 92 else 93 { 94 write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift)); 95 write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.imag, D_FIXED, (float)1 / (0x01 << -channel->shift)); 96 } 97 #endif 98 channel->shift *= 2; 99 channel->shift += fft_perform_and_magsq(&freqobj->fft); 100 101 #if DEBUG 102 log_report("After magnitude squared (%d)\n", channel->frame_count); 103 if (channel->shift >= 0) 104 write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)(0x01 << (channel->shift))); 105 else 106 write_scaled_frames(freqobj->fft.size, 1, freqobj->fft.real, D_FIXED, (float)1 / (0x01 << (- channel->shift))); 107 #endif 108 109 #if DEBUG 110 log_report("After magnitude squared: "); 111 if (channel->shift >= 0) 112 write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift)); 113 else 114 write_scaled_frames(freqobj->fft.size, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift)); 115 #endif 116 117 if (freqobj->do_nonlinear_filter) 118 peakpick(freqobj, freqobj->fft.real, freqobj->fft.size + 1); 119 120 #if DEBUG 121 log_report("After peakpick: "); 122 if (channel->shift >= 0) 123 write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)(0x01 << channel->shift)); 124 else 125 write_scaled_frames(freqobj->fft.size + 1, 1, (void *)freqobj->fft.real, D_FIXED, (float)1 / (0x01 << -channel->shift)); 126 #endif 127 128 /* 2.23 Apply filterbank emulation */ 129 channel->shift += RAMP_SHIFT; 130 filtbank(freqobj, freqobj->fft.real, channel->filterbank); 131 #if DEBUG 132 log_report("After filterbanked: "); 133 if (channel->shift >= 0) 134 write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)(0x01 << channel->shift)); 135 else 136 write_scaled_frames(freqobj->nf, 1, channel->filterbank, D_FIXED, (float)1 / (0x01 << -channel->shift)); 137 #endif 138 139 return; 140 } 141 142 143 void preemph(fftdata *data, int window_len, samdata *wav_data, 144 int num_samples, coefdata pre_mel, 145 bigdata *last_sample) 146 /* 147 ** pre-emphasize on speech data, check for end of data */ 148 /* SCALE: In this stage we're introducing a scale factor of 2 */ 149 { 150 int i; 151 bigdata temp; 152 153 ASSERT(data); 154 ASSERT(last_sample); 155 ASSERT(wav_data); 156 ASSERT(num_samples >= 0); 157 if (num_samples > window_len) 158 num_samples = window_len; 159 160 if (num_samples < window_len) 161 MEMMOVE(data, data + num_samples, (window_len - num_samples), 162 sizeof(fftdata)); 163 data += window_len - num_samples; 164 165 /* If no preemphasis to do 166 */ 167 if (pre_mel == 0) 168 { /* dont't shift */ 169 for (i = 0; i < num_samples; i++) 170 data[i] = (fftdata) wav_data[i]; 171 return; 172 } 173 174 /* Otherwise do the preemphasis 175 */ 176 for (i = 0; i < num_samples; i++) 177 { 178 temp = SHIFT_UP((bigdata)wav_data[i], COEFDATA_SHIFT); 179 data[i] = (fftdata)(SHIFT_DOWN(temp - (*last_sample), COEFDATA_SHIFT)); 180 *last_sample = (bigdata)pre_mel * wav_data[i]; 181 182 } 183 return; 184 } 185 186 void magsq(fftdata *x, fftdata *y, fftdata *z, int ns) 187 /* 188 ** magnitude squared, tailored for TI FFT routines 189 ** The dynamic range should fit 32 - RAMP_SHIFT */ 190 { 191 int i; 192 193 ASSERT((float)x[0] *(float)x[0] < LONG_MAX); 194 ASSERT((float)x[0] *(float)x[0] > LONG_MIN); 195 z[0] = (fftdata)((bigdata)x[0] * (bigdata)x[0]); 196 for (i = 1; i < ns; i++) 197 { 198 ASSERT(((fftdata)x[i] *(fftdata)x[i]) >= 0); 199 ASSERT(((fftdata)y[i] *(fftdata)y[i]) >= 0); 200 ASSERT((float)x[i] *(float)x[i] < LONG_MAX); 201 ASSERT((float)x[i] *(float)x[i] > LONG_MIN); 202 ASSERT((float)y[i] *(float)y[i] < LONG_MAX); 203 ASSERT((float)y[i] *(float)y[i] > LONG_MIN); 204 /* z[i]= (fftdata) SHIFT_DOWN ((bigdata)x[i] * (bigdata)x[i] + (bigdata)y[i] * (bigdata)y[i], RAMP_SHIFT); 205 */ 206 z[i] = (fftdata)(((bigdata)x[i] * (bigdata)x[i]) 207 + ((bigdata)y[i] * (bigdata)y[i])); 208 if (z[i] <= 0) 209 z[i] = (fftdata) 1; 210 } 211 return; 212 } 213 214 void peakpick(front_freq *freqobj, fftdata *density, int num_freq) 215 { 216 int i; 217 fftdata peak; 218 fftdata bdecay; 219 fftdata fdecay; 220 int first; 221 int last; 222 223 ASSERT(freqobj); 224 /* Fixed pt requires scale up of COEFDATA_SHIFT on these pars (coefdata) */ 225 bdecay = freqobj->peakpickdown; 226 fdecay = freqobj->peakpickup; 227 228 if ((bdecay <= (fftdata) 0.0) && (fdecay <= (fftdata) 0.0)) 229 return; 230 231 first = freqobj->cut_off_below; 232 last = freqobj->cut_off_above; 233 /* this filters from cut_off_below to */ 234 /* cut_off_above inclusive */ 235 236 if (last >= num_freq) 237 last = num_freq - 1; 238 /* as most routines seem to check both */ 239 /* limits */ 240 241 if (bdecay > 0.0) 242 { 243 ASSERT(density[last] >= 0); 244 peak = density[last]; 245 for (i = last - 1; i >= first; i--) 246 { 247 peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)bdecay); 248 ASSERT(peak >= 0); 249 if (density[i] > peak) 250 peak = density[i]; 251 else 252 density[i] = peak; 253 } 254 } 255 if (fdecay > 0.0) 256 { 257 peak = density[first]; 258 for (i = first + 1; i <= last; i++) 259 { 260 peak = (fftdata)(SHIFT_DOWN((bigdata)peak, COEFDATA_SHIFT) * (bigdata)fdecay); 261 if (density[i] > peak) 262 peak = density[i]; 263 else 264 density[i] = peak; 265 } 266 } 267 return; 268 } 269 270 void filtbank(front_freq *freqobj, fftdata *density, cepdata *fbo) 271 /* 272 ** pwr spect -> filter bank output (linear) */ 273 { 274 int i, j, k; 275 bigdata t, sum, mom, nxt; 276 277 /* Scale down before starting mel-filterbank operations 278 */ 279 for (i = 0; i < freqobj->cut_off_above; i++) 280 density[i] = SHIFT_DOWN(density[i], RAMP_SHIFT); 281 282 j = MAX(freqobj->fcmid[0], freqobj->cut_off_below); 283 nxt = 0; 284 for (; j < freqobj->fcmid[1]; j++) 285 { 286 ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) < LONG_MAX); 287 ASSERT(((float)nxt + (float)freqobj->framp[j] *(float)density[j]) > -LONG_MAX); 288 nxt += (bigdata) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT); 289 } 290 for (i = 0, k = 2; i < freqobj->nf; i++, k++) 291 { 292 sum = mom = 0; 293 for (; j < freqobj->fcmid[k]; j++) 294 { 295 /* TODO: Tidy up this fixed pt shifting. BP */ 296 297 ASSERT((float) freqobj->framp[j] *(float) density[j] < LONG_MAX); 298 ASSERT((float) freqobj->framp[j] *(float) density[j] > LONG_MIN); 299 ASSERT((float) sum + (float)density[j] < LONG_MAX); 300 ASSERT((float) sum + (float)density[j] > LONG_MIN); 301 sum += (bigdata) density[j]; 302 ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] < LONG_MAX); 303 ASSERT((float) mom + (float) freqobj->framp[j] *(float) density[j] > LONG_MIN); 304 305 mom += (bigdata)(long) SHIFT_DOWN((bigdata)freqobj->framp[j] * (bigdata)density[j], RAMP_SHIFT); 306 } 307 308 ASSERT(((float)nxt + (float)sum - (float)mom) < LONG_MAX); 309 ASSERT(((float)nxt + (float)sum - (float)mom) > LONG_MIN); 310 311 /* TODO: refine this expression. Shift down fcscl in advance. */ 312 t = (bigdata)((SHIFT_UP(nxt + sum - mom, HALF_RAMP_SHIFT) 313 + SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT + 1)) 314 / SHIFT_DOWN(freqobj->fcscl[i+1], HALF_RAMP_SHIFT)); 315 /* TODO: cleanup and also check for division by zero */ 316 nxt = mom; 317 fbo[i] = (cepdata) t; 318 } 319 return; 320 } 321 322 int create_spectrum_filter(front_freq *freqobj, int *freq, int *spread) 323 { 324 int ii, jj, freq_step; 325 int lo, hi; 326 ASSERT(freqobj); 327 ASSERT(freqobj->spectrum_filter_num == 0); 328 ASSERT(freqobj->samplerate > 0); 329 /* Convert to FFT taps. Mark adjacent taps as well as taps within spread */ 330 freq_step = (freqobj->samplerate << 12) / (2 * freqobj->fft.size); 331 freqobj->spectrum_filter = (int *) CALLOC_CLR(freqobj->fft.size + 1, sizeof(int), "cfront.spectrum_filter"); 332 freqobj->spectrum_filter_num = 0; 333 for (ii = 0 ; ii < MAX_FILTER_NUM; ii++) 334 { 335 if (freq[ii] == 0) 336 continue; 337 lo = (((freq[ii] - spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate; 338 hi = (((freq[ii] + spread[ii]) * 2 * freqobj->fft.size) + freqobj->samplerate / 2) / freqobj->samplerate; 339 340 341 for (jj = lo; jj <= hi;jj++) 342 { 343 if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size) 344 SERVICE_ERROR(MAX_FILTER_POINTS_EXCEEDED); 345 freqobj->spectrum_filter[freqobj->spectrum_filter_num++] = jj; 346 } 347 /* jj=0; 348 while (((jj+1)*freq_step)>>12 <= freq[ii]-spread[ii]) 349 jj++; 350 while (((jj-1)*freq_step>>12) < freq[ii]+spread[ii]){ 351 if (freqobj->spectrum_filter_num >= (int) freqobj->fft.size) 352 SERVICE_ERROR (MAX_FILTER_POINTS_EXCEEDED); 353 freqobj->spectrum_filter[freqobj->spectrum_filter_num++]= jj; 354 jj++; 355 } 356 */ 357 } 358 sort_ints_unique(freqobj->spectrum_filter, &freqobj->spectrum_filter_num); 359 return (freqobj->spectrum_filter_num); 360 } 361 362 void clear_spectrum_filter(front_freq *freqobj) 363 { 364 ASSERT(freqobj->spectrum_filter); 365 if (freqobj->spectrum_filter) 366 FREE((char *) freqobj->spectrum_filter); 367 freqobj->spectrum_filter = NULL; 368 freqobj->spectrum_filter_num = 0; 369 return; 370 } 371 372 static int sort_ints_unique(int *list, int *num) 373 { 374 /* Sort a list of ints and make unique */ 375 int ii, jj, temp; 376 for (ii = 1; ii < *num; ii++) 377 { 378 for (jj = 0; jj < ii; jj++) 379 { 380 temp = list[ii]; 381 if (temp < list[jj]) 382 { 383 MEMMOVE(&list[jj+1], &list[jj], (ii - jj), sizeof(int)); 384 list[jj] = temp; 385 break; 386 } 387 if (temp == list[jj]) 388 { 389 MEMMOVE(&list[ii], &list[ii+1], (*num - ii), sizeof(int)); 390 391 392 393 (*num)--; 394 } 395 } 396 } 397 return *num; 398 } 399 400 //static void mask_fft_taps(fftdata *data, int num, front_freq *freqobj) 401 //{ 402 // for (int i = 0; i < freqobj->spectrum_filter_num; ++i) 403 // { 404 // ASSERT(freqobj->spectrum_filter[i] < num); 405 // data[freqobj->spectrum_filter[i]] = 0; 406 // } 407 //} 408 409 /* -------------------------------------------------- 410 freq_warp will do pure linear warping if the warp 411 scale > 1.0. Otherwise it will do piecewise warp 412 which means warping the second part, from xstart 413 to the bandwidth with another scale which is 414 determined by b and c in the formulation. 415 In general, 0.7 < wscale < 1.4, and xstart <= 1 416 08/15/01, Puming Zhan 417 --------------------------------------------------- */ 418 void freq_warp(front_freq *freqobj, fftdata *inbuf, int ns) 419 { 420 int i; 421 int nsE; 422 float x1, y1, b, c, wscale; 423 fftdata *tmpbuf; 424 425 ASSERT(freqobj && inbuf); 426 427 ASSERT(freqobj->warp_scale != 0); 428 429 wscale = freqobj->warp_scale; 430 x1 = freqobj->piecewise_start; 431 tmpbuf = (fftdata *) CALLOC(ns, sizeof(fftdata), "cfront.tmpbuf"); 432 433 if (wscale < MIN_WARP_SCALE || wscale > MAX_WARP_SCALE) 434 { 435 SERVICE_ERROR(WARP_SCALE); 436 } 437 if (x1 > 1.0 || x1 < 0.5) 438 { 439 SERVICE_ERROR(PIECEWISE_START); 440 } 441 442 y1 = x1 < wscale ? (float)x1 / wscale : (float)1.0; 443 444 b = y1 < 1.0 ? (float)((1.0 - x1) / (1.0 - y1)) : (float)0.0; 445 446 c = (float)((1.0 - b) * (ns - 1)); 447 448 nsE = (int)(y1 * (ns - 1)); 449 450 for (i = 0; i < ns; i++) 451 { 452 float x = i > nsE ? b * i + c : wscale * i; 453 int u = (int)ceil((double)x); 454 int l = (int)floor((double)x); 455 float w1 = x - l; 456 float w2 = 1 - w1; 457 458 if (u < ns) 459 { 460 tmpbuf[i] = (int)(w1 * inbuf[u] + w2 * inbuf[l]); 461 } 462 else 463 { 464 tmpbuf[i] = inbuf[ns-1]; 465 } 466 } 467 468 /* need to copy the warped fft into inbuf */ 469 /* because the following function filtbank() */ 470 /* will take inbuf as input */ 471 /* considering that this function will be */ 472 /* for every frame, it may not be a good idea*/ 473 /* to do malloc here */ 474 475 for (i = 0; i < ns; i++) 476 inbuf[i] = tmpbuf[i]; 477 478 FREE((char *) tmpbuf); 479 } 480