1 /*---------------------------------------------------------------------------* 2 * srec.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include <stdlib.h> 21 #include <string.h> 22 #include <math.h> 23 24 #include "pstdio.h" 25 #include "passert.h" 26 #include "portable.h" 27 28 #include "hmm_desc.h" 29 #include "utteranc.h" 30 #include "hmmlib.h" 31 #include "srec_sizes.h" 32 #include "search_network.h" 33 #include "srec_context.h" 34 #include "srec.h" 35 #include "srec_stats.h" 36 #include "srec_debug.h" 37 #include "srec_tokens.h" 38 #include "word_lattice.h" 39 #include "swimodel.h" 40 #if USE_COMP_STATS 41 #include "comp_stats.h" 42 #endif 43 #include "c42mul.h" 44 45 #ifdef SET_RCSID 46 static const char *rcsid = 0 ? (const char *) &rcsid : 47 "$Id: srec.c,v 1.39.4.31 2008/06/23 17:20:39 dahan Exp $"; 48 #endif 49 50 #define SILENCE_MODEL_INDEX 0 51 #define PRUNE_TIGHTEN 0.9 /*if we run out of room in the state arrays, 52 keep multiplying pruning thresh by this amount 53 until there is room */ 54 55 /*--------------------------------------------------------------------------* 56 * * 57 * protos * 58 * * 59 *--------------------------------------------------------------------------*/ 60 static void reset_cost_offsets(multi_srec* rec, frameID current_search_frame, 61 costdata current_best_cost); 62 static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta, 63 costdata *pcurrent_best_cost, 64 costdata *precomputed_model_scores); 65 66 /*--------------------------------------------------------------------------* 67 * * 68 * utils * 69 * * 70 *--------------------------------------------------------------------------*/ 71 72 static void dump_core(char *msg) 73 { 74 PLogError ( msg ); 75 ASSERT(0); 76 } 77 78 static altword_token* reprune_altword_token_batch(srec* rec, 79 altword_token* batch, 80 bigcostdata costlimit) 81 { 82 altword_token *awtoken, *next_awtoken; 83 altword_token **awtokenp; 84 85 /* look for previous invalidate, see below */ 86 if (batch->costbasis == MAXcostdata / 2) 87 { /* was costdelta // costlimit */ 88 free_altword_token(rec, batch); 89 return AWTNULL; 90 } 91 /* a flag to check whether we already pruned this batch would be nice */ 92 93 /* first prune the CDR of the list, ie everything but the head */ 94 awtokenp = &batch->next_token; 95 for (awtoken = batch->next_token; awtoken != AWTNULL; awtoken = next_awtoken) 96 { 97 next_awtoken = awtoken->next_token; 98 if ((bigcostdata)batch->costbasis + awtoken->costdelta > costlimit) 99 { 100 (*awtokenp) = awtoken->next_token; 101 awtoken->refcount = 1; /* to make sure it frees */ 102 free_altword_token(rec, awtoken); 103 } 104 else 105 awtokenp = &awtoken->next_token; 106 } 107 108 /* now see about the CAR of the list, ie the head */ 109 if ((bigcostdata)(batch->costbasis) + batch->costdelta < 
costlimit) 110 { 111 /* head of list survives pruning => no problem */ 112 } 113 else if (batch->next_token != AWTNULL) 114 { 115 /* head of list does not survive => since we can't change the pointer 116 we copy a survivor into it and free that original */ 117 awtoken = batch->next_token; 118 batch->costdelta = awtoken->costdelta; 119 batch->word = awtoken->word; 120 batch->word_backtrace = awtoken->word_backtrace; 121 /*ASSERT( batch->refcount == awtoken->refcount); */ 122 /* batch->refcount = awtoken->refcount; */ 123 batch->next_token = awtoken->next_token; 124 awtoken->refcount = 1; /* to make sure it frees */ 125 free_altword_token(rec, awtoken); 126 } 127 else 128 { 129 /* head of list does not survive, nothing survives, so invalidate it */ 130 batch->costbasis = MAXcostdata / 2; /* was costdelta */ 131 free_altword_token(rec, batch); 132 batch = AWTNULL; 133 } 134 return batch; 135 } 136 137 static void reprune_altword_tokens(srec* rec) 138 { 139 stokenID i, j; 140 fsmarc_token* stoken; 141 fsmnode_token* ftoken; 142 bigcostdata current_prune_delta = rec->prune_delta; 143 altword_token* awtoken; 144 145 /* first clear the costbases */ 146 for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index) 147 { 148 stoken = &rec->fsmarc_token_array[i]; 149 for (j = 0; j < stoken->num_hmm_states; j++) 150 if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL) 151 awtoken->costbasis = MAXcostdata; 152 } 153 for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index) 154 { 155 ftoken = &rec->fsmnode_token_array[i]; 156 if ((awtoken = ftoken->aword_backtrace) != AWTNULL) 157 awtoken->costbasis = MAXcostdata; 158 } 159 /* costbases for altword attached to stoken */ 160 for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index) 161 { 162 stoken = &rec->fsmarc_token_array[i]; 163 for (j = 0; j < stoken->num_hmm_states; j++) 164 if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL) 165 if (awtoken->costbasis > stoken->cost[j]) 166 awtoken->costbasis = stoken->cost[j]; 167 } 168 /* costbases for altword attached to ftokens */ 169 for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index) 170 { 171 ftoken = &rec->fsmnode_token_array[i]; 172 if ((awtoken = ftoken->aword_backtrace) != AWTNULL) 173 if (awtoken->costbasis > ftoken->cost) 174 awtoken->costbasis = ftoken->cost; 175 } 176 177 /* now reprune */ 178 while (rec->altword_token_freelist_len < rec->altword_token_array_size / 4 179 || rec->altword_token_freelist_len < 2*rec->word_priority_q->max_in_q) 180 { 181 SREC_STATS_INC_AWTOKEN_REPRUNES(1); 182 current_prune_delta = (costdata)(PRUNE_TIGHTEN * PRUNE_TIGHTEN * current_prune_delta); 183 for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index) 184 { 185 stoken = &rec->fsmarc_token_array[i]; 186 for (j = 0; j < stoken->num_hmm_states; j++) 187 { 188 if (stoken->aword_backtrace[j] != AWTNULL) 189 { 190 stoken->aword_backtrace[j] = 191 reprune_altword_token_batch(rec, stoken->aword_backtrace[j], 192 current_prune_delta); 193 } 194 } 195 } 196 for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index) 197 { 198 ftoken = &rec->fsmnode_token_array[i]; 199 if (ftoken->aword_backtrace != AWTNULL) 200 { 201 ftoken->aword_backtrace = 202 reprune_altword_token_batch(rec, ftoken->aword_backtrace, 203 current_prune_delta); 204 } 205 } 206 ASSERT(current_prune_delta > 0); 207 } 208 } 209 210 #define refcopy_altwords(rEc, aWtOkEn) 
(aWtOkEn?(aWtOkEn->refcount++,aWtOkEn):aWtOkEn)

static altword_token* copy_altwords(srec* rec, altword_token* list1, costdata delta)
{
  altword_token *q2, dummy, *last_q2 = &dummy;
  costdata q2_costdelta;

  /* first check for space */
#if BUILD & BUILD_DEBUG
  int num = 0;

  for (num = 0, q2 = list1; q2 != AWTNULL; q2 = q2->next_token)
    num++;
  if (num > rec->altword_token_freelist_len)
  {
    printf("warning: mid-copy reprune_altword_tokens()\n");
    ASSERT(0);
    reprune_altword_tokens(rec);
  }
#endif

  /* now do the copy */
  for (; list1 != AWTNULL; list1 = list1->next_token)
  {
    ASSERT(list1->refcount >= 1);

    q2_costdelta = list1->costdelta + delta;
    ASSERT(list1->costdelta != MAXcostdata);
    if (q2_costdelta > rec->prune_delta)
      continue;
    q2 = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
    if (!q2) /* this should never happen */
      break;
    last_q2->next_token = q2;
    q2->costdelta = q2_costdelta;
    q2->word = list1->word;
    q2->word_backtrace = list1->word_backtrace;
    last_q2 = q2;
  }
  last_q2->next_token = AWTNULL;
  return dummy.next_token;
}

#if 1
/* sizewise_altwords just makes sure the list of altwords is no longer than
   the number of possible word ends. Any tokens beyond that length will get
   ignored later anyway, so we may as well kill them here.
   This is also related to the anticipated repruning. This code currently
   makes use of calloc/free and qsort, but could easily be rewritten to do a
   linear in-place selection; a linear scan for the ~10 best scores should not
   take too long. This is on the todo list! (a sketch of one possible
   replacement follows this block) */
int altword_token_ptr_cmp(const void* a, const void* b)
{
  const altword_token** A = (const altword_token**)a, **B = (const altword_token**)b;
  if ((*A)->costdelta > (*B)->costdelta) return 1;
  else if ((*A)->costdelta < (*B)->costdelta) return -1;
  else return 0;
}
static altword_token* sizewise_altwords(srec* rec, altword_token* awtoken_head)
{
#define SIZEWISE_THRESH (rec->word_priority_q->max_in_q)
#define SIZEWISE_TARGET (rec->word_priority_q->max_in_q*4/5)
  int i, num;
  altword_token *awtoken, **list;

  if( SIZEWISE_TARGET == 0) {
    free_altword_token(rec, awtoken_head);
    return NULL;
  }
  num = count_altword_token(rec, awtoken_head);
  /* if the linked list is shorter than max_in_q we're fine */
  if (num <= SIZEWISE_THRESH)
    return awtoken_head;
  else
  {
    list = (altword_token**)CALLOC(num, sizeof(altword_token*), L("search.srec.altword_tokens"));
    ASSERT(awtoken_head->refcount == 1);
    for (i = 0, awtoken = awtoken_head; i < num; i++, awtoken = awtoken->next_token)
      list[i] = awtoken;
    qsort(list, num, sizeof(altword_token*), altword_token_ptr_cmp);
    for (i = 0; i < SIZEWISE_TARGET; i++)
      list[i]->next_token = list[i+1];
    if(i>0) list[i-1]->next_token = AWTNULL;
    for (; i < num; i++)
    {
      list[i]->refcount = 1; /* make sure it frees */
      free_altword_token(rec, list[i]);
    }
    awtoken_head = list[0];
    awtoken_head->refcount = 1;
    FREE(list);
    return awtoken_head;
  }
}
#else
#define sizewise_altwords(ReC,hEad) hEad
#endif
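/* Illustrative sketch only (not called anywhere): the todo note above says the
   CALLOC/qsort version of sizewise_altwords could be replaced by a linear,
   allocation-free selection. One way that might look, assuming only the
   altword_token fields (costdelta, next_token, refcount) and helpers already
   used in this file; the helper name select_best_altwords_linear is
   hypothetical. */
#if 0
static altword_token* select_best_altwords_linear(srec* rec,
                                                  altword_token* head,
                                                  int target)
{
  altword_token *kept = AWTNULL, **kept_tail = &kept;
  /* repeatedly unlink the lowest-costdelta token and append it to the kept
     list; roughly O(target * length), which stays small since target is
     bounded by max_in_q */
  while (target-- > 0 && head != AWTNULL)
  {
    altword_token **pp, **best_pp = &head, *best;
    for (pp = &head; *pp != AWTNULL; pp = &(*pp)->next_token)
      if ((*pp)->costdelta < (*best_pp)->costdelta)
        best_pp = pp;
    best = *best_pp;
    *best_pp = best->next_token;
    best->next_token = AWTNULL;
    *kept_tail = best;
    kept_tail = &best->next_token;
  }
  /* whatever is left over did not make the cut */
  while (head != AWTNULL)
  {
    altword_token* next = head->next_token;
    head->refcount = 1; /* make sure it frees */
    free_altword_token(rec, head);
    head = next;
  }
  return kept;
}
#endif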
/*--------------------------------------------------------------------------*
 *                                                                          *
 * acoustic scoring utils                                                   *
 *                                                                          *
 *--------------------------------------------------------------------------*/

#define DO_COMPUTE_MODEL      0
#define DO_NOT_COMPUTE_MODEL  MAXcostdata

static asr_uint16_t best_uint16(asr_uint16_t* p, int n)
{
  asr_uint16_t rv = p[0];
  for (; --n > 0; p++) if (rv > *p) rv = *p;
  return rv;
}

static int compute_model_scores(costdata *current_model_scores, const SWIModel *acoustic_models,
                                pattern_info *pattern, frameID current_search_frame)
{
  int i;
  int num_models_computed = 0;

  for (i = 0; i < acoustic_models->num_hmmstates; i++)
  {
    if (current_model_scores[i] == DO_COMPUTE_MODEL)
    {
      scodata score = mixture_diagonal_gaussian_swimodel(pattern->prep,
                      &acoustic_models->hmmstates[i], acoustic_models->num_dims);
      ASSERT(score <= 0 && "model score out of range");

      current_model_scores[i] = (costdata) - score;
      num_models_computed++;
    }
  }
  return num_models_computed;
}
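/* Editorial note: compute_model_scores() above and find_which_models_to_compute()
   below are used as a pair each frame by the per-frame driver, which lives
   outside this excerpt: the search first marks the current_model_scores entries
   it needs with DO_COMPUTE_MODEL, then the batch scorer replaces every marked
   entry with its negated (hence non-negative) acoustic score. Entries left at
   DO_NOT_COMPUTE_MODEL should never be read by the update functions, since no
   active state refers to them. */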
/*precompute all needed models to be used by next frame of search*/

static int find_which_models_to_compute(srec *rec, const SWIModel *acoustic_models)
{
  int i;
  modelID model_index;
  stokenID current_token_index;
  ftokenID current_ftoken_index;
  fsmarc_token *current_token;
  fsmnode_token *current_ftoken;
  costdata *current_model_scores;
  /* arcID arc_index; */
  arcID fsm_arc_index;
  HMMInfo* hmm_info;
  FSMnode* fsm_node;
  FSMarc* fsm_arc;
  /* use the current_model_scores array both to tell the model computing stuff
     what models to compute and to get the scores back. This is a bit ugly, but
     saves having another array to allocate */

  /* this belongs elsewhere at initialization,
     eg. where we'll associate search to acoustic models */
  rec->avg_state_durations = acoustic_models->avg_state_durations;

  current_model_scores = rec->current_model_scores;

  for (model_index = 0; model_index < acoustic_models->num_hmmstates; model_index++)
  {
    current_model_scores[model_index] = DO_NOT_COMPUTE_MODEL;
  }

  current_token_index = rec->active_fsmarc_tokens;

  while (current_token_index != MAXstokenID)
  {
    current_token = &(rec->fsmarc_token_array[current_token_index]);
    /* need to compute all models needed within this HMM */
    fsm_arc = &rec->context->FSMarc_list[ current_token->FSMarc_index];
    hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];

    /* handle all states that are alive in this hmm */
    for (i = 0; i < hmm_info->num_states; i++)
    {
      if ((current_token->cost[i] != MAXcostdata) ||
          ((i > 0) && current_token->cost[i-1] != MAXcostdata))
      {
        model_index = hmm_info->state_indices[i];
        current_model_scores[model_index] = DO_COMPUTE_MODEL;
      }
    }
    current_token_index = current_token->next_token_index;
  }

  /* for each active FSM node, find models which can come from that node */

  current_ftoken_index = rec->active_fsmnode_tokens;

  while (current_ftoken_index != MAXftokenID)
  {
    current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
    fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
    fsm_arc = NULL;
    for (fsm_arc_index = fsm_node->un_ptr.first_next_arc; fsm_arc_index != MAXarcID;
         fsm_arc_index = fsm_arc->linkl_next_arc) {
      fsm_arc = rec->context->FSMarc_list + fsm_arc_index;

      if (fsm_arc->ilabel != MAXlabelID)
      {
        hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
        if (hmm_info->num_states > 0)
        {
          /* we should build in here a check that this arc has reasonable weight */
          /* if(fsm_arc->cost < rec->prune_delta) */
          current_model_scores[hmm_info->state_indices[0]] = DO_COMPUTE_MODEL;
        }
      }
    }
    current_ftoken_index = current_ftoken->next_token_index;
  }

  /* compute the scores in a batch - this allows the model computing code to
     chunk it up however it wants */
  return 0;
}

/*--------------------------------------------------------------------------*
 *                                                                          *
 * pruning utils                                                            *
 *                                                                          *
 *--------------------------------------------------------------------------*/

/*this is called at the end of the search*/
static int prune_new_tokens(srec *rec, costdata current_prune_thresh)
{
  int i;
  nodeID num_deleted;
  stokenID token_index;
  fsmarc_token *token;
  stokenID next_token_index;
  stokenID *list_head_pointer;
  char any_alive;

  num_deleted = 0;
  list_head_pointer = &(rec->active_fsmarc_tokens);

  for (token_index = rec->active_fsmarc_tokens; token_index != MAXstokenID;
       token_index = next_token_index)
  {
    token = &(rec->fsmarc_token_array[token_index]);
    next_token_index = token->next_token_index;

    any_alive = 0;

    for (i = 0; i < token->num_hmm_states; i++)
    {
      if (token->cost[i] < current_prune_thresh)
      {
        any_alive = 1;
      }
    }

    if (!any_alive)
    { /* everything pruned so recycle the token */
      *list_head_pointer = next_token_index;

      rec->best_token_for_arc[rec->fsmarc_token_array[token_index].FSMarc_index] = MAXstokenID;

      free_fsmarc_token(rec, token_index);

      num_deleted++;
      rec->num_new_states--;
    }
    else
    {
      list_head_pointer = &token->next_token_index;
    }
  }
  return num_deleted;
}

static void reprune_word_tokens_if_necessary(srec *rec)
{
  word_token* wtoken;
  wtokenID wtoken_index = rec->word_token_freelist;
  wtokenID num_free_wtokens = 0;

  for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
  {
    wtoken = &rec->word_token_array[ wtoken_index];
    num_free_wtokens++;
  }
  if (num_free_wtokens < 2*rec->word_priority_q->max_in_q)
    reprune_word_tokens(rec, 0);
}

/* this is called when we run out of room in the state token arrays and need to
   make more room - it only prunes in the new time slice - we could also look
   at pruning the previous slice if needed */

static costdata reprune_new_states(srec *rec, costdata current_best_cost, costdata current_prune_delta)
{
  int num_deleted;
  /* first check to see if current pruning thresholds make enough room
     (because of better max) */

  prune_new_tokens(rec, current_best_cost + current_prune_delta);

  ASSERT(((float)current_best_cost) + current_prune_delta < (float)SHRT_MAX);

  while ((rec->num_new_states >= rec->max_new_states - 1)
         || rec->fsmarc_token_freelist == MAXstokenID)
  {
    SREC_STATS_INC_STOKEN_REPRUNES(1);

    current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);

    if (current_prune_delta <= 1)
    {
      /* this seems like an unlikely case, but if we can't prune enough to make
         room, give up on the utterance (better than being stuck here forever!) */

      /*FIX replace with crec abort mechanism*/
      PLogError ("reprune_new_states: can't seem to prune enough - best cost %d num_new_states %d\n",
                 current_best_cost, rec->num_new_states);

      print_fsmarc_token_list(rec, rec->active_fsmarc_tokens, "CANNOT PRUNE");

      dump_core("reprune died\n");
    }

    num_deleted = prune_new_tokens(rec, current_best_cost + current_prune_delta);

    ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
  }
  return current_prune_delta;
}
static void prune_fsmnode_tokens(srec *rec, costdata current_prune_thresh, ftokenID not_this_one)
{
  nodeID num_deleted;
  ftokenID token_index;
  fsmnode_token *token;
  ftokenID next_token_index;
  ftokenID *list_head_pointer;

  num_deleted = 0;
  token_index = rec->active_fsmnode_tokens;

  token = &(rec->fsmnode_token_array[token_index]);
  list_head_pointer = &(rec->active_fsmnode_tokens);

  while (token_index != MAXftokenID)
  {
    next_token_index = token->next_token_index;
    if( token_index!=not_this_one && token->cost >= current_prune_thresh)
    {
      /* pruned so recycle the token */
      *list_head_pointer = next_token_index;
      rec->best_token_for_node[token->FSMnode_index] = MAXftokenID;
      free_fsmnode_token(rec, token_index);
      num_deleted++;
    }
    else
    {
      list_head_pointer = &token->next_token_index;
    }
    token_index = next_token_index;
    token = &(rec->fsmnode_token_array[token_index]);
  }
}

/* this is called when we run out of room in the state token arrays and need to
   make more room - it only prunes in the new time slice - we could also look
   at pruning the previous slice if needed */

static costdata reprune_fsmnode_tokens(srec *rec, costdata current_best_cost, costdata current_prune_delta,
                                       ftokenID not_this_one)
{
  /* first check to see if current pruning thresholds make enough
     room (because of better max) */

  prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);

  ASSERT((float)current_best_cost + (float)current_prune_delta < (float)SHRT_MAX);

  while (rec->fsmnode_token_freelist == MAXftokenID)
  {
    SREC_STATS_INC_FTOKEN_REPRUNES(1);

    current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);

    if (current_prune_delta <= 1)
    {
      /* this seems like an unlikely case, but if we can't prune enough to make
         room, give up on the utterance (better than being stuck here forever!) */

      /*FIX replace with crec abort mechanism*/
      PLogError ("reprune_fsmnode_tokens: can't seem to prune enough - best cost %d\n",
                 current_best_cost);

      print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "CANNOT PRUNE: ");

      dump_core("reprune_fsmnode_tokens() died\n");
    }

    prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
    ASSERT((float)current_best_cost + (float)current_prune_delta < (float)USHRT_MAX);
  }

  return current_prune_delta;
}

void reset_best_cost_to_zero(srec* rec, costdata current_best_cost)
{
  fsmarc_token* stoken;
  stokenID token_index;
  short i, num_states;

  /* do the state tokens */
  for (token_index = rec->active_fsmarc_tokens;
       token_index != MAXstokenID;
       token_index = stoken->next_token_index)
  {
    stoken = &rec->fsmarc_token_array[ token_index];
    num_states = stoken->num_hmm_states;
    for (i = 0; i < num_states; i++)
    {
      if (stoken->cost[i] < MAXcostdata)
      {
        ASSERT(stoken->cost[i] >= current_best_cost);
        stoken->cost[i] = (costdata)(stoken->cost[i] - (costdata) current_best_cost);
      }
    }
  }
}
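/* Editorial note: the renormalization scheme here works in two steps.
   reset_best_cost_to_zero() above subtracts the frame's best cost from every
   live path so that stored costdata values stay small, and reset_cost_offsets()
   below records that per-frame offset plus a running sum. An absolute
   (un-normalized) path cost at frame f can then presumably be recovered as
   roughly:

       absolute_cost ~= stored_cost + rec->accumulated_cost_offset[f]

   (the consumers of accumulated_cost_offset live outside this excerpt). */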
static void reset_cost_offsets(multi_srec* rec, frameID current_frame,
                               costdata current_best_cost)
{
  rec->cost_offset_for_frame[current_frame] = current_best_cost;
  if (current_frame == 0)
    rec->accumulated_cost_offset[current_frame] = current_best_cost;
  else
    rec->accumulated_cost_offset[current_frame] = rec->accumulated_cost_offset[current_frame-1] + current_best_cost;
}

/*--------------------------------------------------------------------------*
 *                                                                          *
 * word_token functions                                                     *
 *                                                                          *
 *--------------------------------------------------------------------------*/

/* this function allocates a new word token and remembers it in a list in the
   srec structure (to be used for backtrace later on) */

static wtokenID create_word_token(srec *rec)
{
  wtokenID word_token_index;
  word_token *word_token;

  word_token_index = get_free_word_token(rec, NULL_IF_NO_TOKENS);

  if (0 && word_token_index == MAXwtokenID)
  {
    /*FIX - use crec error handling*/
    dump_core("create_word_token: cannot allocate word token - we need"
              " to figure out a word pruning strategy when this happens!\n");
  }

  word_token = &(rec->word_token_array[word_token_index]);

  return word_token_index;
}

/* gets rid of fsmnodes which trace back to this word, since the word is not
   going to make it onto the word lattice */

static int block_fsmnodes_per_backtrace(srec *rec, wtokenID wtoken_id)
{
  int num_ftokens_blocked = 0;
  ftokenID current_token_index = rec->active_fsmnode_tokens;
  while (current_token_index != MAXftokenID) {
    fsmnode_token *token = &(rec->fsmnode_token_array[current_token_index]);
    if (token->word_backtrace == wtoken_id) {
      num_ftokens_blocked++;
      token->cost = MAXcostdata;
    }
    current_token_index = token->next_token_index;
  }
  return num_ftokens_blocked;
}

/* processing a word boundary,
   current_token is the fsmnode_token to the left of the boundary
   cost is the cost through this frame
   pcurrent_word_threshold is the worst score for words on the prio.q
   returns the word_token index just created */
static wtokenID srec_process_word_boundary_nbest(srec* rec,
    nodeID end_node,
    wordID word,
    wtokenID word_backtrace,
    costdata cost,
    costdata* pcurrent_word_threshold,
    int *any_nodes_blocked)
{
  wtokenID wtoken_index;
  wtokenID return_wtoken_index;
  wtokenID token_id_to_remove;

  word_token* wtoken;

  if (word_backtrace != MAXwtokenID)
  {
    word_token* btoken = &rec->word_token_array[word_backtrace];
    if (btoken->end_time >= rec->current_search_frame)
    {
      SREC_STATS_INC_BAD_BACKTRACES();
      return MAXwtokenID;
    }
  }
  /*make new word token*/
  wtoken_index = create_word_token(rec);
  //ASSERT(wtoken_index != MAXwtokenID);
  if (wtoken_index == MAXwtokenID)
  {
    /* we could have called reprune_word_tokens() here, but we instead called it
       at the beginning of do_epsilon_updates() to avoid complications of
       re-pruning word tokens too deep in the search update */
    return_wtoken_index = MAXwtokenID;
    return return_wtoken_index;
  }

  wtoken = &(rec->word_token_array[wtoken_index]);
  wtoken->word = word;
  wtoken->_word_end_time = 0; // new
  wtoken->end_time = rec->current_search_frame;
  wtoken->end_node = end_node;
  wtoken->backtrace = word_backtrace;
  wtoken->cost = cost;

  token_id_to_remove = add_word_token_to_priority_q(rec->word_priority_q, wtoken_index, rec->word_token_array);

  if (token_id_to_remove == wtoken_index)
    return_wtoken_index = MAXwtokenID;
  else
  {
    /* the word was truly added to the priority q, so we must
       get the new worst word on that list */
    *pcurrent_word_threshold = get_priority_q_threshold(rec->word_priority_q, rec->word_token_array);
    return_wtoken_index = wtoken_index;
  }

  if (token_id_to_remove != MAXwtokenID)
  {
    /* Jean: must allow for this word_token to be recycled */

    /* ok, the word won't be maintained, so there's no point to
       continue to search this path (as headed by this fsmarc_token) */

    *any_nodes_blocked += block_fsmnodes_per_backtrace( rec, token_id_to_remove);

    /* we killed the fsmnode token associated with the word being removed.
       But, we didn't kill its word backtrace, so there may be word tokens
       in the backtrace which cannot connect. But we can't really kill
       the whole backtrace since it might be shared with other
       active states. Mike's idea is to add a counter on the
       word_tokens, which counts how many active paths are using
       this word_token ... TODO */

    /* print_word_token(rec, token_id_to_remove, "srec_process_word_boundary killed wtoken "); */
    free_word_token(rec, token_id_to_remove);
  }
  return return_wtoken_index;
}

ftokenID* srec_get_parent_for_active_fsmnode(srec* rec, ftokenID ftoken_index)
{
  ftokenID* parent_ftoken_index = &rec->active_fsmnode_tokens;
  while( (*parent_ftoken_index) != MAXftokenID) {
    fsmnode_token* parent_ftoken = &rec->fsmnode_token_array[ *parent_ftoken_index];
    if( *parent_ftoken_index == ftoken_index)
      return parent_ftoken_index;
    parent_ftoken_index = &parent_ftoken->next_token_index;
  }
  return NULL;
}

/*---------------------------------------------------------------------------*
 *                                                                           *
 * updates                                                                   *
 *                                                                           *
 *---------------------------------------------------------------------------*/

/* handles epsilon transitions (used for word boundaries). Epsilons come from
   active fsmnode tokens and maximize into new FSMnode tokens (finding the best
   path to an FSM node).

   When we hit an epsilon, create a word token, put it in the path, and
   remember it in a list of all word tokens */

static int do_epsilon_updates(srec *rec, costdata prune_delta,
                              costdata best_cost)
{
  fsmnode_token *new_ftoken;
  fsmnode_token *current_ftoken;
  wtokenID wtoken_index;
  FSMnode* fsm_node;
  FSMarc* fsm_arc;
  costdata cost, cost_with_wtw;
  ftokenID new_ftoken_index;
  ftokenID current_ftoken_index;
  costdata current_word_threshold;
  arcID fsm_arc_index;
  wordID word_with_wtw;
  int num_fsm_nodes_updated=0, num_fsm_nodes_blocked, num_fsm_nodes_blocked2;
  int num_wtokens_maybe_homonyms;
  costdata current_prune_delta;
  costdata current_prune_thresh;
  altword_token* awtoken;

  // printf("FRAME %d\n", rec->current_search_frame);
  // print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "BEFORE UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");

  current_word_threshold = MAXcostdata;
  current_prune_delta = prune_delta;
  current_prune_thresh = best_cost + current_prune_delta;

  current_ftoken_index = rec->active_fsmnode_tokens;
  while (current_ftoken_index != MAXftokenID)
  {
    // print_fsmnode_token(rec, current_ftoken_index, "processing ... 
"); 854 current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]); 855 856 cost = current_ftoken->cost; /*get last state of token*/ 857 fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index]; 858 fsm_arc = NULL; 859 860 /* Jean: see below too, let's remember the wtoken_index we create in 861 case we need to re-use it. All N epsilon updates, and all M 862 M outgoing arcs can share, cuz this is the same arriving arc@frame */ 863 864 wtoken_index = MAXwtokenID; 865 866 if( cost >= current_prune_thresh) { 867 ftokenID* parent_ftoken_index; 868 // the srec_get_parent_for_active_fsmnode() functions can be 869 // gotten rid of if we use a doubly-linked list (fwd/bwd ptrs) 870 parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, current_ftoken_index); 871 if(!parent_ftoken_index) { 872 PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, current_ftoken_index); 873 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n"); 874 exit(1); 875 } 876 *parent_ftoken_index = current_ftoken->next_token_index; 877 // effectively release this fsmnode_token and go to next one 878 rec->best_token_for_node[ current_ftoken->FSMnode_index] = MAXftokenID; 879 free_fsmnode_token( rec, current_ftoken_index); 880 current_ftoken_index = *parent_ftoken_index; 881 continue; 882 } 883 884 num_fsm_nodes_updated++; 885 num_fsm_nodes_blocked = 0; 886 num_wtokens_maybe_homonyms = 0; 887 for (fsm_arc_index = fsm_node->un_ptr.first_next_arc; 888 fsm_arc_index != MAXarcID; 889 fsm_arc_index = fsm_arc->linkl_next_arc) 890 { 891 fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index]; 892 893 /* consider only epsilon transitions */ 894 if (fsm_arc->ilabel >= EPSILON_OFFSET) 895 continue; 896 897 /* can't loop to yourself on epsilon! */ 898 ASSERT(fsm_arc->to_node != current_ftoken->FSMnode_index); 899 900 cost_with_wtw = current_ftoken->cost + fsm_arc->cost; /* WTW */ 901 word_with_wtw = current_ftoken->word; 902 if(fsm_arc->olabel != WORD_EPSILON_LABEL) 903 word_with_wtw = fsm_arc->olabel ; 904 905 // we should compare cost_with_wtw but let's let the priority_q 906 // do the pruning 907 if (cost>=current_prune_thresh || fsm_arc->cost>=current_prune_thresh) 908 continue; 909 910 /*if word boundary, see if it crosses the word end threshold*/ 911 /* no room on the word priority_q, so not worth pursuing */ 912 if (fsm_arc->ilabel == WORD_BOUNDARY && cost_with_wtw >= current_word_threshold) { 913 continue; // goto NEXT_FTOKEN; 914 } 915 916 new_ftoken = NULL; 917 new_ftoken_index = rec->best_token_for_node[ fsm_arc->to_node]; 918 if(new_ftoken_index != MAXftokenID) 919 new_ftoken = &rec->fsmnode_token_array[ new_ftoken_index]; 920 if( new_ftoken && (current_ftoken->cost+fsm_arc->cost)<new_ftoken->cost) { 921 /* clobber it */ 922 } else if(new_ftoken) { 923 /* merge it */ 924 } else if(rec->fsmnode_token_freelist == MAXftokenID) { 925 /* create it? maybe */ 926 current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, current_ftoken_index); 927 current_prune_thresh = best_cost + current_prune_delta; 928 } 929 930 if (fsm_arc->ilabel == WORD_BOUNDARY) { 931 /* 20030920, for sure the backtrace will change! 
*/
        // token->word_backtrace = MAXwtokenID;

        wtoken_index = srec_process_word_boundary_nbest(rec,
                       current_ftoken->FSMnode_index,
                       word_with_wtw,
                       current_ftoken->word_backtrace,
                       cost_with_wtw,
                       &current_word_threshold,
                       &num_fsm_nodes_blocked);
        if (wtoken_index != MAXwtokenID) {
          WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+wtoken_index),
                                   rec->word_token_array[wtoken_index].end_time - current_ftoken->silence_duration);
        }
        if( fsm_arc->olabel!=WORD_EPSILON_LABEL && wtoken_index != MAXwtokenID) {
          num_wtokens_maybe_homonyms++;
          if( num_wtokens_maybe_homonyms>1)
            WORD_TOKEN_SET_HOMONYM( (rec->word_token_array+wtoken_index), 1);
        }
        /* now drop alternative words, note the use of current_token
           because token is on the other side already */
        if (current_ftoken->aword_backtrace != AWTNULL) {
          awtoken = current_ftoken->aword_backtrace;
          for (; awtoken != AWTNULL; awtoken = awtoken->next_token) {
            wtokenID wti;
            wti = srec_process_word_boundary_nbest(rec,
                  current_ftoken->FSMnode_index,
                  awtoken->word,
                  awtoken->word_backtrace,
                  cost_with_wtw + awtoken->costdelta,
                  &current_word_threshold,
                  &num_fsm_nodes_blocked2);
          }
          /* if we don't free the altwords here, i had thought that
             updates from stateN would make the altwords grow and grow,
             but by that time all the fsmnodes are brand new */
          /* leaving them alive allows them to propagate to state0 thru
             other epsilons (eg non .wb) to new nodes but we don't
             use such arcs.
             the .wb would not get dropped again 'cuz we check
             for that in wtoken_index above.
             this is quite complex and the case for dropping word_tokens
             from the node AFTER the .wb can be made
             ie. would need a re-write of do_epsilon_updates() */
          if( current_ftoken->aword_backtrace != AWTNULL)
            free_altword_token_batch(rec, current_ftoken->aword_backtrace);
          current_ftoken->aword_backtrace = AWTNULL;
          /*print_fsmnode_token(rec, token-rec->fsmnode_token_array, "123a");*/
        }

        if( wtoken_index != MAXwtokenID) {

          if( new_ftoken == NULL) {
            /* create token for the other side */
            new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
            // this should not happen because of the above check near
            // fsmnode_token_freelist == MAXftokenID
            ASSERT(new_ftoken_index != MAXftokenID);
            new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
            new_ftoken->word_backtrace = wtoken_index;
            new_ftoken->cost = cost_with_wtw;
            new_ftoken->word = WORD_EPSILON_LABEL;
            new_ftoken->FSMnode_index = fsm_arc->to_node;
            new_ftoken->aword_backtrace = AWTNULL;
            new_ftoken->next_token_index = current_ftoken->next_token_index;
            current_ftoken->next_token_index = new_ftoken_index;
            rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
          } else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
            /* update token on the other side */
            ftokenID *parent_ftoken_index;
            new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
            new_ftoken->cost = cost_with_wtw;
            new_ftoken->word_backtrace = wtoken_index;
            new_ftoken->word = WORD_EPSILON_LABEL;
            // unchanged token->FSMnode_index = fsm_arc->to_node;
            // because this token was updated, we need to reprocess it, right after
            parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
            if(!parent_ftoken_index) {
              PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__,
new_ftoken_index); 1010 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n"); 1011 exit(1); 1012 } 1013 *parent_ftoken_index = new_ftoken->next_token_index; 1014 new_ftoken->next_token_index = current_ftoken->next_token_index; 1015 current_ftoken->next_token_index = new_ftoken_index; 1016 rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index; 1017 /* new_ftoken->aword_backtrace must be null, alts here were 1018 processed and dropped in srec_process_word_boundary_nbest() */ 1019 if(new_ftoken->aword_backtrace != AWTNULL) { 1020 PLogError( ("Error: internal search error near %s %d\n"), __FILE__, __LINE__); 1021 continue; 1022 } 1023 } else { 1024 /* token on other side is same or better, just leave it */ 1025 } 1026 } 1027 } 1028 else if(fsm_arc->ilabel == EPSILON_LABEL) { 1029 if( new_ftoken == NULL) { 1030 /* create token for the other side */ 1031 new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS); 1032 // this should not happen because of the above check near 1033 // fsmnode_token_freelist == MAXftokenID 1034 ASSERT(new_ftoken_index != MAXftokenID); 1035 new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]); 1036 new_ftoken->word_backtrace = current_ftoken->word_backtrace; 1037 new_ftoken->cost = cost_with_wtw; 1038 new_ftoken->word = word_with_wtw; 1039 new_ftoken->FSMnode_index = fsm_arc->to_node; 1040 new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace); 1041 new_ftoken->next_token_index = current_ftoken->next_token_index; 1042 current_ftoken->next_token_index = new_ftoken_index; 1043 rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index; 1044 } else if(new_ftoken && cost_with_wtw<new_ftoken->cost) { 1045 /* update token on the other side */ 1046 ftokenID *parent_ftoken_index; 1047 new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]); 1048 new_ftoken->cost = cost_with_wtw; 1049 new_ftoken->word_backtrace = current_ftoken->word_backtrace; 1050 new_ftoken->word = word_with_wtw; 1051 /* here we are giving up the path and alternatives that existed at 1052 this node, which is not great! The new (better) top choice 1053 coming in and it's alternatives are propagated instead. 1054 TODO: merge the alternative lists and the previous top choice 1055 */ 1056 if(new_ftoken->aword_backtrace!=AWTNULL) 1057 free_altword_token_batch( rec, new_ftoken->aword_backtrace); 1058 new_ftoken->aword_backtrace = AWTNULL; 1059 new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace); 1060 // unchanged token->FSMnode_index = fsm_arc->to_node; 1061 // because this token was updated, we need to re-process it, right after 1062 parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index); 1063 if(!parent_ftoken_index) { 1064 PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index); 1065 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n"); 1066 exit(1); 1067 } 1068 *parent_ftoken_index = new_ftoken->next_token_index; 1069 new_ftoken->next_token_index = current_ftoken->next_token_index; 1070 current_ftoken->next_token_index = new_ftoken_index; 1071 rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index; 1072 } else { 1073 /* token on other side is same or better, just leave it */ 1074 /* todo: maybe merge alternative lists? 
*/
          }
        }
      } /* loop over arcs */

      ASSERT( current_ftoken->cost != MAXcostdata);
      if( num_fsm_nodes_blocked) {
        /* we do not want to propagate fsm node tokens for paths that have
           just been killed on the basis of no space for word propagation */
        prune_fsmnode_tokens(rec, MAXcostdata/2, current_ftoken_index);
      }

      // NEXT_FTOKEN:
      current_ftoken_index = current_ftoken->next_token_index;
  }

  // print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "AFTER UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");

  sanity_check_altwords(rec, rec->altword_token_freelist);
  return num_fsm_nodes_updated;
}

static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
                                       costdata *pcurrent_best_cost,
                                       costdata *precomputed_model_scores)
{
  stokenID current_token_index;
  fsmarc_token *current_token;
  costdata current_best_cost;
  costdata current_prune_thresh;
  costdata current_prune_delta;
  costdata model_cost;
  asr_int16_t any_alive;
  HMMInfo *hmm_info;
  modelID model_index;
  asr_int16_t internal_state, end_state;
  arcID fsm_arc_index;
  FSMarc* fsm_arc;

  costdata prev_cost;
  costdata self_loop_cost;

  current_best_cost = *pcurrent_best_cost;
  current_prune_delta = *pcurrent_prune_delta;
  current_prune_thresh = current_best_cost + current_prune_delta;

  ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);

  /* best_token_for_arc must be reset here, cuz the same array might have
     been used by another gender. Alternatively we could have let each
     recog use its own array, thereby saving cpu at the expense of memory */
  for (fsm_arc_index = 0; fsm_arc_index < rec->context->num_arcs; fsm_arc_index++)
    rec->best_token_for_arc[fsm_arc_index] = MAXstokenID;

  current_token_index = rec->active_fsmarc_tokens;
  while (current_token_index != MAXstokenID)
  {
    current_token = &(rec->fsmarc_token_array[current_token_index]);

    fsm_arc_index = current_token->FSMarc_index;
    fsm_arc = &rec->context->FSMarc_list[fsm_arc_index];

    /* best_token_for_arc must be set here, cuz it was reset above */
    rec->best_token_for_arc[fsm_arc_index] = current_token_index;

    hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
    any_alive = 0;
    end_state = current_token->num_hmm_states - 1;

    for (internal_state = end_state; internal_state >= 0; internal_state--)
    {
      model_index = hmm_info->state_indices[internal_state];
      model_cost = precomputed_model_scores[model_index];

      /* better to come from the previous state or the self-loop? */

      if (internal_state > 0)
      {
        prev_cost = current_token->cost[internal_state-1];
        /* a duration model can be applied here, by changing prev_cost according
           to some function of current_token->duration[internal_state-1] and
           rec->avg_state_durations[ prev_model_index] */
        if (prev_cost < current_prune_thresh)
        {
          modelID prev_model_index;
          prev_cost = (costdata)(prev_cost + (costdata) model_cost);
          /* apply duration model for "next" transition, note that it's nice
             to access the duration model (avg_state_durations) somewhere
             other than the acoustic models, which could be far away in memory;
             an arrive penalty would be applied here too if it was reqd */

          prev_model_index = hmm_info->state_indices[internal_state-1];
1167 prev_cost = (costdata)(prev_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[ prev_model_index], 1168 current_token->duration[internal_state-1])); 1169 1170 } 1171 } 1172 else 1173 { 1174 prev_cost = MAXcostdata; 1175 } 1176 1177 self_loop_cost = current_token->cost[internal_state]; 1178 /* a duration model can be applied here, 1179 by changing the self_loop_cost according to some function of 1180 the current_token->duration[internal_state] rec->avg_state_durations[ prev_model_index] */ 1181 if (self_loop_cost < current_prune_thresh) 1182 { 1183 self_loop_cost = (costdata)(self_loop_cost + (costdata) model_cost); 1184 /* apply duration model for "loop" transition */ 1185 1186 self_loop_cost = (costdata)(self_loop_cost + (costdata) duration_penalty_loop(rec->avg_state_durations[ model_index], 1187 current_token->duration[internal_state])); 1188 1189 } 1190 1191 if (prev_cost < self_loop_cost) 1192 { 1193 current_token->cost[internal_state] = prev_cost; 1194 current_token->word_backtrace[internal_state] = current_token->word_backtrace[internal_state-1]; 1195 current_token->word[internal_state] = current_token->word[internal_state-1]; 1196 current_token->duration[internal_state] = 1; 1197 if (current_token->word[internal_state-1] != MAXwordID) 1198 { 1199 if (current_token->aword_backtrace[internal_state] != AWTNULL) 1200 free_altword_token_batch(rec, 1201 current_token->aword_backtrace[internal_state]); 1202 current_token->aword_backtrace[internal_state] = refcopy_altwords(rec, current_token->aword_backtrace[internal_state-1]); 1203 /*print_fsmarc_token(rec, current_token_index, "123c");*/ 1204 } 1205 else 1206 { 1207 /* if there's no top choice, there shouldn't be alternatives! */ 1208 ASSERT(current_token->aword_backtrace[internal_state] == AWTNULL); 1209 ASSERT(current_token->aword_backtrace[internal_state-1] == AWTNULL); 1210 } 1211 } 1212 else 1213 { 1214 current_token->cost[internal_state] = self_loop_cost; 1215 current_token->duration[internal_state]++; 1216 } 1217 1218 if (current_token->cost[internal_state] < current_prune_thresh) 1219 { 1220 any_alive = 1; 1221 if (current_token->cost[internal_state] < current_best_cost) 1222 { 1223 current_best_cost = current_token->cost[internal_state]; 1224 current_prune_thresh = current_best_cost + current_prune_delta; 1225 } 1226 } 1227 } 1228 current_token_index = current_token->next_token_index; 1229 } 1230 *pcurrent_best_cost = current_best_cost; 1231 *pcurrent_prune_delta = current_prune_delta; 1232 } 1233 1234 1235 1236 1237 static int GetNumArcsArrivingClip2(srec_context* context, FSMnode* fsm_node) 1238 { 1239 arcID fpa = fsm_node->first_prev_arc; 1240 FSMarc* arc; 1241 1242 if (fpa == MAXarcID) 1243 return 0; 1244 arc = &context->FSMarc_list[fpa]; 1245 if (arc->linkl_prev_arc == MAXarcID) 1246 return 1; 1247 else 1248 return 2; 1249 } 1250 1251 static int update_from_hmms_to_fsmnodes(srec *rec, costdata prune_delta, costdata best_cost) 1252 { 1253 stokenID current_token_index; 1254 fsmarc_token *current_token; 1255 int end_state; 1256 costdata end_cost; 1257 costdata current_prune_thresh; 1258 costdata current_prune_delta; /*may get tighter to keep num fsmnodes under control*/ 1259 // vFSMarc vfsm_arc; 1260 FSMarc* fsm_arc; 1261 FSMnode* fsm_node; 1262 // vFSMnode vfsm_node; 1263 arcID fsm_arc_index; 1264 nodeID to_node_index; 1265 ftokenID new_ftoken_index; 1266 fsmnode_token *ftoken; 1267 modelID end_model_index; 1268 labelID ilabel; 1269 short end_cost_equality_hack; 1270 HMMInfo* hmm_info; 1271 altword_token 
*awtoken, *q; 1272 int num_fsmnode_updates = 0; 1273 1274 current_prune_delta = prune_delta; 1275 current_prune_thresh = best_cost + current_prune_delta; 1276 current_token_index = rec->active_fsmarc_tokens; 1277 1278 for (ilabel = 0; ilabel < NODE_INFO_NUMS; ilabel++) 1279 { 1280 rec->current_best_ftoken_cost[ilabel] = MAXcostdata / 2; 1281 rec->current_best_ftoken_index[ilabel] = MAXftokenID; 1282 } 1283 sanity_check_altwords(rec, rec->altword_token_freelist); 1284 1285 while (current_token_index != MAXstokenID) 1286 { 1287 current_token = &(rec->fsmarc_token_array[current_token_index]); 1288 1289 /*propagate from end of state token to new FSM node*/ 1290 end_state = (char) current_token->num_hmm_states - 1; 1291 1292 ASSERT((current_token->aword_backtrace[end_state] == AWTNULL) 1293 || (current_token->word[end_state] != MAXwordID)); 1294 end_cost = current_token->cost[end_state]; 1295 1296 /* anticipated repruning: make sure there is enough space before 1297 beginning complex computation */ 1298 if (rec->word_priority_q->max_in_q>1 && rec->altword_token_freelist_len < 3*rec->word_priority_q->max_in_q) 1299 reprune_altword_tokens(rec); 1300 1301 if (end_cost < current_prune_thresh) 1302 { 1303 num_fsmnode_updates++; 1304 fsm_arc_index = current_token->FSMarc_index; 1305 fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index]; 1306 1307 hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel]; 1308 1309 end_model_index = hmm_info->state_indices[end_state]; 1310 1311 end_cost = (costdata)(end_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[end_model_index], 1312 current_token->duration[end_state])); 1313 to_node_index = fsm_arc->to_node; 1314 new_ftoken_index = rec->best_token_for_node[to_node_index]; 1315 if (new_ftoken_index == MAXftokenID) 1316 { 1317 /*we need to make sure there is room in the new_states array 1318 and there are free state tokens*/ 1319 if (rec->fsmnode_token_freelist == MAXftokenID) 1320 { 1321 /*make sure there is room for another FSMnode token 1322 - if not, prune until there is room*/ 1323 current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, MAXftokenID); 1324 current_prune_thresh = best_cost + current_prune_delta; 1325 } 1326 1327 /*because of the above check, this should always succeed*/ 1328 new_ftoken_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS); 1329 1330 ftoken = &(rec->fsmnode_token_array[new_ftoken_index]); 1331 ftoken->FSMnode_index = to_node_index; 1332 ftoken->next_token_index = rec->active_fsmnode_tokens; 1333 ftoken->cost = end_cost; 1334 ftoken->word_backtrace = current_token->word_backtrace[end_state]; 1335 ftoken->word = current_token->word[end_state]; 1336 if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word) 1337 { 1338 ftoken->silence_duration = current_token->duration[end_state]; 1339 } 1340 else 1341 { 1342 ftoken->silence_duration = 0; 1343 } 1344 if (ftoken->word != MAXwordID) 1345 { 1346 arcID narr; 1347 fsm_node = &rec->context->FSMnode_list[ fsm_arc->to_node]; 1348 /* when there is only one arc arriving, a refcopy is good enough 1349 and saves memory */ 1350 narr = (arcID) GetNumArcsArrivingClip2(rec->context, fsm_node); 1351 if (narr > 1) 1352 ftoken->aword_backtrace = copy_altwords(rec, current_token->aword_backtrace[end_state], 0); 1353 else 1354 ftoken->aword_backtrace = refcopy_altwords(rec, current_token->aword_backtrace[end_state]); 1355 } 1356 else 1357 { 1358 /* if there's no top choice, there shouldn't be alternatives! 
*/ 1359 ASSERT(current_token->aword_backtrace[end_state] == AWTNULL); 1360 ftoken->aword_backtrace = AWTNULL; 1361 } 1362 rec->active_fsmnode_tokens = new_ftoken_index; 1363 rec->best_token_for_node[to_node_index] = new_ftoken_index; 1364 } 1365 else /* a token already exists, use it! */ 1366 { 1367 1368 ftoken = &(rec->fsmnode_token_array[new_ftoken_index]); 1369 ASSERT( ((current_token->word[end_state] == MAXwordID) && (ftoken->word == MAXwordID)) 1370 || ((current_token->word[end_state] != MAXwordID) && (ftoken->word != MAXwordID)) ); 1371 1372 /* this is a hack for preferring the shorter of the backtrace words 1373 when scores are equal, used to prefer longer pau2 word */ 1374 end_cost_equality_hack = 0; 1375 if (end_cost == ftoken->cost) 1376 { 1377 if (current_token->word_backtrace[end_state] != ftoken->word_backtrace 1378 && current_token->word_backtrace[end_state] != MAXwtokenID) 1379 { 1380 frameID ct_end_time = MAXframeID, et_end_time = 0; 1381 if (current_token->word_backtrace[end_state] != MAXwtokenID) 1382 ct_end_time = rec->word_token_array[current_token->word_backtrace[end_state]].end_time; 1383 if (ftoken->word_backtrace != MAXwtokenID) 1384 et_end_time = rec->word_token_array[ftoken->word_backtrace].end_time; 1385 if (ct_end_time < et_end_time) 1386 end_cost_equality_hack = 1; 1387 } 1388 } 1389 1390 if (end_cost < ftoken->cost || end_cost_equality_hack) 1391 { 1392 /* new one coming in is better, so push the current state down */ 1393 /* ftoken info goes into awtoken */ 1394 if (ftoken->word != MAXwordID) 1395 { 1396 /* copy_altwords() */ 1397 awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS); 1398 if (awtoken != AWTNULL) 1399 { 1400 awtoken->costdelta = ftoken->cost - end_cost; 1401 awtoken->word_backtrace = ftoken->word_backtrace; 1402 awtoken->word = ftoken->word; 1403 1404 /* ensure full ownership! */ 1405 q = ftoken->aword_backtrace; 1406 if (q != AWTNULL && q->refcount > 1) 1407 { 1408 awtoken->next_token = copy_altwords(rec, ftoken->aword_backtrace, ftoken->cost - end_cost); 1409 free_altword_token_batch(rec, ftoken->aword_backtrace); 1410 /* reversed order above here !! */ 1411 } 1412 else 1413 { 1414 awtoken->next_token = ftoken->aword_backtrace; 1415 count_altword_token( rec, awtoken); 1416 for (q = awtoken->next_token; q; q = q->next_token) 1417 q->costdelta += ftoken->cost - end_cost; 1418 } 1419 ftoken->aword_backtrace = awtoken; 1420 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace); 1421 if( (q=ftoken->aword_backtrace)!=AWTNULL) { 1422 for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ; 1423 q->next_token = copy_altwords(rec, current_token->aword_backtrace[end_state], 0); 1424 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace); 1425 /* awtoken->costbasis = &ftoken->cost; */ 1426 ftoken->aword_backtrace->refcount = 1; 1427 } 1428 } 1429 } 1430 else 1431 { 1432 /* if there's no top choice, there shouldn't be alternatives! 
*/ 1433 ASSERT(ftoken->aword_backtrace == AWTNULL); 1434 } 1435 /* and stoken info goes into ftoken */ 1436 ftoken->cost = end_cost; 1437 ftoken->word_backtrace = current_token->word_backtrace[end_state]; 1438 ftoken->word = current_token->word[end_state]; 1439 if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word) 1440 { 1441 ftoken->silence_duration = current_token->duration[end_state]; 1442 } 1443 else 1444 { 1445 ftoken->silence_duration = 0; 1446 } 1447 } 1448 else 1449 { 1450 /* new arc arriving is worse */ 1451 /* print_fsmarc_token(rec, current_token_index, "new_arc_arriving worse"); 1452 print_fsmnode_token(rec, new_ftoken_index, "new_arc_arriving tonode");*/ 1453 /* append it to the alt list */ 1454 /* stoken info goes into the awtoken, ftoken unchanged */ 1455 if (ftoken->word != MAXwordID) 1456 { 1457 /* copy_altwords() */ 1458 awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS); 1459 if (awtoken != AWTNULL) 1460 { 1461 awtoken->costdelta = end_cost - ftoken->cost; 1462 awtoken->word = current_token->word[end_state]; 1463 awtoken->word_backtrace = current_token->word_backtrace[end_state]; 1464 1465 if (current_token->aword_backtrace[end_state] != AWTNULL) 1466 awtoken->next_token = copy_altwords(rec, 1467 current_token->aword_backtrace[end_state], 1468 awtoken->costdelta); 1469 else 1470 awtoken->next_token = AWTNULL; 1471 1472 /* ensure full ownership!, this is new here! */ 1473 q = ftoken->aword_backtrace; 1474 if (q != AWTNULL && q->refcount > 1) 1475 { 1476 q = copy_altwords(rec, ftoken->aword_backtrace, 0); 1477 free_altword_token_batch(rec, ftoken->aword_backtrace); 1478 ftoken->aword_backtrace = q; 1479 } 1480 } 1481 if (ftoken->aword_backtrace) 1482 { 1483 for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ; 1484 q->next_token = awtoken; 1485 } 1486 else 1487 { 1488 ftoken->aword_backtrace = awtoken; 1489 } 1490 if (ftoken->aword_backtrace!=AWTNULL) { 1491 ftoken->aword_backtrace->refcount = 1; 1492 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace); 1493 } 1494 } 1495 } 1496 /*print_fsmnode_token(rec, new_ftoken_index, "123e reused-token ");*/ 1497 } 1498 ilabel = rec->context->FSMnode_info_list[ ftoken->FSMnode_index]; 1499 ASSERT(ilabel < NODE_INFO_NUMS); 1500 if (ftoken->cost < rec->current_best_ftoken_cost[ilabel]) 1501 { 1502 rec->current_best_ftoken_cost[ilabel] = ftoken->cost; 1503 rec->current_best_ftoken_index[ilabel] = new_ftoken_index; 1504 } 1505 if (ftoken->cost < rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN]) 1506 { 1507 rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN] = ftoken->cost; 1508 rec->current_best_ftoken_index[NODE_INFO_UNKNOWN] = new_ftoken_index; 1509 } 1510 ASSERT(ftoken->word != MAXwordID || ftoken->aword_backtrace == AWTNULL); 1511 } 1512 current_token_index = current_token->next_token_index; 1513 } 1514 sanity_check_altwords(rec, rec->altword_token_freelist); 1515 return num_fsmnode_updates; 1516 } 1517 1518 static int update_from_current_fsm_nodes_into_new_HMMs(srec* rec, 1519 costdata *pcurrent_prune_delta, 1520 costdata *pcurrent_best_cost, 1521 costdata *precomputed_model_scores) 1522 { 1523 costdata prev_cost; 1524 FSMnode* fsm_node; 1525 FSMarc* fsm_arc; 1526 arcID fsm_arc_index; 1527 HMMInfo *hmm_info; 1528 modelID model_index; 1529 fsmarc_token *token; 1530 stokenID new_token_index = MAXstokenID; 1531 costdata cost; 1532 costdata current_prune_thresh; 1533 costdata current_prune_delta = *pcurrent_prune_delta; 1534 costdata current_best_cost = 
*pcurrent_best_cost; 1535 ftokenID ftoken_index; 1536 ftokenID old_ftoken_index; 1537 fsmnode_token *fsmnode_token; 1538 int num_fsm_nodes_updated = 0; 1539 costdata orig_prune_delta; 1540 1541 ftoken_index = rec->active_fsmnode_tokens; 1542 1543 current_prune_thresh = *pcurrent_best_cost + *pcurrent_prune_delta; 1544 orig_prune_delta = *pcurrent_prune_delta; 1545 1546 sanity_check_altwords(rec, rec->altword_token_freelist); 1547 1548 while (ftoken_index != MAXftokenID) 1549 { 1550 fsmnode_token = &rec->fsmnode_token_array[ftoken_index]; 1551 1552 prev_cost = fsmnode_token->cost; /*get last state of token*/ 1553 if (fsmnode_token->FSMnode_index == rec->context->end_node) 1554 { 1555 prev_cost = MAXcostdata; 1556 } 1557 1558 if (prev_cost < current_prune_thresh) 1559 { 1560 num_fsm_nodes_updated++; 1561 1562 fsm_node = &rec->context->FSMnode_list[fsmnode_token->FSMnode_index]; 1563 1564 /* loop over arcs leaving this fsm_node */ 1565 for (fsm_arc_index = fsm_node->un_ptr.first_next_arc; 1566 fsm_arc_index != MAXarcID; 1567 fsm_arc_index = fsm_arc->linkl_next_arc) 1568 { 1569 labelID ilabel; 1570 wordID olabel; 1571 nodeID nextnode; 1572 1573 fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index]; 1574 1575 ilabel = fsm_arc->ilabel; 1576 olabel = fsm_arc->olabel; 1577 nextnode = fsm_arc->to_node; 1578 1579 if (ilabel >= EPSILON_OFFSET) 1580 { 1581 /*so, not an epsilon arc*/ 1582 hmm_info = &rec->context->hmm_info_for_ilabel[ilabel]; 1583 1584 model_index = hmm_info->state_indices[0]; 1585 1586 cost = prev_cost + precomputed_model_scores[model_index]; 1587 cost = (costdata)(cost + (costdata) fsm_arc->cost); 1588 1589 if (cost < current_prune_thresh) 1590 { 1591 /*new node to keep*/ 1592 1593 /* look for the fsmarc_token* token, into which to maximize, else create new one */ 1594 if (rec->best_token_for_arc[fsm_arc_index] == MAXstokenID) 1595 { 1596 1597 /*make sure there is room for another state token - if not, prune 1598 until there is room*/ 1599 /*we need to make sure there is room in the new_states array and 1600 there are free state tokens*/ 1601 if (rec->fsmarc_token_freelist == MAXstokenID) 1602 { 1603 current_prune_delta = reprune_new_states(rec, current_best_cost, current_prune_delta); 1604 } 1605 1606 /* because of the above check, this should always succeed */ 1607 new_token_index = setup_free_fsmarc_token(rec, fsm_arc, fsm_arc_index, EXIT_IF_NO_TOKENS); 1608 1609 token = &(rec->fsmarc_token_array[new_token_index]); 1610 1611 token->next_token_index = rec->active_fsmarc_tokens; 1612 rec->active_fsmarc_tokens = new_token_index; 1613 rec->num_new_states++; 1614 1615 rec->best_token_for_arc[fsm_arc_index] = new_token_index; 1616 token->cost[0] = MAXcostdata; 1617 } 1618 else 1619 { 1620 new_token_index = rec->best_token_for_arc[fsm_arc_index]; 1621 token = &(rec->fsmarc_token_array[ new_token_index]); 1622 } 1623 1624 if (cost < token->cost[0]) 1625 { 1626 token->cost[0] = cost; 1627 token->duration[0] = 1; 1628 token->word_backtrace[0] = fsmnode_token->word_backtrace; 1629 if (token->aword_backtrace[0] != AWTNULL) 1630 free_altword_token_batch(rec, token->aword_backtrace[0]); 1631 token->aword_backtrace[0] = AWTNULL; 1632 token->aword_backtrace[0] = refcopy_altwords(rec, fsmnode_token->aword_backtrace); 1633 1634 if (olabel != WORD_EPSILON_LABEL) 1635 { 1636 token->word[0] = olabel; 1637 //ASSERT(token->aword_backtrace[0] == AWTNULL); 1638 } 1639 else 1640 { 1641 token->word[0] = fsmnode_token->word; 1642 } 1643 ASSERT(token->word[0] != MAXwordID 1644 || token->aword_backtrace[0] == 
              ASSERT(token->word[0] != MAXwordID
                     || token->aword_backtrace[0] == AWTNULL);
              if (cost < current_best_cost)
              {
                current_best_cost = cost;
                current_prune_delta = orig_prune_delta; /*if we have a new best cost, the prune delta could go back up*/
                current_prune_thresh = cost + current_prune_delta;
                ASSERT((float)cost + (float)current_prune_delta < (float)USHRT_MAX);
              }
            }
          }
        }
      }
    }
    rec->best_token_for_node[fsmnode_token->FSMnode_index] = MAXftokenID; /*done with this node - remove it from the array*/
    old_ftoken_index = ftoken_index;

    ftoken_index = fsmnode_token->next_token_index;
    free_fsmnode_token(rec, old_ftoken_index); /*done with this node - free the token*/
    rec->active_fsmnode_tokens = ftoken_index; /*needed for sanity_check_altwords*/
  }
  /*done with all the tokens, set active tokens to NULL*/
  rec->active_fsmnode_tokens = MAXftokenID;
  sanity_check_altwords(rec, rec->altword_token_freelist);

  *pcurrent_best_cost = current_best_cost;
  *pcurrent_prune_delta = current_prune_delta;

  return num_fsm_nodes_updated;
}
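
/* The function above maintains an adaptive beam: the prune threshold follows
   the best cost seen so far in this frame, the prune delta may be tightened
   by reprune_new_states() when token storage runs low, and it snaps back to
   the original width whenever a new best cost is found.  A toy version of
   just that bookkeeping, with made-up numbers and no token storage: */
#if 0 /* illustrative sketch only -- not compiled into the engine */
#include <stdio.h>

int main(void)
{
  unsigned short orig_prune_delta = 800;                 /* configured beam width    */
  unsigned short current_prune_delta = orig_prune_delta; /* may shrink under load    */
  unsigned short current_best_cost = 60000;              /* "no path yet" sentinel   */
  unsigned short current_prune_thresh = current_best_cost;
  unsigned short costs[4] = { 6400, 6100, 7300, 5900 };  /* fake arc extension costs */
  int i;

  for (i = 0; i < 4; i++)
  {
    if (costs[i] >= current_prune_thresh)
      continue;                                /* outside the beam: pruned      */
    if (costs[i] < current_best_cost)
    {
      current_best_cost = costs[i];
      current_prune_delta = orig_prune_delta;  /* new best: beam opens back up  */
      current_prune_thresh = (unsigned short)(current_best_cost + current_prune_delta);
    }
    printf("kept cost %u, threshold now %u\n",
           (unsigned)costs[i], (unsigned)current_prune_thresh);
  }
  return 0;
}
#endif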
Compiling!\n"); 1712 FST_PrepareContext(rec->context); 1713 1714 if (!rec->context->whether_prepared) 1715 { 1716 PLogError("ESR_INVALID_STATE: Grammar can not be compiled (FST_PrepareContext failed)"); 1717 return ESR_INVALID_STATE ; 1718 } 1719 } 1720 1721 #if USE_COMP_STATS 1722 if (comp_stats == NULL) 1723 comp_stats = init_comp_stats(); 1724 #endif 1725 /*initialize token storage - not clear we really need this - as long as they 1726 are managed correctly, we should be able to do this on startup - not each utt*/ 1727 initialize_free_fsmarc_tokens(rec); 1728 initialize_free_word_tokens(rec); 1729 initialize_free_fsmnode_tokens(rec); 1730 initialize_word_lattice(rec->word_lattice); 1731 initialize_free_altword_tokens(rec); 1732 1733 if (rec->context->num_nodes > rec->max_fsm_nodes) 1734 { 1735 log_report("Error: srec_begin failing due to too many grammar nodes\n"); 1736 return 1; 1737 } 1738 for (node_index = 0;node_index < rec->context->num_nodes;node_index++) 1739 { 1740 rec->best_token_for_node[node_index] = MAXftokenID; 1741 } 1742 if (rec->context->num_arcs > rec->max_fsm_arcs) 1743 { 1744 log_report("Error: srec_begin failing due to too many grammar arcs\n"); 1745 return 1; 1746 } 1747 for (arc_index = 0;arc_index < rec->context->num_arcs;arc_index++) 1748 { 1749 rec->best_token_for_arc[arc_index] = MAXstokenID; 1750 } 1751 rec->srec_ended = 0; 1752 rec->num_new_states = 0; 1753 rec->current_best_cost = 0; 1754 rec->current_prune_delta = rec->prune_delta; 1755 1756 /*need help from johan - does ths FSM only have one start node? 1757 Which one is it? assume just one and it is node 0*/ 1758 1759 fsm_node = &rec->context->FSMnode_list[ rec->context->start_node]; 1760 node_index = (nodeID) rec->context->start_node; 1761 /* node_index is still 0 at this point */ 1762 1763 /*now we just need to setup an initial fsmnode token (for begin FSM node) and then do epsilon updates*/ 1764 1765 rec->active_fsmarc_tokens = MAXstokenID; 1766 1767 new_token_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS); 1768 1769 token = &(rec->fsmnode_token_array[new_token_index]); 1770 token->word_backtrace = MAXwtokenID; /* real value set below*/ 1771 token->cost = 0; 1772 token->word = MAXwordID; 1773 token->FSMnode_index = node_index; 1774 token->next_token_index = MAXftokenID; 1775 token->aword_backtrace = AWTNULL; 1776 1777 rec->best_token_for_node[node_index] = new_token_index; 1778 rec->active_fsmnode_tokens = new_token_index; 1779 rec->current_search_frame = 0; 1780 1781 do_epsilon_updates(rec, rec->prune_delta, 0); 1782 return 0; 1783 } 1784 1785 void srec_force_the_end(srec* rec, frameID end_frame, wordID end_word) 1786 { 1787 srec_word_lattice* wl = rec->word_lattice; 1788 wtokenID wtoken_index, tmp; 1789 frameID frame; 1790 wtoken_index = wl->words_for_frame[end_frame]; 1791 if (wtoken_index == MAXwtokenID) 1792 { 1793 for (frame = end_frame - 1; frame > 20; frame--) 1794 { 1795 if (wl->words_for_frame[frame] != MAXwtokenID) 1796 { 1797 word_token* wtoken; 1798 wl->words_for_frame[end_frame] = wl->words_for_frame[frame]; 1799 wl->words_for_frame[frame] = MAXwtokenID; 1800 for (tmp = wl->words_for_frame[end_frame]; tmp != MAXwtokenID; 1801 tmp = wtoken->next_token_index) 1802 { 1803 wtoken = &rec->word_token_array[tmp]; 1804 wtoken->end_time = frame; 1805 wtoken->word = end_word; 1806 wtoken->end_node = rec->context->end_node; 1807 } 1808 #ifdef _WIN32 1809 PLogError(L("Forced an end path at end frame %d/%d)\n"), frame, end_frame); 1810 #endif 1811 break; 1812 } 1813 } 1814 } 1815 } 1816 1817 

/* when there are no more frames of input, this function
   kills all paths not ending at the end node and
   creates a word linked list even though there is no WORD_BOUNDARY ilabel */

void srec_no_more_frames(srec* rec)
{
#if USE_COMP_STATS
  frameID end_frame = rec->current_search_frame;
#endif
  nodeID end_node;
  fsmnode_token* ftoken;
  ftokenID current_token_index;
  costdata current_word_threshold = MAXcostdata;
  wtokenID word_token_index;
  int any_nodes_blocked = 0;
  altword_token* awtoken;

  /* this is just for sanity checking, to find out what the state was
     at the end of input */
  srec_check_end_of_speech_end(rec);

  if (rec->srec_ended) return;
  rec->srec_ended = 1;

#if USE_COMP_STATS
  comp_stats->total_time += (float)(end_frame / 50.0f);
  dump_comp_stats(comp_stats, PSTDOUT);
#endif

  end_node = rec->context->end_node;
  /*remove all word paths from the priority_q which do not end at end_node
    to make space for those being added below */
  remove_non_end_word_from_q(rec, rec->word_priority_q, rec->word_token_array,
                             end_node);

  if (rec->current_search_frame == 0)
    return;

  rec->accumulated_cost_offset[ rec->current_search_frame] =
    rec->accumulated_cost_offset[ rec->current_search_frame-1];
  rec->cost_offset_for_frame[ rec->current_search_frame] = 0;

  /* watch out if using the best_token_for_node[] array here -
     is it valid? not if multiple recognizers; maybe we
     should remember best_token_for_end_node separately */

  current_token_index = rec->active_fsmnode_tokens;
  while (current_token_index != MAXftokenID)
  {
    ftoken = &rec->fsmnode_token_array[current_token_index];
    if (ftoken->FSMnode_index == end_node)
    {
      /* print_fsmnode_token(rec, current_token_index, "fsmnode_token at end_node "); */
      word_token_index = srec_process_word_boundary_nbest(rec,
                         ftoken->FSMnode_index,
                         ftoken->word,
                         ftoken->word_backtrace,
                         ftoken->cost, &current_word_threshold, &any_nodes_blocked);
      if (word_token_index != MAXwtokenID)
      {
        WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array + word_token_index),
                                 rec->word_token_array[word_token_index].end_time - ftoken->silence_duration);
      }
      /* now also dump alternatives at this last frame, sep19'03 fixed */
      awtoken = ftoken->aword_backtrace;
      for (; awtoken != AWTNULL; awtoken = awtoken->next_token)
      {
        srec_process_word_boundary_nbest(rec,
                                         ftoken->FSMnode_index,
                                         awtoken->word,
                                         awtoken->word_backtrace,
                                         ftoken->cost + awtoken->costdelta,
                                         &current_word_threshold,
                                         &any_nodes_blocked);
      }
    }
    current_token_index = ftoken->next_token_index;
  }

  /* we clobber the word_lattice at the last frame that was created
     in do_epsilon_updates() */
  word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
  lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);

  if (FST_IsVoiceEnrollment(rec->context) && word_token_index == MAXwtokenID)
  {
    srec_force_the_end(rec, rec->current_search_frame, rec->context->end_silence_word);
  }

  /* find the current_best_cost for this recognizer ... at the end node,
     it will be used to decide which recognizer wins! */
  rec->current_best_cost = lattice_best_cost_to_frame(rec->word_lattice,
                           rec->word_token_array,
                           rec->current_search_frame);

}
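
/* srec_begin(), the per-frame search, and srec_no_more_frames() make up the
   utterance lifecycle of one recognizer.  The sketch below shows roughly how
   a caller might drive them; it is not the engine's real driver, and
   have_more_frames()/load_next_frame() are hypothetical stand-ins for the
   front-end hookup that lives outside this file. */
#if 0 /* illustrative sketch only -- not compiled into the engine */
static void run_one_utterance_sketch(multi_srec* recm,
                                     srec_eos_detector_parms* eosd,
                                     pattern_info* pattern)
{
  /* assumption: recm->rec[0] is the (only) activated recognizer */
  srec* rec = &recm->rec[0];

  if (srec_begin(rec, 0) != 0)        /* frame 0 setup + initial epsilon updates */
    return;

  while (have_more_frames())          /* hypothetical front-end helpers */
  {
    load_next_frame(pattern);
    if (multi_srec_viterbi(recm, eosd, pattern, NULL) != 0)
      break;                          /* e.g. frame limit reached */
    if (multi_srec_get_eos_status(recm) > 0)
      break;                          /* end of speech detected */
  }

  srec_no_more_frames(rec);           /* force word boundaries at the end node */
}
#endif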

void srec_terminate(srec* rec)
{
  frameID ifr;
  stokenID stoken_index, next_stoken_index;
  fsmarc_token* stoken;
  ftokenID ftoken_index, next_ftoken_index;
  fsmnode_token* ftoken;
  wtokenID wtoken_index, next_wtoken_index;
  word_token* wtoken;

  /* release all state tokens */
  for (stoken_index = rec->active_fsmarc_tokens; stoken_index != MAXstokenID;
       stoken_index = next_stoken_index)
  {
    stoken = &rec->fsmarc_token_array[ stoken_index];
    next_stoken_index = stoken->next_token_index;
    free_fsmarc_token(rec, stoken_index);
  }
  rec->active_fsmarc_tokens = MAXstokenID;

  /* release all fsmnode tokens */
  for (ftoken_index = rec->active_fsmnode_tokens; ftoken_index != MAXftokenID;
       ftoken_index = next_ftoken_index)
  {
    ftoken = &rec->fsmnode_token_array[ ftoken_index];
    next_ftoken_index = ftoken->next_token_index;
    free_fsmnode_token(rec, ftoken_index);
  }
  rec->active_fsmnode_tokens = MAXftokenID;

  /* release all word tokens */
  for (ifr = 0; ifr < rec->current_search_frame; ifr++)
  {
    for (wtoken_index = rec->word_lattice->words_for_frame[ifr];
         wtoken_index != MAXwtokenID; wtoken_index = next_wtoken_index)
    {
      wtoken = &rec->word_token_array[wtoken_index];
      next_wtoken_index = wtoken->next_token_index;
      free_word_token(rec, wtoken_index);
    }
    rec->word_lattice->words_for_frame[ifr] = MAXwtokenID;
  }
  rec->current_model_scores[SILENCE_MODEL_INDEX] = DO_NOT_COMPUTE_MODEL;
  rec->current_best_cost = MAXcostdata;
  rec->srec_ended = 1;
}

/*------------------------------------------------------------------------*
 *                                                                        *
 * main work of the viterbi search                                        *
 *                                                                        *
 *------------------------------------------------------------------------*/

/*with the new update to the FSM node scheme, the sequence of operations is:

  for each frame:

  1. Handle all internal HMM updates based on new frame observations. This is
  done in place with the current list of HMM tokens.

  2. For each current active FSM node (from the previous frame), activate updates
  into state 0 (either for existing HMM tokens or for new HMM tokens) by going
  through an observation frame (so, only go from an FSM node to a new HMM
  token if the first observation frame gets a score above the current pruning
  threshold). FSM nodes are freed as this is done, so no FSMnode tokens are left
  at the end of this.

  3. Prune. Note that the best score will have already been established for
  this frame (so the pruning threshold will not change).

  4. Reset the best cost to 0 (to keep scores in range). We can do this here
  since we already know the best score.

  5. For end HMM states which are above the pruning threshold, create new
  FSMnode_tokens.

  6. Update epsilons, including word boundary arcs (which put words onto the
  word lattice). Epsilon updates go from FSM node to FSM node.

  Repeat for the next frame based on the new FSM nodes and current HMMs.
*/
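
/* Those six steps are split between srec_viterbi_part1() (model scoring plus
   steps 1-3) and srec_viterbi_part2() (steps 5-6), with the step-4
   renormalization done between the two calls.  The sketch below is a
   condensed restatement of the single-recognizer path of multi_srec_viterbi()
   further down; it adds nothing new, it only maps the steps onto the calls. */
#if 0 /* illustrative sketch only -- not compiled into the engine */
static void one_frame_flow_sketch(multi_srec* recm, pattern_info* pattern)
{
  srec* rec = &recm->rec[0];

  /* part1: compute the acoustic model scores needed this frame, then steps 1-3:
       1. update_internal_hmm_states()                  (within-HMM updates)
       2. update_from_current_fsm_nodes_into_new_HMMs() (FSM node -> HMM state 0)
       3. prune_new_tokens() + reprune_word_tokens_if_necessary()              */
  srec_viterbi_part1(rec, recm->swimodel[0], pattern, DO_NOT_COMPUTE_MODEL);

  /* step 4: renormalize so this frame's best cost becomes zero, keeping
     costdata values in range; the subtracted amount is remembered in the
     per-frame cost offsets */
  reset_best_cost_to_zero(rec, rec->current_best_cost);
  reset_cost_offsets(recm, rec->current_search_frame, rec->current_best_cost);
  rec->current_prune_delta = rec->prune_delta;
  rec->current_best_cost = 0;

  /* part2: steps 5-6:
       5. update_from_hmms_to_fsmnodes() (HMM end states -> fsmnode tokens)
       6. do_epsilon_updates()           (epsilon/word-boundary arcs -> word lattice) */
  srec_viterbi_part2(rec);
}
#endif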

void srec_viterbi_part1(srec *rec,
                        const SWIModel *acoustic_models,
                        pattern_info *pattern,
                        costdata silence_model_cost);

void srec_viterbi_part2(srec *rec);

int multi_srec_viterbi(multi_srec *recm,
                       srec_eos_detector_parms* eosd,
                       pattern_info *pattern,
                       utterance_info* utt_not_used)
{
  EOSrc eosrc1 = SPEECH_ENDED, eosrc2 = SPEECH_ENDED;
#if DO_ALLOW_MULTIPLE_MODELS
  ASSERT(recm->num_activated_recs == recm->num_swimodels);
  if (recm->num_activated_recs == 1)
  {
#endif
    srec* rec1 = &recm->rec[0];
#if USE_COMP_STATS
    start_cs_clock1(&comp_stats->overall_search);
#endif
    if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
      return 1;
    srec_viterbi_part1(&recm->rec[0], recm->swimodel[0], pattern, DO_NOT_COMPUTE_MODEL);
    reset_best_cost_to_zero(rec1, rec1->current_best_cost);
    reset_cost_offsets(recm, rec1->current_search_frame, rec1->current_best_cost);
    rec1->current_prune_delta = rec1->prune_delta;
    rec1->current_best_cost = 0;
    srec_viterbi_part2(&recm->rec[0]);
    eosrc1 = srec_check_end_of_speech(eosd, &recm->rec[0]);
#if USE_COMP_STATS
    end_cs_clock1(&comp_stats->overall_search, 1);
#endif

    SREC_STATS_UPDATE(&recm->rec[0]);
    recm->eos_status = eosrc1;
#if DO_ALLOW_MULTIPLE_MODELS
  }
  else if (recm->num_activated_recs == 2)
  {
    srec* rec1 = &recm->rec[0];
    srec* rec2 = &recm->rec[1];
    const SWIModel* acoustic_models1 = recm->swimodel[0];
    const SWIModel* acoustic_models2 = recm->swimodel[1];
    costdata diff;
    costdata current_best_cost;

    ASSERT(rec1->prune_delta == rec2->prune_delta);
    /* in part 1 we need to operate by adjusting the prune delta, 'cuz we want
       to operate on scores after consumption of a frame */
    if ((rec1->current_best_cost > MAXcostdata / 2 && !rec1->srec_ended) ||
        (rec2->current_best_cost > MAXcostdata / 2 && !rec2->srec_ended))
    {
      printf("hey %d %d\n", rec1->current_best_cost, rec2->current_best_cost);
    }

    /* figure out the prune_delta for the different genders; we want the
       pruning to be joint (i.e. prune male and female relative to the
       overall best). Before part1 we don't yet know the overall best, so we
       use the gender score gap from the last frame, and make the pruning of
       the worse gender accordingly more aggressive */

    if (!rec2->srec_ended && rec1->current_best_cost < rec2->current_best_cost)
    {
      diff = rec2->current_best_cost - rec1->current_best_cost;
      if (rec2->current_search_frame >= (rec2->word_lattice->max_frames - 1))
      {
        return 1;
      }
      if (diff > rec2->prune_delta)
      {
        srec_terminate(rec2);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
        PLogMessage("T: terminate_viterbi(rec2) @%d", rec2->current_search_frame);
#endif
      }
      else
        rec2->current_prune_delta = rec2->prune_delta - diff;
      rec1->current_prune_delta = rec1->prune_delta;
    }
    else if (!rec1->srec_ended)
    {
      if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
      {
        return 1;
      }
      diff = rec1->current_best_cost - rec2->current_best_cost;
      if (diff > rec1->prune_delta)
      {
        srec_terminate(rec1);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
        PLogMessage("T: terminate_viterbi(rec1) @%d", rec1->current_search_frame);
#endif
      }
      else
        rec1->current_prune_delta = rec1->prune_delta - diff;
      rec2->current_prune_delta = rec2->prune_delta;
    }

    /* now run part1 for each gender */
    if (!rec1->srec_ended)
    {
      srec_viterbi_part1(rec1, acoustic_models1, pattern, DO_NOT_COMPUTE_MODEL);
      SREC_STATS_UPDATE(rec1);
    }

    if (!rec2->srec_ended)
    {
      srec_viterbi_part1(rec2, acoustic_models2, pattern, rec1->current_model_scores[SILENCE_MODEL_INDEX]);
      SREC_STATS_UPDATE(rec2);
    }

    /* now adjust score offsets, score offsets are shared across genders */

    if (rec1->current_best_cost <= rec2->current_best_cost)
    {
      /* am1 is winning, prune 2 harder */
      current_best_cost = rec1->current_best_cost;
      reset_cost_offsets(recm, rec1->current_search_frame, current_best_cost);
    }
    else
    {
      /* am2 is winning, prune 1 harder */
      current_best_cost = rec2->current_best_cost;
      reset_cost_offsets(recm, rec2->current_search_frame, current_best_cost);
    }

    /* jean: some cleanup needed here */
    /** best_token_for_arc = rec1->best_token_for_arc;
        rec1->best_token_for_arc = 0; **/
    if (!rec1->srec_ended)
    {
      reset_best_cost_to_zero(rec1, current_best_cost);
      rec1->current_best_cost = (costdata)(rec1->current_best_cost - (costdata) current_best_cost);
      srec_viterbi_part2(rec1);
      if (rec1->active_fsmnode_tokens == MAXftokenID)
        srec_terminate(rec1);
      if (!rec1->srec_ended)
        eosrc1 = srec_check_end_of_speech(eosd, rec1);
    }
    /** rec1->best_token_for_arc = best_token_for_arc;
        best_token_for_arc = rec2->best_token_for_arc;
        rec2->best_token_for_arc = 0; **/
    if (!rec2->srec_ended)
    {
      reset_best_cost_to_zero(rec2, current_best_cost);
      rec2->current_best_cost = (costdata)(rec2->current_best_cost - (costdata) current_best_cost);
      srec_viterbi_part2(rec2);
      if (rec2->active_fsmnode_tokens == MAXftokenID)
        srec_terminate(rec2);
      if (!rec2->srec_ended)
        eosrc2 = srec_check_end_of_speech(eosd, rec2);
    }
    /** rec2->best_token_for_arc = best_token_for_arc; **/
    SREC_STATS_UPDATE(rec1);
    SREC_STATS_UPDATE(rec2);
    recm->eos_status = eosrc1;
    if (rec1->current_best_cost > rec2->current_best_cost)
      recm->eos_status = eosrc2;
  }
#endif
  return 0;
}
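
/* Before part1, the two-recognizer path above narrows the beam of whichever
   recognizer was behind on the previous frame by the score gap, and
   terminates it outright when the gap exceeds the full beam.  The same
   decision in isolation, on plain numbers: */
#if 0 /* illustrative sketch only -- not compiled into the engine */
#include <stdio.h>

int main(void)
{
  unsigned short prune_delta = 800;   /* shared configured beam width          */
  unsigned short best1 = 5300;        /* last frame's best cost, recognizer 1  */
  unsigned short best2 = 5650;        /* last frame's best cost, recognizer 2  */
  unsigned short delta1, delta2;

  if (best1 < best2)
  {
    unsigned short diff = (unsigned short)(best2 - best1);
    delta1 = prune_delta;             /* the leader keeps the full beam        */
    if (diff > prune_delta)
    {
      delta2 = 0;                     /* gap too large: recognizer 2 would be terminated */
      printf("recognizer 2 terminated (gap %u > beam %u)\n",
             (unsigned)diff, (unsigned)prune_delta);
    }
    else
      delta2 = (unsigned short)(prune_delta - diff); /* the trailer prunes harder */
  }
  else
  {
    unsigned short diff = (unsigned short)(best1 - best2);
    delta2 = prune_delta;
    delta1 = (diff > prune_delta) ? 0 : (unsigned short)(prune_delta - diff);
  }
  printf("beam widths this frame: rec1=%u rec2=%u\n", (unsigned)delta1, (unsigned)delta2);
  return 0;
}
#endif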


void srec_viterbi_part1(srec *rec,
                        const SWIModel *acoustic_models,
                        pattern_info *pattern,
                        costdata silence_model_cost)
{
  costdata current_best_cost;
  /* costdata current_prune_thresh; */
  costdata current_prune_delta;
  /* the score difference for pruning - can get adjusted below if
     pruning gets tightened to keep array sizes in check*/
  costdata *current_model_scores;
  int num_models_computed;
  nodeID num_fsm_nodes_updated;

#if USE_COMP_STATS
  start_cs_clock(&comp_stats->models);
#endif

  /*first go ahead and compute scores for all models which are needed by the search at this point*/

  find_which_models_to_compute(rec, acoustic_models);
  /* communication happens via rec->current_model_scores */
#define SCORE_FIRST_SILENCE_ONLY
#ifdef SCORE_FIRST_SILENCE_ONLY
  if (silence_model_cost != DO_NOT_COMPUTE_MODEL)
    rec->current_model_scores[SILENCE_MODEL_INDEX] = silence_model_cost;
#endif
  num_models_computed = compute_model_scores(rec->current_model_scores, acoustic_models, pattern, rec->current_search_frame);
  rec->best_model_cost_for_frame[rec->current_search_frame] = best_uint16(rec->current_model_scores, acoustic_models->num_hmmstates);

#if USE_COMP_STATS
  end_cs_clock(&comp_stats->models, num_models_computed);
  start_cs_clock(&comp_stats->internal_hmm);
#endif

  /*get some things out of the rec structure to make things a bit faster*/
  current_model_scores = rec->current_model_scores;

  /*update search to next frame*/
  current_best_cost = MAXcostdata - ((costdata)2) * rec->prune_delta; /*to avoid overflows, must clean up later */
  /* current_prune_thresh = MAXcostdata; */
  current_prune_delta = rec->current_prune_delta;

  /* srec_stats_update(rec, "(...0) "); */
  /*------------------------------------------------------------------------*
    1. Handle all internal HMM updates based on new frame observations. This is
    done in place with the current list of HMM tokens.
   *------------------------------------------------------------------------*/

  update_internal_hmm_states(rec, &current_prune_delta, &current_best_cost, current_model_scores);

  /* check_if_any_token_better_than_best_cost(rec, rec->active_fsmarc_tokens, current_best_cost, "after update into new");*/

#if USE_COMP_STATS
  end_cs_clock(&comp_stats->internal_hmm, rec->num_new_states);
  start_cs_clock(&comp_stats->fsm_to_hmm);
#endif

  /* srec_stats_update(rec, "(...1) "); */
  /*------------------------------------------------------------------------*
    2. For each current active FSM node (from the previous frame), activate updates
    into state 0 (either for existing HMM tokens or for new HMM tokens) by going
    through an observation frame (so, only go from an FSM node to a new HMM
    token if the first observation frame gets a score above the current pruning
    threshold). FSM nodes are freed as this is done, so no FSMnode tokens are left
    at the end of this.
   *------------------------------------------------------------------------*/

  num_fsm_nodes_updated = (nodeID) update_from_current_fsm_nodes_into_new_HMMs(rec, &current_prune_delta, &current_best_cost, current_model_scores);
  /* srec_stats_update(rec, "(...2) "); */
  /*------------------------------------------------------------------------*
    3. Prune. Note that the best score will have already been established for
    this frame (so the pruning threshold will not change).
   *------------------------------------------------------------------------*/

#if USE_COMP_STATS
  end_cs_clock(&comp_stats->fsm_to_hmm, num_fsm_nodes_updated);
  start_cs_clock(&comp_stats->prune);
#endif

  prune_new_tokens(rec, (costdata)(current_best_cost + current_prune_delta));

  /* it's nice to do word token pruning here 'cuz we only need to traceback
     the active_fsmarc_tokens; active_fsmnode_tokens are propagated thereto */

  reprune_word_tokens_if_necessary(rec);

  rec->current_prune_delta = current_prune_delta;
  rec->current_best_cost = current_best_cost;
  /* srec_stats_update(rec, "(...3) "); */
#if USE_COMP_STATS
  end_cs_clock(&comp_stats->prune, rec->num_new_states);
#endif
}
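
/* Between part1 and part2 the frame's best cost is subtracted from every live
   path (reset_best_cost_to_zero) and folded into the per-frame cost offsets
   (reset_cost_offsets), so costdata values stay within their limited range;
   a path's true cost is its stored cost plus the accumulated offset.  A toy
   illustration of that renormalization with plain arrays: */
#if 0 /* illustrative sketch only -- not compiled into the engine */
#include <stdio.h>

int main(void)
{
  unsigned short path_cost[3] = { 6400, 6550, 7100 }; /* live token costs         */
  unsigned short accumulated_offset = 12000;          /* offsets from past frames */
  unsigned short best = path_cost[0];
  int i;

  for (i = 1; i < 3; i++)
    if (path_cost[i] < best)
      best = path_cost[i];

  /* subtract the best cost from every path and fold it into the offset */
  for (i = 0; i < 3; i++)
    path_cost[i] = (unsigned short)(path_cost[i] - best);
  accumulated_offset = (unsigned short)(accumulated_offset + best);

  for (i = 0; i < 3; i++)
    printf("path %d: stored %u, true cost %u\n", i,
           (unsigned)path_cost[i], (unsigned)(path_cost[i] + accumulated_offset));
  return 0;
}
#endif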

void srec_viterbi_part2(srec *rec)
{
  wtokenID word_token_index;
  nodeID inode, num_fsm_nodes_updated;
  costdata current_prune_delta = rec->current_prune_delta;
  costdata current_best_cost = rec->current_best_cost;
  ftokenID* ftmp;
  int num_updates;

  /* first we clear the best_token_for_node array; there are no live
     fsmnode_tokens at this point, and we don't want leftovers from
     the last frame */
  ftmp = rec->best_token_for_node;
  for (inode = 0; inode < rec->context->num_nodes; inode++)
    *ftmp++ = MAXftokenID;

  /*------------------------------------------------------------------------*
    4. Reset the best cost to 0 (to keep scores in range). We can do this here
    since we already know the best score. This is done here so that
    no fsmnode tokens (there are none active now) need updating. This is also
    done here before epsilons - that way we don't need to update the word
    tokens.

    We assume this was done just before part2.
   *------------------------------------------------------------------------*/

#if USE_COMP_STATS
  start_cs_clock(&comp_stats->hmm_to_fsm);
#endif

  /*------------------------------------------------------------------------*
    5. For end HMM states which are above the pruning threshold, create new
    FSMnode_tokens.
   *------------------------------------------------------------------------*/

  num_updates = update_from_hmms_to_fsmnodes(rec, current_prune_delta, current_best_cost);
  if (num_updates == 0)
  {
    num_updates = update_from_hmms_to_fsmnodes(rec, 2 * current_prune_delta, current_best_cost);
    SREC_STATS_INC_FORCED_UPDATES();
  }
  SREC_STATS_UPDATE(rec);

#if USE_COMP_STATS
  end_cs_clock(&comp_stats->hmm_to_fsm, rec->num_new_states);
  start_cs_clock(&comp_stats->epsilon);
#endif

  /* srec_stats_update(rec, "(...5) "); */

  /*------------------------------------------------------------------------*
    6. Update epsilons, including word boundary arcs (which put words onto the
    word lattice). Epsilon updates go from FSM node to FSM node.
   *------------------------------------------------------------------------*/

  /*clear the priority_q for this frame*/
  clear_priority_q(rec->word_priority_q);

  num_fsm_nodes_updated = (nodeID) do_epsilon_updates(rec, current_prune_delta, current_best_cost);

#if USE_COMP_STATS
  end_cs_clock(&comp_stats->epsilon, num_fsm_nodes_updated);
#endif

  /* srec_stats_update(rec, "(...6) "); */
  rec->current_search_frame++;

  /* no need to prune again after epsilons since they add no new cost - if we
     add costs to epsilon arcs (at word boundaries for example), add another
     pruning stage */

  word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
  lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
}

/* get the top choice, trace it back, and find out where speech starts
   and ends. this is used for channel normalization */

static srec* WHICH_RECOG(multi_srec* rec)
{
#if DO_ALLOW_MULTIPLE_MODELS
  srec* return_rec = NULL;
  costdata current_best_cost = MAXcostdata;
  int i = 0;
  for (i = 0; i < rec->num_activated_recs; i++)
  {
    if (current_best_cost > rec->rec[i].current_best_cost)
    {
      current_best_cost = rec->rec[i].current_best_cost;
      return_rec = &rec->rec[i];
    }
  }
  return return_rec;
#else
  return &rec->rec[0];
#endif
}
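
/* multi_srec_get_speech_bounds() below walks the top choice backwards from the
   current frame via word_token.backtrace, using the begin/end silence words to
   bracket where speech starts and ends, and guards against a token that points
   back at itself.  A stripped-down version of that traceback loop on a toy
   token array (toy word ids: 0 = begin silence, 1 = end silence): */
#if 0 /* illustrative sketch only -- not compiled into the engine */
#include <stdio.h>

#define TOY_MAXTOK 0xffff

struct toy_word_token
{
  int word;                 /* 0 = beg_sil, 1 = end_sil, 2.. = real words       */
  unsigned short end_time;
  unsigned short backtrace; /* index of the previous token, TOY_MAXTOK at start */
};

int main(void)
{
  /* a 4-token path: beg_sil -> word 2 -> word 3 -> end_sil */
  struct toy_word_token toks[4] = {
    { 0,  12, TOY_MAXTOK },  /* leading silence ends at frame 12   */
    { 2,  40, 0 },
    { 3,  70, 1 },
    { 1, 100, 2 },           /* trailing silence to the last frame */
  };
  unsigned short idx = 3, start = 0, end = 0;
  int last_word = -1;

  while (idx != TOY_MAXTOK)
  {
    if (toks[idx].word == 0 && start == 0)
      start = toks[idx].end_time;     /* speech starts where leading silence ends  */
    if (last_word == 1)
      end = toks[idx].end_time;       /* speech ends where trailing silence begins */
    if (idx == toks[idx].backtrace)
    {
      printf("warning: breaking infinite loop\n");
      break;
    }
    last_word = toks[idx].word;
    idx = toks[idx].backtrace;
  }
  printf("speech bounds: frames %u..%u\n", (unsigned)start, (unsigned)end);
  return 0;
}
#endif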

void multi_srec_get_speech_bounds(multi_srec* recm, frameID* start_frame, frameID* end_frame)
{
  frameID csf;
  wtokenID token_index;
  wordID last_word;
  srec* rec = WHICH_RECOG(recm);

  *start_frame = *end_frame = 0;

  if (!rec)
    return;
  csf = rec->current_search_frame;
  token_index = rec->word_lattice->words_for_frame[csf];
  last_word = MAXwordID;
  while (token_index != MAXwtokenID)
  {
    word_token* wtoken = &rec->word_token_array[token_index];
    word_token* next_wtoken;

    if (wtoken->word == rec->context->beg_silence_word)
    {
      if (*start_frame == 0) *start_frame = wtoken->end_time;
    }
    if (wtoken->word == rec->context->hack_silence_word)
    {
      if (wtoken->backtrace != MAXwtokenID)
      {
        next_wtoken = &rec->word_token_array[wtoken->backtrace];
        if (next_wtoken->word == rec->context->beg_silence_word)
          *start_frame = wtoken->end_time;
      }
    }

    if (last_word == rec->context->end_silence_word)
    {
      *end_frame = wtoken->end_time;
      if (wtoken->word == rec->context->hack_silence_word
          && wtoken->backtrace != MAXwtokenID)
      {
        next_wtoken = &rec->word_token_array[wtoken->backtrace];
        *end_frame = WORD_TOKEN_GET_WD_ETIME( next_wtoken);
      }
    }
    if (token_index == wtoken->backtrace)
    {
      /* infinite loop! */
      PLogError ("warning: breaking infinite loop\n");
      *end_frame = 0;
      break;
    }
    token_index = wtoken->backtrace;
    last_word = wtoken->word;
  }
}

int multi_srec_get_eos_status(multi_srec* rec)
{
  int rc;
  ASSERT(rec);
  rc = (int)rec->eos_status;
  if (rc < 0) rc = 0;
  return rc;
}

/*
  ToDo List:

  end-pointing
  duration
  channel normalization
  re-use and appropriate killing of word_tokens
  pruning fsmnode_tokens
  astar backward for alternative choices

  minimized graphs and word merging
  Johan's idea:
  When propagating a fsmarc_token, we need to remember the word id when it
  is observed. Let's continue to use fsmarc_token->word[] to remember those.
  When merging 2+ fsmarc_tokens into a fsmnode_token, we need to remember
  both histories, not just the best. All histories are maintained on a linked
  list, with word_token->next_token_index serving as links; somehow we also
  remember the cost offset from one link to the next and keep track of that.
  Try to create the word_token as late as possible, so as to keep usage down.
  The list should be sorted so that we can drop things off the end, i.e. we
  don't need to keep all words; a max of 10 is fine 'cuz that's the most
  we'll need to drop off at a .wb anyways!

  altwords .. working .. now cpu optimize ... ideas
  use only the head refcount, #define the refcopy, not a function
  free_altword_token_batch() should not double check for AWTNULL
  BUILD & BUILD_DEBUG in selected areas
  reprune_altword_token_batch ... change costbasis to a tag ... to say (already repruned)

  endpointing
  at grammar prepare ...
  get the list of endnodes ... get the list of openendnodes
  ... start from the graph's endnode, walk backwards on all null or silence arcs,
      find the nodes which have a silence or null path to the end: those are sinknodes
  ... sinknodes are endnodes or openendnodes ... the sinknodes0 are the sinknodes
      that do go to speech arcs .. the sinknodes1 are the sinknodes that do not go
      to any speech arcs
  ... walkforward all sinknodes0 through iwt arcs, those are openendnodes
  ... walkforward all sinknodes1 through iwt arcs, those are endnodes
  get the top score fsmnode_token ...
  ... is it on an endnode ... has this been the top choice for the last 30 frames?
  ... is it on an optional endnode ... has this been the top choice for the last 50 frames?

*/
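
/* The endpointing notes above amount to: classify grammar nodes at
   grammar-prepare time (end node, optional end node, other), then declare end
   of speech once the best fsmnode token has sat on an end node for roughly 30
   consecutive frames, or on an optional end node for roughly 50.  The sketch
   below covers only the frame-by-frame counting rule; classify_node() is a
   hypothetical helper standing in for the sink-node walk described above, and
   the frame counts are the ballpark figures from the notes, not tuned values. */
#if 0 /* illustrative sketch only -- not compiled into the engine */
enum toy_node_kind { TOY_NODE_OTHER = 0, TOY_NODE_END, TOY_NODE_OPT_END };

/* hypothetical: would be filled in from the grammar-prepare sink-node walk */
extern enum toy_node_kind classify_node(unsigned short node_index);

/* call once per frame with the node holding the best fsmnode token;
   returns nonzero when the endpointing rule fires */
static int endpoint_check_sketch(unsigned short best_node)
{
  static unsigned short prev_node = 0xffff;
  static int frames_on_node = 0;

  if (best_node == prev_node)
    frames_on_node++;
  else
  {
    prev_node = best_node;
    frames_on_node = 1;
  }

  switch (classify_node(best_node))
  {
    case TOY_NODE_END:     return frames_on_node >= 30;
    case TOY_NODE_OPT_END: return frames_on_node >= 50;
    default:               return 0;
  }
}
#endif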