1 /*---------------------------------------------------------------------------* 2 * word_lattice.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #ifndef _RTT 21 #include "pstdio.h" 22 #endif 23 #include <stdlib.h> 24 #include <string.h> 25 #include <math.h> 26 #include "passert.h" 27 #include "portable.h" 28 29 30 #include "portable.h" 31 32 #include "hmm_desc.h" 33 #include "utteranc.h" 34 #include "hmmlib.h" 35 36 #include "srec_sizes.h" 37 #include "search_network.h" 38 #include "srec.h" 39 #include "word_lattice.h" 40 #include "astar.h" 41 #include "srec_stats.h" 42 #include "srec_results.h" 43 44 #define PRINT_WORD_LATTICE 0 45 #define PRINT_SEARCH_DETAILS 0 46 47 #define TRUE_KILL_WTOKEN(WT) { WT.cost = MAXcostdata; \ 48 WT.word = MAXwordID; \ 49 WT.end_time = MAXframeID; \ 50 WT.end_node = MAXnodeID; \ 51 WT.backtrace = MAXwtokenID; \ 52 } 53 54 srec_word_lattice *allocate_word_lattice(frameID max_frames) 55 { 56 srec_word_lattice *wl; 57 58 wl = (srec_word_lattice*) CALLOC_CLR(1, sizeof(srec_word_lattice), "search.word_lattice.base"); 59 wl->max_frames = max_frames; 60 wl->words_for_frame = (wtokenID*) CALLOC_CLR(max_frames, sizeof(wtokenID), "search.word_lattice.words"); 61 62 wl->whether_sorted = (asr_int16_t*)CALLOC_CLR(max_frames, sizeof(asr_int16_t), "search.word_lattice.sflag"); 63 64 return wl; 65 } 66 67 void destroy_word_lattice(srec_word_lattice* wl) 68 { 69 FREE(wl->words_for_frame); 70 FREE(wl->whether_sorted); 71 FREE(wl); 72 } 73 74 void initialize_word_lattice(srec_word_lattice* wl) 75 { 76 frameID ifr; 77 for (ifr = 0; ifr < wl->max_frames; ifr++) 78 { 79 wl->words_for_frame[ifr] = MAXwtokenID; 80 wl->whether_sorted[ifr] = 0; 81 } 82 } 83 84 costdata lattice_best_cost_to_frame(srec_word_lattice *wl, word_token* word_token_array, frameID ifr) 85 { 86 int sanity_counter = 0; 87 costdata best_cost = MAXcostdata; 88 wtokenID wtoken_index = wl->words_for_frame[ ifr]; 89 while (wtoken_index != MAXwtokenID) 90 { 91 word_token* wtoken = &word_token_array[wtoken_index]; 92 if (sanity_counter++ > 200) return MAXcostdata; 93 wtoken_index = wtoken->next_token_index; 94 if (best_cost > wtoken->cost) 95 best_cost = wtoken->cost; 96 } 97 return best_cost; 98 } 99 100 void lattice_add_word_tokens(srec_word_lattice *wl, frameID frame, 101 wtokenID word_token_list_head) 102 { 103 if (frame >= wl->max_frames) 104 { 105 log_report("lattice_add_word_tokens: max_frame not big enough\n"); 106 ASSERT(0); 107 } 108 wl->words_for_frame[frame] = word_token_list_head; 109 } 110 111 void print_word_token_backtrace(srec* rec, wtokenID wtoken_index, char* tail) 112 { 113 char* null = "NULL", *p; 114 char iwttime[8] = { 0, 0, 0, 0, 0, 0, 0, 0}; 115 bigcostdata cost; 116 bigcostdata cost_for_word; 117 word_token *wtoken, *last_wtoken; 118 119 last_wtoken = NULL; 120 while (wtoken_index != MAXwtokenID) 121 { 122 wtoken = &rec->word_token_array[wtoken_index]; 123 if (wtoken->word < rec->context->olabels->num_words) 124 p = rec->context->olabels->words[wtoken->word]; 125 else 126 p = null; 127 ASSERT(!last_wtoken || last_wtoken->end_time > wtoken->end_time); 128 ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0); 129 cost = wtoken->cost + rec->accumulated_cost_offset[ wtoken->end_time]; 130 131 if (wtoken->backtrace != MAXwtokenID) 132 { 133 word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace]; 134 cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[ next_wtoken->end_time]; 135 } 136 else 137 { 138 cost_for_word = cost; 139 } 140 sprintf(iwttime, "/%d", WORD_TOKEN_GET_WD_ETIME(wtoken) ); 141 PLogMessage (" (%d W%d %s cost=%d/%d/%d time=%d%s node=%d)", wtoken_index, wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, iwttime, wtoken->end_node); 142 fflush(stdout); 143 ASSERT(wtoken->backtrace != wtoken_index); 144 wtoken_index = wtoken->backtrace; 145 last_wtoken = wtoken; 146 } 147 PLogMessage (tail); 148 } 149 150 int sprint_bword_token_backtrace(char* buf, int buflen, srec* rec, wtokenID wtoken_index) 151 { 152 char* null = "NULL", *p; 153 char *pbuf = buf; 154 *pbuf = 0; 155 156 while (wtoken_index != MAXwtokenID) 157 { 158 word_token* wtoken = &rec->word_token_array[wtoken_index]; 159 p = null; 160 if (wtoken->word < rec->context->olabels->num_words) 161 p = rec->context->olabels->words[wtoken->word]; 162 ASSERT(pbuf + strlen(p) + 1 < buf + buflen); 163 pbuf += sprintf(pbuf, "%s ", p); 164 ASSERT(wtoken->backtrace != wtoken_index); 165 166 wtoken_index = wtoken->backtrace; 167 } 168 if (pbuf > buf && *(pbuf - 1) == ' ') *(pbuf - 1) = 0; 169 return 0; 170 } 171 172 #define ERROR_TRANSCRIPTION_TOO_LONG -1 173 174 ESR_ReturnCode sprint_word_token_backtraceByWordID(wordID* wordIDs, size_t* len, srec* rec, wtokenID wtoken_index) 175 { 176 size_t i, currentLen = 0; 177 ESR_ReturnCode rc; 178 word_token* wtoken; 179 180 #if PRINT_SEARCH_DETAILS 181 printf("in get backtrace wtoken %d\n", wtoken_index); 182 #endif 183 184 while (wtoken_index != MAXwtokenID) 185 { 186 if (*len <= currentLen) 187 { 188 rc = ESR_BUFFER_OVERFLOW; 189 PLogError(ESR_rc2str(rc)); 190 *len = currentLen + 1; 191 goto CLEANUP; 192 } 193 wtoken = &rec->word_token_array[wtoken_index]; 194 wordIDs[currentLen] = wtoken->word; 195 ++currentLen; 196 197 if (wtoken_index == wtoken->backtrace) 198 { 199 *len = 0; 200 PLogError("Result is loopy, rejecting"); 201 return ESR_INVALID_STATE; 202 } 203 wtoken_index = wtoken->backtrace; 204 } 205 206 /* reverse the order */ 207 for (i = 0; i < currentLen / 2; i++) 208 { 209 wordID tmp = wordIDs[i]; 210 wordIDs[i] = wordIDs[(currentLen-1-i)]; 211 wordIDs[(currentLen-1-i)] = tmp; 212 } 213 /* strip the pau/pau2 markers */ 214 if (currentLen >= 1 && wordIDs[0] == rec->context->beg_silence_word) 215 { 216 for (i = 0; i < currentLen - 1; i++) 217 wordIDs[i] = wordIDs[i+1]; 218 currentLen--; 219 } 220 if (currentLen >= 1 && wordIDs[currentLen-1] == rec->context->end_silence_word) 221 currentLen--; 222 wordIDs[currentLen] = MAXwordID; 223 *len = currentLen; 224 225 return ESR_SUCCESS; 226 CLEANUP: 227 return rc; 228 } 229 230 int sprint_word_token_backtrace(char *transcription, int len, srec* rec, wtokenID wtoken_index) 231 { 232 char *w; 233 char *from_p; 234 char *to_p; 235 char *end; 236 char *tr_end = transcription; 237 int wlen; 238 239 #define SHOW_END_TIMES 1 240 #if SHOW_END_TIMES 241 char buf[256/*64*/]; 242 #endif 243 244 *transcription = 0; 245 246 #if PRINT_SEARCH_DETAILS 247 printf("in get backtrace wtoken %d\n", wtoken_index); 248 #endif 249 250 while (wtoken_index != MAXwtokenID) 251 { 252 word_token* wtoken = &rec->word_token_array[wtoken_index]; 253 #if PRINT_SEARCH_DETAILS 254 printf("got token %d word %d\n", wtoken_index, wtoken->word); 255 #endif 256 257 w = "NULL"; 258 if (wtoken->word < rec->context->olabels->num_words) 259 w = rec->context->olabels->words[wtoken->word]; 260 #if SHOW_END_TIMES 261 /* should be defined outside because it is used outside by w */ 262 /* sprintf(buf,"%s@%d.%d",w, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_time); */ 263 if (strlen(w) + 12 > sizeof(buf)) 264 { 265 *transcription = 0; 266 return ERROR_TRANSCRIPTION_TOO_LONG; 267 } 268 else 269 { 270 sprintf(buf, "%s@%d", w, wtoken->end_time); 271 w = &buf[0]; 272 } 273 #endif 274 wlen = strlen(w); 275 if (wlen + tr_end - transcription + 1 >= len) 276 { 277 *transcription = 0; 278 return ERROR_TRANSCRIPTION_TOO_LONG; 279 } 280 /*need to tack onto beginning, so move string over*/ 281 from_p = tr_end; 282 to_p = tr_end + wlen + 1; 283 tr_end = to_p; 284 while (from_p >= transcription) *(to_p--) = *(from_p--); 285 286 /* add a space*/ 287 *to_p = ' '; 288 289 /*add the new word*/ 290 to_p = transcription; 291 end = to_p + wlen; 292 293 while (to_p < end) *(to_p++) = *(w++); 294 295 if (wtoken_index == wtoken->backtrace) 296 { 297 *transcription = 0; 298 #if BUILD&BUILD_DEBUG 299 printf("Error: result is loopy, rejecting\n"); 300 #endif 301 return ERROR_RESULT_IS_LOOPY; 302 } 303 wtoken_index = wtoken->backtrace; 304 } 305 return 0; 306 } 307 308 void print_word_token(srec* rec, wtokenID wtoken_index, char* msg) 309 { 310 bigcostdata cost, cost_for_word; 311 char *p = "NULL"; 312 word_token* wtoken = &rec->word_token_array[wtoken_index]; 313 314 PLogMessage ( msg ); 315 if (wtoken->word < rec->context->olabels->num_words) 316 p = rec->context->olabels->words[wtoken->word]; 317 ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0); 318 cost = wtoken->cost + rec->accumulated_cost_offset[wtoken->end_time]; 319 if (wtoken->backtrace != MAXwtokenID) 320 { 321 word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace]; 322 cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[next_wtoken->end_time]; 323 } 324 else 325 { 326 cost_for_word = cost; 327 } 328 printf("wtoken %d W%i %s cost=%d/%d/%d time=%d/%d node=%d", wtoken_index, 329 wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_node); 330 pfflush(PSTDOUT); 331 print_word_token_backtrace(rec, wtoken->backtrace, "\n"); 332 } 333 334 335 void print_word_token_list(srec* rec, wtokenID wtoken_index, char* msg) 336 { 337 #ifndef NDEBUG 338 int sanity_counter = 0; 339 #endif 340 PLogMessage ( msg ); 341 while (wtoken_index != MAXwtokenID) 342 { 343 word_token* wtoken = &rec->word_token_array[wtoken_index]; 344 print_word_token(rec, wtoken_index, ""); 345 ASSERT(sanity_counter++ < 200); 346 ASSERT(wtoken_index != wtoken->next_token_index); 347 wtoken_index = wtoken->next_token_index; 348 } 349 } 350 351 #define MAX_LEN 256 352 void srec_get_result(srec *rec) 353 { 354 srec_word_lattice *wl; 355 frameID i; 356 wtokenID token_index; 357 word_token *wtoken; 358 359 #if PRINT_SEARCH_DETAILS 360 printf("in srec_get_result\n"); 361 #endif 362 363 wl = rec->word_lattice; 364 #if PRINT_WORD_LATTICE 365 for (i = 0; i <= rec->current_search_frame; i++) 366 { 367 #else 368 for (i = rec->current_search_frame; i <= rec->current_search_frame; i++) 369 { 370 #endif 371 372 /* put the best choice at the top */ 373 sort_word_lattice_at_frame(rec, i); 374 token_index = wl->words_for_frame[i]; 375 376 #if PRINT_WORD_LATTICE 377 printf("----- List of words for frame %d\n", i); 378 print_word_token_list(rec, token_index, ""); 379 #endif 380 381 if (i == rec->current_search_frame && token_index != MAXwtokenID) 382 { 383 wtoken = &(rec->word_token_array[token_index]); 384 print_word_token(rec, token_index, "Final Top Choice: "); 385 } 386 } 387 } 388 389 static srec* WHICH_RECOG(multi_srec* rec) 390 { 391 srec* return_rec = NULL; 392 costdata current_best_cost = MAXcostdata; 393 int i = 0; 394 #if DO_ALLOW_MULTIPLE_MODELS 395 for (i = 0; i < rec->num_activated_recs; i++) 396 { 397 #endif 398 if (current_best_cost > rec->rec[i].current_best_cost) 399 { 400 current_best_cost = rec->rec[i].current_best_cost; 401 return_rec = &rec->rec[i]; 402 } 403 #if DO_ALLOW_MULTIPLE_MODELS 404 } 405 #endif 406 return return_rec; 407 } 408 409 ESR_ReturnCode srec_get_top_choice_wordIDs(multi_srec* recm, wordID* wordIDs, size_t* len) 410 { 411 srec* rec = WHICH_RECOG(recm); 412 frameID end_frame; 413 srec_word_lattice* wl; 414 wtokenID token_index; 415 ESR_ReturnCode rc; 416 417 if (!rec) 418 { 419 rc = ESR_INVALID_STATE; 420 PLogError(ESR_rc2str(rc)); 421 goto CLEANUP; 422 } 423 424 end_frame = rec->current_search_frame; 425 wl = rec->word_lattice; 426 token_index = wl->words_for_frame[end_frame]; 427 428 if (token_index == MAXwtokenID) 429 { 430 PLogError(L("ESR_INVALID_STATE")); 431 return ESR_INVALID_STATE; 432 } 433 #if PRINT_WORD_LATTICE 434 print_word_token_list(rec, token_index, "WORD TOKENS AT END\n"); 435 #endif 436 /* the head of the list on the last frame is always best */ 437 CHKLOG(rc, sprint_word_token_backtraceByWordID(wordIDs, len, rec, token_index)); 438 439 return ESR_SUCCESS; 440 CLEANUP: 441 return rc; 442 } 443 444 int srec_get_top_choice_transcription(multi_srec* recm, char *transcription, int len, int whether_strip_slot_markers) 445 { 446 int rc; 447 srec* rec = WHICH_RECOG(recm); 448 frameID end_frame; 449 srec_word_lattice* wl; 450 wtokenID token_index; 451 452 if (!rec) 453 { 454 *transcription = 0; 455 return 1; 456 } 457 if( recm->eos_status == VALID_SPEECH_NOT_YET_DETECTED) 458 { 459 *transcription = 0; 460 return 1; 461 } 462 463 end_frame = rec->current_search_frame; 464 wl = rec->word_lattice; 465 sort_word_lattice_at_frame(rec, end_frame); 466 token_index = wl->words_for_frame[end_frame]; 467 468 if (token_index != MAXwtokenID) 469 { 470 #if PRINT_WORD_LATTICE 471 print_word_token_list(rec, token_index, "WORD TOKENS AT END\n"); 472 #endif 473 /* the head of the list on the last frame is always best */ 474 rc = sprint_word_token_backtrace(transcription, len, rec, token_index); 475 } 476 else 477 { 478 strcpy(transcription, ""); 479 rc = 1; 480 } 481 if (whether_strip_slot_markers) 482 srec_result_strip_slot_markers(transcription); 483 return rc; 484 } 485 486 int srec_get_top_choice_score(multi_srec* recm, bigcostdata *cost, int do_incsil) 487 { 488 srec* rec = WHICH_RECOG(recm); 489 frameID end_frame; 490 srec_word_lattice* wl; 491 wtokenID token_index; 492 word_token* wtoken; 493 494 if (!rec) 495 { 496 *cost = MAXcostdata; 497 return 1; 498 } 499 500 end_frame = rec->current_search_frame; 501 wl = rec->word_lattice; 502 token_index = wl->words_for_frame[end_frame]; 503 504 if (end_frame < MAXframeID && token_index != MAXwtokenID) 505 { 506 wtoken = &rec->word_token_array[token_index]; 507 *cost = wtoken->cost; 508 *cost += rec->accumulated_cost_offset[ wtoken->end_time]; 509 return 0; 510 } 511 else 512 { 513 *cost = MAXcostdata; 514 return 1; 515 } 516 } 517 518 int srec_print_results(multi_srec *recm, int max_choices) 519 { 520 char transcription[MAX_LEN]; 521 bigcostdata cost; 522 523 srec_get_top_choice_transcription(recm, transcription, MAX_LEN, 1); 524 srec_get_top_choice_score(recm, &cost, SCOREMODE_INCLUDE_SILENCE); 525 526 log_report("R: %8ld %8ld %s\t%.1f\n", 0, 0, transcription, cost); 527 528 return 0; 529 } 530 531 /* sort the word lattice at this frame, todo: remove rec argument */ 532 533 #define MAX_WTOKENS_AT_FRAME 64 /* +1 for the MAXwtokenID! */ 534 void sort_word_lattice_at_frame(srec* rec, frameID frame) 535 { 536 srec_word_lattice* wl = rec->word_lattice; 537 word_token *wtoken, *wtoken2; 538 wtokenID pwi[MAX_WTOKENS_AT_FRAME], token_index; 539 word_token* word_token_array = rec->word_token_array; 540 int i, j, npwi = 0; 541 ASSERT(rec->word_priority_q->max_in_q < MAX_WTOKENS_AT_FRAME); 542 543 ASSERT(frame < wl->max_frames); 544 if (wl->whether_sorted[frame]) 545 return; 546 547 wl->whether_sorted[frame] = 1; 548 549 /* make an array of word token index addresses */ 550 for (pwi[npwi] = wl->words_for_frame[frame]; pwi[npwi] != MAXwtokenID;) 551 { 552 ASSERT(npwi < MAX_WTOKENS_AT_FRAME); 553 token_index = pwi[npwi]; 554 wtoken = &word_token_array[ token_index]; 555 npwi++; 556 pwi[npwi] = wtoken->next_token_index; 557 } 558 559 /* sort the word token indices, bubble sort is fine */ 560 for (i = 0; i < npwi; i++) 561 { 562 for (j = 0; j < (npwi - 1); j++) 563 { 564 wtoken = &word_token_array[ pwi[j]]; 565 wtoken2 = &word_token_array[ pwi[j+1]]; 566 if (wtoken->cost > wtoken2->cost) 567 { 568 token_index = pwi[j]; 569 pwi[j] = pwi[j+1]; 570 pwi[j+1] = token_index; 571 } 572 } 573 } 574 575 /*print_word_token_list(rec,wl->words_for_frame[frame],"## BEFORE SORT\n");*/ 576 wl->words_for_frame[ frame] = pwi[0]; 577 for (i = 0; i < npwi; i++) 578 { 579 wtoken = &word_token_array[ pwi[i]]; 580 wtoken->next_token_index = pwi[i+1]; /* last points nowhwere */ 581 } 582 /*print_word_token_list(rec,wl->words_for_frame[frame],"## AFTER SORT\n");*/ 583 } 584 585 586 /* this frees a word token, it may still have references in the lattice though */ 587 588 void free_word_token(srec *rec, wtokenID old_token_index) 589 { 590 word_token* wtoken; 591 wtoken = &rec->word_token_array[old_token_index]; 592 wtoken->next_token_index = rec->word_token_freelist; 593 rec->word_token_freelist = old_token_index; 594 TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]); 595 } 596 597 /* this frees some earlier allocated word_tokens from previous frames, 598 this makes sure we can always have some to spare for future frames */ 599 600 void free_word_token_from_lattice(srec *rec, wtokenID old_token_index) 601 { 602 word_token* wtoken; 603 wtokenID *rtoken_index; 604 word_token* rtoken; 605 606 #define CHECK_FREE_WORD_TOKEN 1 607 #if CHECK_FREE_WORD_TOKEN 608 stokenID stoken_index, i; 609 ftokenID ftoken_index; 610 fsmarc_token* stoken; 611 fsmnode_token* ftoken; 612 int nerrs = 0; 613 614 stoken_index = rec->active_fsmarc_tokens; 615 for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index) 616 { 617 stoken = &rec->fsmarc_token_array[stoken_index]; 618 for (i = 0; i < stoken->num_hmm_states; i++) 619 { 620 if (stoken->word_backtrace[i] == old_token_index) 621 { 622 printf("Error: can't delete wtoken %d cuz stoken%d.%d cost %d\n", 623 old_token_index, stoken_index, i, stoken->cost[i]); 624 nerrs++; 625 } 626 } 627 } 628 629 ftoken_index = rec->active_fsmnode_tokens; 630 for (; ftoken_index != MAXftokenID; ftoken_index = ftoken->next_token_index) 631 { 632 ftoken = &rec->fsmnode_token_array[ftoken_index]; 633 if (ftoken->word_backtrace == old_token_index) 634 { 635 printf("Error: can't delete wtoken %d cuz ftoken %d cost %d\n", 636 old_token_index, ftoken_index, ftoken->cost); 637 nerrs++; 638 } 639 } 640 641 /* wtoken = &rec->word_token_array[old_token_index]; 642 for(ifr=wtoken->end_time+1; ifr>=0; ifr--) { 643 wtoken_index = rec->word_lattice->words_for_frame[ifr]; 644 for( ; wtoken_index!= MAXwtokenID; wtoken_index=wtoken->next_token_index) { 645 wtoken = &rec->word_token_array[wtoken_index]; 646 if(wtoken->backtrace == old_token_index) { 647 printf("Error: can't delete wtoken %d cuz wtoken %d at frame %d backtraces cost %d\n", 648 old_token_index, wtoken_index, ifr, wtoken->cost); 649 nerrs++; 650 } 651 } 652 } 653 */ 654 ASSERT(nerrs == 0); 655 if (nerrs > 0) 656 { 657 print_word_token(rec, old_token_index, "Error: while deleting "); 658 return; 659 } 660 #endif 661 662 wtoken = &rec->word_token_array[old_token_index]; 663 /* remove from word lattice */ 664 rtoken_index = &rec->word_lattice->words_for_frame[ wtoken->end_time+1]; 665 for (; (*rtoken_index) != MAXwtokenID; rtoken_index = &rtoken->next_token_index) 666 { 667 rtoken = &rec->word_token_array[(*rtoken_index)]; 668 if (*rtoken_index == old_token_index) 669 { 670 *rtoken_index = wtoken->next_token_index; 671 break; 672 } 673 } 674 wtoken->next_token_index = rec->word_token_freelist; 675 rec->word_token_freelist = old_token_index; 676 TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]); 677 } 678 679 int reprune_word_tokens(srec* rec, costdata current_best_cost) 680 { 681 int i, keep_astar_prune; 682 arc_token* keep_arc_token_list; 683 684 stokenID stoken_index; 685 fsmarc_token* stoken; 686 wtokenID btindex; 687 word_token* bttoken; 688 wtokenID wtoken_index; 689 word_token* wtoken; 690 altword_token* awtoken; 691 692 /* remember things about the astar before changing it for local purposes */ 693 keep_astar_prune = rec->astar_stack->prune_delta; 694 /* rec->astar_stack->prune_delta = 400; */ 695 /* ignore the grammar constraints for this quick astar backward pass */ 696 keep_arc_token_list = rec->context->arc_token_list; 697 rec->context->arc_token_list = 0; 698 699 /* we will flag all wtokens to be kept */ 700 701 /* initialize the flags to keep all */ 702 memset(rec->word_token_array_flags, 0, sizeof(rec->word_token_array_flags[0])*rec->word_token_array_size); 703 704 /* flag all those tokens not active, ie already free */ 705 wtoken_index = rec->word_token_freelist; 706 for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index) 707 { 708 wtoken = &rec->word_token_array[wtoken_index]; 709 rec->word_token_array_flags[wtoken_index]--; /* already deleted */ 710 } 711 712 /* flag along the best active state paths */ 713 stoken_index = rec->active_fsmarc_tokens; 714 for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index) 715 { 716 stoken = &rec->fsmarc_token_array[ stoken_index]; 717 for (i = 0; i < stoken->num_hmm_states; i++) 718 { 719 btindex = stoken->word_backtrace[i]; 720 for (; btindex != MAXwtokenID; btindex = bttoken->backtrace) 721 { 722 bttoken = &rec->word_token_array[ btindex]; 723 ASSERT(rec->word_token_array_flags[ btindex] >= 0); 724 rec->word_token_array_flags[ btindex] = 1; 725 } 726 for (awtoken = stoken->aword_backtrace[i]; awtoken; 727 awtoken = awtoken->next_token) 728 { 729 btindex = awtoken->word_backtrace; 730 for (; btindex != MAXwtokenID; btindex = bttoken->backtrace) 731 { 732 bttoken = &rec->word_token_array[ btindex]; 733 rec->word_token_array_flags[ btindex] = 1; 734 } 735 } 736 } 737 } 738 739 /* run the astar and flag a little more */ 740 astar_stack_prepare_from_active_search(rec->astar_stack, 100, rec); 741 astar_stack_do_backwards_search(rec, 100); 742 astar_stack_flag_word_tokens_used(rec->astar_stack, rec); 743 astar_stack_clear(rec->astar_stack); 744 745 /* kill_word_tokens */ 746 for (i = 0; i < rec->word_token_array_size; i++) 747 { 748 if (rec->word_token_array_flags[i] == 0) /* < 0 are already free! */ 749 free_word_token_from_lattice(rec, (frameID)i); 750 } 751 752 /* set this back to a regular astar from remembered values */ 753 rec->context->arc_token_list = keep_arc_token_list; 754 rec->astar_stack->prune_delta = (costdata) keep_astar_prune; 755 756 SREC_STATS_INC_WTOKEN_REPRUNES(1); 757 return 0; 758 } 759 760