Home | History | Annotate | Download | only in crec
      1 /*---------------------------------------------------------------------------*
      2  *  srec.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include <math.h>
     23 
     24 #include "pstdio.h"
     25 #include "passert.h"
     26 #include "portable.h"
     27 
     28 #include "hmm_desc.h"
     29 #include "utteranc.h"
     30 #include "hmmlib.h"
     31 #include "srec_sizes.h"
     32 #include "search_network.h"
     33 #include "srec_context.h"
     34 #include "srec.h"
     35 #include "srec_stats.h"
     36 #include "srec_debug.h"
     37 #include "srec_tokens.h"
     38 #include "word_lattice.h"
     39 #include "swimodel.h"
     40 #if USE_COMP_STATS
     41 #include "comp_stats.h"
     42 #endif
     43 #include "c42mul.h"
     44 
     45 #ifdef SET_RCSID
     46 static const char *rcsid = 0 ? (const char *) &rcsid :
     47 "$Id: srec.c,v 1.39.4.31 2008/06/23 17:20:39 dahan Exp $";
     48 #endif
     49 
     50 #define SILENCE_MODEL_INDEX 0
     51 #define PRUNE_TIGHTEN 0.9     /*if we run out of room in the state arrays,
     52                                 keep multiplying pruning thresh by this amount
     53                                 until there is room */
     54 
     55 /*--------------------------------------------------------------------------*
     56  *                                                                          *
     57  * protos                                                                   *
     58  *                                                                          *
     59  *--------------------------------------------------------------------------*/
     60 static void reset_cost_offsets(multi_srec* rec, frameID current_search_frame,
     61                                costdata current_best_cost);
     62 static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
     63                                        costdata *pcurrent_best_cost,
     64                                        costdata *precomputed_model_scores);
     65 
     66 /*--------------------------------------------------------------------------*
     67  *                                                                          *
     68  * utils                                                                    *
     69  *                                                                          *
     70  *--------------------------------------------------------------------------*/
     71 
     72 static void dump_core(char *msg)
     73 {
     74   PLogError ( msg );
     75   ASSERT(0);
     76 }
     77 
     78 static altword_token* reprune_altword_token_batch(srec* rec,
     79     altword_token* batch,
     80     bigcostdata costlimit)
     81 {
     82   altword_token *awtoken, *next_awtoken;
     83   altword_token **awtokenp;
     84 
     85   /* look for previous invalidate, see below */
     86   if (batch->costbasis == MAXcostdata / 2)
     87   { /* was costdelta // costlimit */
     88     free_altword_token(rec, batch);
     89     return AWTNULL;
     90   }
     91   /* a flag to check whether we already pruned this batch would be nice */
     92 
     93   /* first prune the CDR of the list, ie everything but the head */
     94   awtokenp = &batch->next_token;
     95   for (awtoken = batch->next_token; awtoken != AWTNULL; awtoken = next_awtoken)
     96   {
     97     next_awtoken = awtoken->next_token;
     98     if ((bigcostdata)batch->costbasis + awtoken->costdelta > costlimit)
     99     {
    100       (*awtokenp) = awtoken->next_token;
    101       awtoken->refcount = 1;             /* to make sure it frees */
    102       free_altword_token(rec, awtoken);
    103     }
    104     else
    105       awtokenp = &awtoken->next_token;
    106   }
    107 
    108   /* now see about the CAR of the list, ie the head */
    109   if ((bigcostdata)(batch->costbasis) + batch->costdelta < costlimit)
    110   {
    111     /* head of list survives pruning => no problem */
    112   }
    113   else if (batch->next_token != AWTNULL)
    114   {
    115     /* head of list does not survive => since we can't change the pointer
    116        we copy a survivor into it and free that original */
    117     awtoken = batch->next_token;
    118     batch->costdelta      = awtoken->costdelta;
    119     batch->word           = awtoken->word;
    120     batch->word_backtrace = awtoken->word_backtrace;
    121     /*ASSERT( batch->refcount == awtoken->refcount); */
    122     /* batch->refcount       = awtoken->refcount; */
    123     batch->next_token     = awtoken->next_token;
    124     awtoken->refcount = 1; /* to make sure it frees */
    125     free_altword_token(rec, awtoken);
    126   }
    127   else
    128   {
    129     /* head of list does not survive, nothing survives, so invalidate it */
    130     batch->costbasis = MAXcostdata / 2; /* was costdelta */
    131     free_altword_token(rec, batch);
    132     batch = AWTNULL;
    133   }
    134   return batch;
    135 }
    136 
    137 static void reprune_altword_tokens(srec* rec)
    138 {
    139   stokenID i, j;
    140   fsmarc_token* stoken;
    141   fsmnode_token* ftoken;
    142   bigcostdata current_prune_delta = rec->prune_delta;
    143   altword_token* awtoken;
    144 
    145   /* first clear the costbases */
    146   for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
    147   {
    148     stoken = &rec->fsmarc_token_array[i];
    149     for (j = 0; j < stoken->num_hmm_states; j++)
    150       if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL)
    151         awtoken->costbasis = MAXcostdata;
    152   }
    153   for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
    154   {
    155     ftoken = &rec->fsmnode_token_array[i];
    156     if ((awtoken = ftoken->aword_backtrace) != AWTNULL)
    157       awtoken->costbasis = MAXcostdata;
    158   }
    159   /* costbases for altword attached to stoken */
    160   for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
    161   {
    162     stoken = &rec->fsmarc_token_array[i];
    163     for (j = 0; j < stoken->num_hmm_states; j++)
    164       if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL)
    165         if (awtoken->costbasis > stoken->cost[j])
    166           awtoken->costbasis = stoken->cost[j];
    167   }
    168   /* costbases for altword attached to ftokens */
    169   for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
    170   {
    171     ftoken = &rec->fsmnode_token_array[i];
    172     if ((awtoken = ftoken->aword_backtrace) != AWTNULL)
    173       if (awtoken->costbasis > ftoken->cost)
    174         awtoken->costbasis = ftoken->cost;
    175   }
    176 
    177   /* now reprune */
    178   while (rec->altword_token_freelist_len < rec->altword_token_array_size / 4
    179          || rec->altword_token_freelist_len < 2*rec->word_priority_q->max_in_q)
    180   {
    181     SREC_STATS_INC_AWTOKEN_REPRUNES(1);
    182     current_prune_delta = (costdata)(PRUNE_TIGHTEN * PRUNE_TIGHTEN * current_prune_delta);
    183     for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
    184     {
    185       stoken = &rec->fsmarc_token_array[i];
    186       for (j = 0; j < stoken->num_hmm_states; j++)
    187       {
    188         if (stoken->aword_backtrace[j] != AWTNULL)
    189         {
    190           stoken->aword_backtrace[j] =
    191             reprune_altword_token_batch(rec, stoken->aword_backtrace[j],
    192                                         current_prune_delta);
    193         }
    194       }
    195     }
    196     for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
    197     {
    198       ftoken = &rec->fsmnode_token_array[i];
    199       if (ftoken->aword_backtrace != AWTNULL)
    200       {
    201         ftoken->aword_backtrace =
    202           reprune_altword_token_batch(rec, ftoken->aword_backtrace,
    203                                       current_prune_delta);
    204       }
    205     }
    206     ASSERT(current_prune_delta > 0);
    207   }
    208 }
    209 
    210 #define refcopy_altwords(rEc, aWtOkEn) (aWtOkEn?(aWtOkEn->refcount++,aWtOkEn):aWtOkEn)
    211 
    212 static altword_token* copy_altwords(srec* rec, altword_token* list1, costdata delta)
    213 {
    214   altword_token *q2, dummy, *last_q2 = &dummy;
    215   costdata q2_costdelta;
    216 
    217   /* first check for space */
    218 #if BUILD & BUILD_DEBUG
    219   int num = 0;
    220 
    221   for (num = 0, q2 = list1; q2 != AWTNULL; q2 = q2->next_token)
    222     num++;
    223   if (num > rec->altword_token_freelist_len)
    224   {
    225     printf("warning: mid-copy reprune_altword_tokens()\n");
    226     ASSERT(0);
    227     reprune_altword_tokens(rec);
    228   }
    229 #endif
    230 
    231   /* now do the copy */
    232   for (; list1 != AWTNULL; list1 = list1->next_token)
    233   {
    234     ASSERT(list1->refcount >= 1);
    235 
    236     q2_costdelta = list1->costdelta + delta;
    237     ASSERT(list1->costdelta != MAXcostdata);
    238     if (q2_costdelta > rec->prune_delta)
    239       continue;
    240     q2 = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
    241     if (!q2) /* this should never happen */
    242       break;
    243     last_q2->next_token = q2;
    244     q2->costdelta      = q2_costdelta;
    245     q2->word           = list1->word;
    246     q2->word_backtrace = list1->word_backtrace;
    247     last_q2 = q2;
    248   }
    249   last_q2->next_token = AWTNULL;
    250   return dummy.next_token;
    251 }
    252 
    253 #if 1
    254 /* sizewise_altwords just makes sure the list of altwords is no longer than
    255    the number of possible word ends.  Any tokens beyond that length will get
    256    ignored later anyways, so we may as well kill them here.
    257    This also is related to the anticipated repruning.  This code currently
    258    makes use of calloc/free and qsort, but can easily be rewritten to just
    259    to a linear in-place sort, linear looking for the 10 best score should not
    260    take too long. This is on the todo list!
    261 */
    262 int altword_token_ptr_cmp(const void* a, const void* b)
    263 {
    264   const altword_token** A = (const altword_token**)a, **B = (const altword_token**)b;
    265   if ((*A)->costdelta > (*B)->costdelta) return 1;
    266   else if ((*A)->costdelta < (*B)->costdelta) return -1;
    267   else return 0;
    268 }
    269 static altword_token* sizewise_altwords(srec* rec, altword_token* awtoken_head)
    270 {
    271 #define SIZEWISE_THRESH (rec->word_priority_q->max_in_q)
    272 #define SIZEWISE_TARGET (rec->word_priority_q->max_in_q*4/5)
    273   int i, num;
    274   altword_token *awtoken, **list;
    275 
    276   if( SIZEWISE_TARGET == 0) {
    277 	  free_altword_token(rec, awtoken_head);
    278 	  return NULL;
    279   }
    280   num = count_altword_token(rec, awtoken_head);
    281   /* if the linked list is shorter than max_in_q we're fine */
    282   if (num <= SIZEWISE_THRESH)
    283     return awtoken_head;
    284   else
    285   {
    286     list = (altword_token**)CALLOC(num, sizeof(altword_token*), L("search.srec.altword_tokens"));
    287     ASSERT(awtoken_head->refcount == 1);
    288     for (i = 0, awtoken = awtoken_head; i < num; i++, awtoken = awtoken->next_token)
    289       list[i] = awtoken;
    290     qsort(list, num, sizeof(altword_token*), altword_token_ptr_cmp);
    291     for (i = 0; i < SIZEWISE_TARGET; i++)
    292       list[i]->next_token = list[i+1];
    293     if(i>0) list[i-1]->next_token = AWTNULL;
    294     for (; i < num; i++)
    295     {
    296       list[i]->refcount = 1; /* make sure it frees */
    297       free_altword_token(rec, list[i]);
    298     }
    299     awtoken_head = list[0];
    300     awtoken_head->refcount = 1;
    301     FREE(list);
    302     return awtoken_head;
    303   }
    304 }
    305 #else
    306 #define sizewise_altwords(ReC,hEad) hEad
    307 #endif
    308 
    309 /*--------------------------------------------------------------------------*
    310  *                                                                          *
    311  * acoustic scoring utils                                                   *
    312  *                                                                          *
    313  *--------------------------------------------------------------------------*/
    314 
    315 #define DO_COMPUTE_MODEL     0
    316 #define DO_NOT_COMPUTE_MODEL MAXcostdata
    317 
    318 static asr_uint16_t best_uint16(asr_uint16_t* p, int n)
    319 {
    320   asr_uint16_t rv = p[0];
    321   for (;--n > 0;p++) if (rv > *p) rv = *p;
    322   return rv;
    323 }
    324 static int compute_model_scores(costdata *current_model_scores, const SWIModel *acoustic_models,
    325                                 pattern_info *pattern, frameID current_search_frame)
    326 {
    327   int i;
    328   int num_models_computed = 0;
    329 
    330   for (i = 0; i < acoustic_models->num_hmmstates; i++)
    331   {
    332     if (current_model_scores[i] == DO_COMPUTE_MODEL)
    333     {
    334       scodata score = mixture_diagonal_gaussian_swimodel(pattern->prep,
    335               &acoustic_models->hmmstates[i], acoustic_models->num_dims);
    336       ASSERT(score <= 0 && "model score out of range");
    337 
    338       current_model_scores[i] = (costdata) - score;
    339       num_models_computed++;
    340     }
    341   }
    342   return num_models_computed;
    343 }
    344 
    345 
    346 
    347 /*precompute all needed models to be used by next frame of search*/
    348 
    349 static int find_which_models_to_compute(srec *rec, const SWIModel *acoustic_models)
    350 {
    351   int i;
    352   modelID model_index;
    353   stokenID current_token_index;
    354   ftokenID current_ftoken_index;
    355   fsmarc_token *current_token;
    356   fsmnode_token *current_ftoken;
    357   costdata *current_model_scores;
    358   /* arcID arc_index; */
    359   arcID fsm_arc_index;
    360   HMMInfo* hmm_info;
    361   FSMnode* fsm_node;
    362   FSMarc* fsm_arc;
    363   /*use the current_model_scores array both to tell the model computing stuff
    364     what models to compute and to get the scores back.  This is a bit ugly, but
    365     saves having another array to allocate*/
    366 
    367   /* this belongs elsewhere at initialization,
    368      eg. where we'll associate search to acoustic models
    369   */
    370   rec->avg_state_durations = acoustic_models->avg_state_durations;
    371 
    372   current_model_scores = rec->current_model_scores;
    373 
    374   for (model_index = 0; model_index < acoustic_models->num_hmmstates; model_index++)
    375   {
    376     current_model_scores[model_index] = DO_NOT_COMPUTE_MODEL;
    377   }
    378 
    379   current_token_index = rec->active_fsmarc_tokens;
    380 
    381   while (current_token_index != MAXstokenID)
    382   {
    383     current_token = &(rec->fsmarc_token_array[current_token_index]);
    384     /*need to compute all models needed within this HMM*/
    385     fsm_arc = &rec->context->FSMarc_list[ current_token->FSMarc_index];
    386     hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
    387 
    388     /*handle all states that are alive in this hmm*/
    389     for (i = 0;i < hmm_info->num_states;i++)
    390     {
    391       if ((current_token->cost[i] != MAXcostdata) ||
    392           ((i > 0) && current_token->cost[i-1] != MAXcostdata))
    393       {
    394         model_index = hmm_info->state_indices[i];
    395         current_model_scores[model_index] = DO_COMPUTE_MODEL;
    396       }
    397     }
    398     current_token_index = current_token->next_token_index;
    399   }
    400 
    401   /*for each active FSM node, find models which can come from node*/
    402 
    403   current_ftoken_index = rec->active_fsmnode_tokens;
    404 
    405   while (current_ftoken_index != MAXftokenID)
    406   {
    407     current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
    408     fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
    409     fsm_arc = NULL;
    410     for (fsm_arc_index = fsm_node->un_ptr.first_next_arc; fsm_arc_index != MAXarcID;
    411         fsm_arc_index = fsm_arc->linkl_next_arc) {
    412       fsm_arc = rec->context->FSMarc_list+fsm_arc_index;
    413 
    414       if (fsm_arc->ilabel != MAXlabelID)
    415       {
    416         hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
    417         if (hmm_info->num_states > 0)
    418         {
    419 
    420           /* we should build in here a check that this arc has reasonable weight */
    421           /* if(fsm_arc->cost < rec->prune_delta)  */
    422           current_model_scores[hmm_info->state_indices[0]] = DO_COMPUTE_MODEL;
    423         }
    424       }
    425     }
    426     current_ftoken_index = current_ftoken->next_token_index;
    427   }
    428 
    429   /*compute the scores in a batch - this allows the model computing code to
    430     chunk it up however it wants*/
    431   return 0;
    432 }
    433 
    434 /*--------------------------------------------------------------------------*
    435  *                                                                          *
    436  * pruning utils                                                            *
    437  *                                                                          *
    438  *--------------------------------------------------------------------------*/
    439 
    440 /*this is called at the end of the search*/
    441 static int prune_new_tokens(srec *rec, costdata current_prune_thresh)
    442 {
    443   int i;
    444   nodeID num_deleted;
    445   stokenID token_index;
    446   fsmarc_token *token;
    447   stokenID next_token_index;
    448   stokenID *list_head_pointer;
    449   char any_alive;
    450 
    451   num_deleted = 0;
    452   list_head_pointer = &(rec->active_fsmarc_tokens);
    453 
    454   for (token_index = rec->active_fsmarc_tokens; token_index != MAXstokenID;
    455        token_index = next_token_index)
    456   {
    457 
    458     token = &(rec->fsmarc_token_array[token_index]);
    459     next_token_index = token->next_token_index;
    460 
    461     any_alive = 0;
    462 
    463     for (i = 0;i < token->num_hmm_states;i++)
    464     {
    465       if (token->cost[i] < current_prune_thresh)
    466       {
    467         any_alive = 1;
    468       }
    469     }
    470 
    471     if (!any_alive)
    472     { /*everything pruned so recylce the token*/
    473       *list_head_pointer = next_token_index;
    474 
    475       rec->best_token_for_arc[rec->fsmarc_token_array[token_index].FSMarc_index] = MAXstokenID;
    476 
    477       free_fsmarc_token(rec, token_index);
    478 
    479       num_deleted++;
    480       rec->num_new_states--;
    481     }
    482     else
    483     {
    484       list_head_pointer = &token->next_token_index;
    485     }
    486   }
    487   return num_deleted;
    488 }
    489 
    490 static void reprune_word_tokens_if_necessary(srec *rec)
    491 {
    492   word_token* wtoken;
    493   wtokenID wtoken_index = rec->word_token_freelist;
    494   wtokenID num_free_wtokens = 0;
    495 
    496   for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
    497   {
    498     wtoken = &rec->word_token_array[ wtoken_index];
    499     num_free_wtokens++;
    500   }
    501   if (num_free_wtokens < 2*rec->word_priority_q->max_in_q)
    502     reprune_word_tokens(rec, 0);
    503 }
    504 
    505 /*this is called when we run out of room in the state token arrays and need to make more room -
    506  it only prunes in theh new time slice - we could also look at pruning the previous slice if needed*/
    507 
    508 
    509 static costdata reprune_new_states(srec *rec, costdata current_best_cost, costdata current_prune_delta)
    510 {
    511   int num_deleted;
    512   /*first check to see if current pruning thresholds make enough room
    513     (because of better max)*/
    514 
    515   prune_new_tokens(rec, current_best_cost + current_prune_delta);
    516 
    517   ASSERT(((float)current_best_cost) + current_prune_delta < (float)SHRT_MAX);
    518 
    519   while ((rec->num_new_states >= rec->max_new_states - 1)
    520          || rec->fsmarc_token_freelist == MAXstokenID)
    521   {
    522 
    523     SREC_STATS_INC_STOKEN_REPRUNES(1);
    524 
    525     current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);
    526 
    527     if (current_prune_delta <= 1)
    528     {
    529       /*this seems like an unlikely case, but if we can't prune enough to make room, give up
    530       on the utterance (better than being stuck here forever!)*/
    531 
    532       /*FIX replace with crec abort mechanism*/
    533       PLogError ("reprune_new_states: can't seem to prune enough - best cost %d num_new_states %d\n",
    534              current_best_cost, rec->num_new_states);
    535 
    536       print_fsmarc_token_list(rec, rec->active_fsmarc_tokens, "CANNOT PRUNE");
    537 
    538       dump_core("reprune died\n");
    539     }
    540 
    541     num_deleted = prune_new_tokens(rec, current_best_cost + current_prune_delta);
    542 
    543     ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
    544   }
    545   return current_prune_delta;
    546 }
    547 
    548 
    549 
    550 static void prune_fsmnode_tokens(srec *rec, costdata current_prune_thresh, ftokenID not_this_one)
    551 {
    552   nodeID num_deleted;
    553   ftokenID token_index;
    554   fsmnode_token *token;
    555   ftokenID next_token_index;
    556   ftokenID *list_head_pointer;
    557 
    558   num_deleted = 0;
    559   token_index = rec->active_fsmnode_tokens;
    560 
    561   token = &(rec->fsmnode_token_array[token_index]);
    562   list_head_pointer = &(rec->active_fsmnode_tokens);
    563 
    564   while (token_index != MAXftokenID)
    565   {
    566     next_token_index = token->next_token_index;
    567     if( token_index!=not_this_one && token->cost >= current_prune_thresh)
    568     {
    569       /*pruned so recycle the token*/
    570       *list_head_pointer = next_token_index;
    571       rec->best_token_for_node[token->FSMnode_index] = MAXftokenID;
    572       free_fsmnode_token(rec, token_index);
    573       num_deleted++;
    574     }
    575     else
    576     {
    577       list_head_pointer = &token->next_token_index;
    578     }
    579     token_index = next_token_index;
    580     token = &(rec->fsmnode_token_array[token_index]);
    581   }
    582 }
    583 
    584 
    585 /*this is called when we run out of room in the state token arrays and need to make more room -
    586  it only prunes in theh new time slice - we could also look at pruning the previous slice if needed*/
    587 
    588 
    589 static costdata reprune_fsmnode_tokens(srec *rec, costdata current_best_cost, costdata current_prune_delta,
    590                                        ftokenID not_this_one)
    591 {
    592 
    593   /*first check to see if current pruning thresholds make enough
    594     room (because of better max)*/
    595 
    596   prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
    597 
    598   ASSERT((float)current_best_cost + (float)current_prune_delta < (float)SHRT_MAX);
    599 
    600   while (rec->fsmnode_token_freelist == MAXftokenID)
    601   {
    602     SREC_STATS_INC_FTOKEN_REPRUNES(1);
    603 
    604     current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);
    605 
    606     if (current_prune_delta <= 1)
    607     {
    608       /*this seems like an unlikely case, but if we can't prune enough to make room, give up
    609       on the utterance (better than being stuck here forever!)*/
    610 
    611       /*FIX replace with crec abort mechanism*/
    612       PLogError ("reprune_fsmnode_tokens: can't seem to prune enough - best cost %d\n",
    613              current_best_cost);
    614 
    615       print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "CANNOT PRUNE: ");
    616 
    617       dump_core("reprune_fsmnode_tokens() died\n");
    618     }
    619 
    620     prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
    621     ASSERT((float)current_best_cost + (float)current_prune_delta < (float)USHRT_MAX);
    622   }
    623 
    624   return current_prune_delta;
    625 }
    626 
    627 
    628 void reset_best_cost_to_zero(srec* rec, costdata current_best_cost)
    629 {
    630   fsmarc_token* stoken;
    631   stokenID token_index;
    632   short i, num_states;
    633 
    634 
    635   /*do the state tokens*/
    636   for (token_index = rec->active_fsmarc_tokens;
    637        token_index != MAXstokenID;
    638        token_index = stoken->next_token_index)
    639   {
    640 
    641     stoken = &rec->fsmarc_token_array[ token_index];
    642     num_states = stoken->num_hmm_states;
    643     for (i = 0; i < num_states; i++)
    644     {
    645       if (stoken->cost[i] < MAXcostdata)
    646       {
    647         ASSERT(stoken->cost[i]  >= current_best_cost);
    648         stoken->cost[i] = (costdata)(stoken->cost[i] - (costdata) current_best_cost);
    649       }
    650     }
    651   }
    652 }
    653 
    654 static void reset_cost_offsets(multi_srec* rec, frameID current_frame,
    655                                costdata current_best_cost)
    656 {
    657   rec->cost_offset_for_frame[current_frame] = current_best_cost;
    658   if (current_frame == 0)
    659     rec->accumulated_cost_offset[current_frame] = current_best_cost;
    660   else
    661     rec->accumulated_cost_offset[current_frame] = rec->accumulated_cost_offset[current_frame-1] + current_best_cost;
    662 }
    663 
    664 
    665 /*--------------------------------------------------------------------------*
    666  *                                                                          *
    667  * word_token functions                                                     *
    668  *                                                                          *
    669  *--------------------------------------------------------------------------*/
    670 
    671 /*this function allocates a new word token and remembers it in a list in the srec structure (to be used for backtrace later on*/
    672 
    673 static wtokenID create_word_token(srec *rec)
    674 {
    675   wtokenID word_token_index;
    676   word_token *word_token;
    677 
    678   word_token_index = get_free_word_token(rec, NULL_IF_NO_TOKENS);
    679 
    680   if (0 && word_token_index == MAXwtokenID)
    681   {
    682     /*FIX - use crec error handling*/
    683     dump_core("create_word_token: cannot allocate word token - we need"
    684               " to figure out a word pruning strategy when this happens!\n");
    685   }
    686 
    687   word_token = &(rec->word_token_array[word_token_index]);
    688 
    689   return word_token_index;
    690 }
    691 
    692 /* gets rid of fsmnode which trace back to this word since
    693    the word is not goingto make it ono the word lattice */
    694 
    695 static int block_fsmnodes_per_backtrace(srec *rec, wtokenID wtoken_id)
    696 {
    697   int num_ftokens_blocked = 0;
    698   ftokenID current_token_index = rec->active_fsmnode_tokens;
    699   while (current_token_index != MAXftokenID) {
    700     fsmnode_token *token = &(rec->fsmnode_token_array[current_token_index]);
    701 	if (token->word_backtrace == wtoken_id) {
    702       num_ftokens_blocked++;
    703       token->cost = MAXcostdata;
    704 	}
    705 	current_token_index = token->next_token_index;
    706   }
    707   return num_ftokens_blocked;
    708 }
    709 
    710 /* processing a word boundary,
    711    current_token is the fsmnode_token to the left of the boundary
    712    cost is the cost through this frame
    713    pcurrent_word_threshold is the worst score for words on the prio.q
    714    returns the word_token index just created
    715 */
    716 static wtokenID srec_process_word_boundary_nbest(srec* rec,
    717     nodeID end_node,
    718     wordID word,
    719     wtokenID word_backtrace,
    720     costdata cost,
    721     costdata* pcurrent_word_threshold,
    722     int *any_nodes_blocked)
    723 {
    724   wtokenID wtoken_index;
    725   wtokenID return_wtoken_index;
    726   wtokenID token_id_to_remove;
    727 
    728   word_token* wtoken;
    729 
    730   if (word_backtrace != MAXwtokenID)
    731   {
    732     word_token* btoken = &rec->word_token_array[word_backtrace];
    733     if (btoken->end_time >= rec->current_search_frame)
    734     {
    735       SREC_STATS_INC_BAD_BACKTRACES();
    736       return MAXwtokenID;
    737     }
    738   }
    739   /*make new word token*/
    740   wtoken_index = create_word_token(rec);
    741   //ASSERT(wtoken_index != MAXwtokenID);
    742   if (wtoken_index == MAXwtokenID)
    743   {
    744     /* we could have called reprune_word_tokens() here, but we instead called it
    745        at the beginning of do_epsilon_updates() to avoid complications of
    746        re-pruning word tokens too deep in the search update */
    747     return_wtoken_index = MAXwtokenID;
    748     return return_wtoken_index;
    749   }
    750 
    751   wtoken = &(rec->word_token_array[wtoken_index]);
    752   wtoken->word = word;
    753   wtoken->_word_end_time = 0; // new
    754   wtoken->end_time = rec->current_search_frame;
    755   wtoken->end_node = end_node;
    756   wtoken->backtrace = word_backtrace;
    757   wtoken->cost = cost;
    758 
    759   token_id_to_remove = add_word_token_to_priority_q(rec->word_priority_q, wtoken_index, rec->word_token_array);
    760 
    761   if (token_id_to_remove == wtoken_index)
    762     return_wtoken_index = MAXwtokenID;
    763   else
    764   {
    765     /* the word was truly added to the priority q, so we must
    766        get the new worst word on that list */
    767     *pcurrent_word_threshold = get_priority_q_threshold(rec->word_priority_q, rec->word_token_array);
    768     return_wtoken_index = wtoken_index;
    769   }
    770 
    771   if (token_id_to_remove != MAXwtokenID)
    772   {
    773     /* Jean: must allow for this word_token to be recycled */
    774 
    775     /* ok, the word won't we maintained, so there's no point to
    776        continue to search this path (as headed by this fsmarc_token) */
    777 
    778       *any_nodes_blocked += block_fsmnodes_per_backtrace( rec, token_id_to_remove);
    779 
    780     /* we killed the fsmnode token associated with the word being removed.
    781        But, we didn't kill it's word backtrace, so there may be word tokens
    782        in the backtrace which cannot connect.  But we can't really kill
    783        the whole backtrace since it might be shared with other
    784        active states, Mike's idea is to add a counter on the
    785        word_tokens, which counts how many active paths are using
    786        this word_token ... TODO */
    787 
    788     /* print_word_token(rec, token_id_to_remove, "srec_process_word_boundary killed wtoken "); */
    789     free_word_token(rec, token_id_to_remove);
    790 
    791   }
    792   return return_wtoken_index;
    793 }
    794 
    795 ftokenID* srec_get_parent_for_active_fsmnode(srec* rec, ftokenID ftoken_index)
    796 {
    797   ftokenID* parent_ftoken_index = &rec->active_fsmnode_tokens;
    798   while( (*parent_ftoken_index) != MAXftokenID) {
    799     fsmnode_token* parent_ftoken = &rec->fsmnode_token_array[ *parent_ftoken_index];
    800     if( *parent_ftoken_index == ftoken_index)
    801 		return parent_ftoken_index;
    802     parent_ftoken_index = &parent_ftoken->next_token_index;
    803   }
    804   return NULL;
    805 }
    806 
    807 /*---------------------------------------------------------------------------*
    808  *                                                                           *
    809  * updates                                                                   *
    810  *                                                                           *
    811  *---------------------------------------------------------------------------*/
    812 
    813 
    814 /*handles epsilon transitions (used for word boundaries).  Epsilons come from active
    815   fsmnode tokens and maximize into new FSMnode tokens (finds the best path to an FSM
    816   node).
    817 
    818   When we hit an
    819   epsilon, create a word token, put it in the path, and remember it in a
    820   list of all word tokens*/
    821 
    822 static int do_epsilon_updates(srec *rec, costdata prune_delta,
    823                               costdata best_cost)
    824 {
    825   fsmnode_token *new_ftoken;
    826   fsmnode_token *current_ftoken;
    827   wtokenID wtoken_index;
    828   FSMnode* fsm_node;
    829   FSMarc* fsm_arc;
    830   costdata cost, cost_with_wtw;
    831   ftokenID new_ftoken_index;
    832   ftokenID current_ftoken_index;
    833   costdata current_word_threshold;
    834   arcID fsm_arc_index;
    835   wordID word_with_wtw;
    836   int num_fsm_nodes_updated=0, num_fsm_nodes_blocked, num_fsm_nodes_blocked2;
    837   int num_wtokens_maybe_homonyms;
    838   costdata current_prune_delta;
    839   costdata current_prune_thresh;
    840   altword_token* awtoken;
    841 
    842 
    843   // printf("FRAME %d\n", rec->current_search_frame);
    844   // print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "BEFORE UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
    845 
    846   current_word_threshold = MAXcostdata;
    847   current_prune_delta = prune_delta;
    848   current_prune_thresh = best_cost + current_prune_delta;
    849 
    850   current_ftoken_index = rec->active_fsmnode_tokens;
    851   while (current_ftoken_index != MAXftokenID)
    852   {
    853 	//  print_fsmnode_token(rec, current_ftoken_index, "processing ... ");
    854     current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
    855 
    856     cost = current_ftoken->cost; /*get last state of token*/
    857     fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
    858     fsm_arc = NULL;
    859 
    860     /* Jean: see below too, let's remember the wtoken_index we create in
    861        case we need to re-use it.  All N epsilon updates, and all M
    862        M outgoing arcs can share, cuz this is the same arriving arc@frame */
    863 
    864     wtoken_index = MAXwtokenID;
    865 
    866     if( cost >= current_prune_thresh) {
    867       ftokenID* parent_ftoken_index;
    868       // the srec_get_parent_for_active_fsmnode() functions can be
    869       // gotten rid of if we use a doubly-linked list (fwd/bwd ptrs)
    870       parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, current_ftoken_index);
    871       if(!parent_ftoken_index) {
    872 	PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, current_ftoken_index);
    873 	print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
    874 	exit(1);
    875       }
    876       *parent_ftoken_index = current_ftoken->next_token_index;
    877       // effectively release this fsmnode_token and go to next one
    878       rec->best_token_for_node[ current_ftoken->FSMnode_index] = MAXftokenID;
    879       free_fsmnode_token( rec, current_ftoken_index);
    880       current_ftoken_index = *parent_ftoken_index;
    881       continue;
    882     }
    883 
    884     num_fsm_nodes_updated++;
    885     num_fsm_nodes_blocked = 0;
    886     num_wtokens_maybe_homonyms = 0;
    887     for (fsm_arc_index = fsm_node->un_ptr.first_next_arc;
    888 	 fsm_arc_index != MAXarcID;
    889 	 fsm_arc_index = fsm_arc->linkl_next_arc)
    890       {
    891         fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
    892 
    893         /* consider only epsilon transitions */
    894         if (fsm_arc->ilabel >= EPSILON_OFFSET)
    895           continue;
    896 
    897         /* can't loop to yourself on epsilon! */
    898         ASSERT(fsm_arc->to_node != current_ftoken->FSMnode_index);
    899 
    900         cost_with_wtw = current_ftoken->cost + fsm_arc->cost; /* WTW */
    901         word_with_wtw = current_ftoken->word;
    902         if(fsm_arc->olabel != WORD_EPSILON_LABEL)
    903           word_with_wtw = fsm_arc->olabel ;
    904 
    905         // we should compare cost_with_wtw but let's let the priority_q
    906 	// do the pruning
    907 	if (cost>=current_prune_thresh || fsm_arc->cost>=current_prune_thresh)
    908 	  continue;
    909 
    910         /*if word boundary, see if it crosses the word end threshold*/
    911         /* no room on the word priority_q, so not worth pursuing */
    912         if (fsm_arc->ilabel == WORD_BOUNDARY && cost_with_wtw >= current_word_threshold) {
    913           continue; // goto NEXT_FTOKEN;
    914         }
    915 
    916 		new_ftoken = NULL;
    917         new_ftoken_index = rec->best_token_for_node[ fsm_arc->to_node];
    918         if(new_ftoken_index != MAXftokenID)
    919           new_ftoken = &rec->fsmnode_token_array[ new_ftoken_index];
    920         if( new_ftoken && (current_ftoken->cost+fsm_arc->cost)<new_ftoken->cost) {
    921           /* clobber it */
    922         } else if(new_ftoken) {
    923           /* merge it */
    924         } else if(rec->fsmnode_token_freelist == MAXftokenID) {
    925 	        /* create it? maybe  */
    926 			current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, current_ftoken_index);
    927 		    current_prune_thresh = best_cost + current_prune_delta;
    928         }
    929 
    930         if (fsm_arc->ilabel == WORD_BOUNDARY) {
    931           /* 20030920, for sure the backtrace will change! */
    932           // token->word_backtrace = MAXwtokenID;
    933 
    934           wtoken_index = srec_process_word_boundary_nbest(rec,
    935 						 current_ftoken->FSMnode_index,
    936                                                  word_with_wtw,
    937                                                  current_ftoken->word_backtrace,
    938                                                  cost_with_wtw,
    939                                                  &current_word_threshold,
    940                                                  &num_fsm_nodes_blocked);
    941 		  if (wtoken_index != MAXwtokenID) {
    942             WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+wtoken_index),
    943               rec->word_token_array[wtoken_index].end_time - current_ftoken->silence_duration);
    944 		  }
    945 		  if( fsm_arc->olabel!=WORD_EPSILON_LABEL && wtoken_index != MAXwtokenID) {
    946 			  num_wtokens_maybe_homonyms++;
    947 			  if( num_wtokens_maybe_homonyms>1)
    948 				WORD_TOKEN_SET_HOMONYM( (rec->word_token_array+wtoken_index), 1);
    949 		  }
    950           /* now drop alternative words, note the use of current_token
    951              because token is on the other side already */
    952           if (current_ftoken->aword_backtrace != AWTNULL) {
    953             awtoken = current_ftoken->aword_backtrace;
    954             for (; awtoken != AWTNULL; awtoken = awtoken->next_token) {
    955               wtokenID wti;
    956               wti = srec_process_word_boundary_nbest(rec,
    957 						     current_ftoken->FSMnode_index,
    958                                                      awtoken->word,
    959                                                      awtoken->word_backtrace,
    960                                                      cost_with_wtw + awtoken->costdelta,
    961                                                      &current_word_threshold,
    962                                                      &num_fsm_nodes_blocked2);
    963             }
    964             /* if we don't free the altwords here, i had thought that
    965                updates from stateN would make the altwords grow and grow,
    966                but by that time all the fsmnodes are brand new */
    967             /* leaving them alive allows them to propagate to state0 thru
    968                other epsilons (eg non .wb) to new nodes but we don't
    969                use such arcs.
    970                the .wb would not get dropped again 'cuz we check
    971                for that in wtoken_index above.
    972                this is quite complex and the case for dropping word_tokens
    973                from the node AFTER the .wb can be made
    974                ie. would need a re-write of do_epsilon_updates() */
    975 			if( current_ftoken->aword_backtrace != AWTNULL)
    976               free_altword_token_batch(rec, current_ftoken->aword_backtrace);
    977             current_ftoken->aword_backtrace = AWTNULL;
    978             /*print_fsmnode_token(rec, token-rec->fsmnode_token_array, "123a");*/
    979           }
    980 
    981           if( wtoken_index != MAXwtokenID) {
    982 
    983             if( new_ftoken == NULL) {
    984               /* create token for the other side */
    985               new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
    986 				  // this should not happen because of the above check near
    987 				  // fsmnode_token_freelist == MAXftokenID
    988               ASSERT(new_ftoken_index != MAXftokenID);
    989               new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
    990               new_ftoken->word_backtrace = wtoken_index;
    991               new_ftoken->cost = cost_with_wtw;
    992               new_ftoken->word = WORD_EPSILON_LABEL;
    993               new_ftoken->FSMnode_index = fsm_arc->to_node;
    994               new_ftoken->aword_backtrace = AWTNULL;
    995               new_ftoken->next_token_index = current_ftoken->next_token_index;
    996               current_ftoken->next_token_index = new_ftoken_index;
    997               rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
    998             } else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
    999               /* update token on the other side */
   1000               ftokenID *parent_ftoken_index;
   1001               new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
   1002               new_ftoken->cost = cost_with_wtw;
   1003               new_ftoken->word_backtrace = wtoken_index;
   1004               new_ftoken->word = WORD_EPSILON_LABEL;
   1005               // unchanged token->FSMnode_index = fsm_arc->to_node;
   1006               // because this token was updated, we need to reprocess it, right after
   1007               parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
   1008               if(!parent_ftoken_index) {
   1009 		PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index);
   1010 		print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
   1011 		exit(1);
   1012               }
   1013               *parent_ftoken_index = new_ftoken->next_token_index;
   1014               new_ftoken->next_token_index = current_ftoken->next_token_index;
   1015               current_ftoken->next_token_index = new_ftoken_index;
   1016               rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index;
   1017 	      /* new_ftoken->aword_backtrace must be null, alts here were
   1018 		 processed and dropped in srec_process_word_boundary_nbest() */
   1019               if(new_ftoken->aword_backtrace != AWTNULL) {
   1020 		PLogError( ("Error: internal search error near %s %d\n"), __FILE__, __LINE__);
   1021 		continue;
   1022               }
   1023             } else {
   1024               /* token on other side is same or better, just leave it */
   1025             }
   1026           }
   1027         }
   1028         else if(fsm_arc->ilabel == EPSILON_LABEL) {
   1029           if( new_ftoken == NULL) {
   1030             /* create token for the other side */
   1031             new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
   1032 			// this should not happen because of the above check near
   1033 			// fsmnode_token_freelist == MAXftokenID
   1034             ASSERT(new_ftoken_index != MAXftokenID);
   1035             new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
   1036             new_ftoken->word_backtrace = current_ftoken->word_backtrace;
   1037             new_ftoken->cost = cost_with_wtw;
   1038             new_ftoken->word = word_with_wtw;
   1039             new_ftoken->FSMnode_index = fsm_arc->to_node;
   1040             new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace);
   1041             new_ftoken->next_token_index = current_ftoken->next_token_index;
   1042             current_ftoken->next_token_index = new_ftoken_index;
   1043             rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
   1044           } else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
   1045             /* update token on the other side */
   1046             ftokenID *parent_ftoken_index;
   1047             new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
   1048             new_ftoken->cost = cost_with_wtw;
   1049             new_ftoken->word_backtrace = current_ftoken->word_backtrace;
   1050             new_ftoken->word = word_with_wtw;
   1051 	    /* here we are giving up the path and alternatives that existed at
   1052 	       this node, which is not great! The new (better) top choice
   1053 	       coming in and it's alternatives are propagated instead.
   1054 	       TODO: merge the alternative lists and the previous top choice
   1055 	    */
   1056 	    if(new_ftoken->aword_backtrace!=AWTNULL)
   1057               free_altword_token_batch( rec, new_ftoken->aword_backtrace);
   1058 	    new_ftoken->aword_backtrace = AWTNULL;
   1059             new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace);
   1060             // unchanged token->FSMnode_index = fsm_arc->to_node;
   1061             // because this token was updated, we need to re-process it, right after
   1062             parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
   1063             if(!parent_ftoken_index) {
   1064 	      PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index);
   1065 	      print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
   1066 	      exit(1);
   1067             }
   1068             *parent_ftoken_index = new_ftoken->next_token_index;
   1069             new_ftoken->next_token_index = current_ftoken->next_token_index;
   1070             current_ftoken->next_token_index = new_ftoken_index;
   1071             rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index;
   1072           } else {
   1073             /* token on other side is same or better, just leave it */
   1074 	    /* todo: maybe merge alternative lists? */
   1075           }
   1076         }
   1077       } /* loop over arcs */
   1078 
   1079       ASSERT( current_ftoken->cost != MAXcostdata);
   1080       if( num_fsm_nodes_blocked) {
   1081         /* we do not want to propagate fsm node tokens for paths that have
   1082            just been killed on the basis of no space for word propagation */
   1083         prune_fsmnode_tokens(rec, MAXcostdata/2, current_ftoken_index);
   1084       }
   1085 
   1086     // NEXT_FTOKEN:
   1087       current_ftoken_index = current_ftoken->next_token_index;
   1088     }
   1089 
   1090   // print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "AFTER UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
   1091 
   1092   sanity_check_altwords(rec, rec->altword_token_freelist);
   1093   return num_fsm_nodes_updated;
   1094 }
   1095 
   1096 static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
   1097                                        costdata *pcurrent_best_cost,
   1098                                        costdata *precomputed_model_scores)
   1099 {
   1100   stokenID current_token_index;
   1101   fsmarc_token *current_token;
   1102   costdata current_best_cost;
   1103   costdata current_prune_thresh;
   1104   costdata current_prune_delta;
   1105   costdata model_cost;
   1106   asr_int16_t any_alive;
   1107   HMMInfo *hmm_info;
   1108   modelID model_index;
   1109   asr_int16_t internal_state, end_state;
   1110   arcID fsm_arc_index;
   1111   FSMarc* fsm_arc;
   1112 
   1113   costdata prev_cost;
   1114   costdata self_loop_cost;
   1115 
   1116   current_best_cost = *pcurrent_best_cost;
   1117   current_prune_delta = *pcurrent_prune_delta;
   1118   current_prune_thresh = current_best_cost + current_prune_delta;
   1119 
   1120   ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
   1121 
   1122   /* best_token_for_arc must be reset here, cuz the same array might have
   1123      been used by another gender.  Alternatively we could have let each
   1124      recog use it's own array thereby save cpu at expense of memory */
   1125   for (fsm_arc_index = 0; fsm_arc_index < rec->context->num_arcs; fsm_arc_index++)
   1126     rec->best_token_for_arc[fsm_arc_index] = MAXstokenID;
   1127 
   1128   current_token_index = rec->active_fsmarc_tokens;
   1129   while (current_token_index != MAXstokenID)
   1130   {
   1131     current_token = &(rec->fsmarc_token_array[current_token_index]);
   1132 
   1133     fsm_arc_index = current_token->FSMarc_index;
   1134     fsm_arc = &rec->context->FSMarc_list[fsm_arc_index];
   1135 
   1136     /* best_token_for_arc must be set here, cuz it was reset above */
   1137     rec->best_token_for_arc[fsm_arc_index] = current_token_index;
   1138 
   1139     hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
   1140     any_alive = 0;
   1141     end_state = current_token->num_hmm_states - 1;
   1142 
   1143     for (internal_state = end_state; internal_state >= 0; internal_state--)
   1144     {
   1145 
   1146       model_index = hmm_info->state_indices[internal_state];
   1147       model_cost = precomputed_model_scores[model_index];
   1148 
   1149       /*better to come from previous or self?*/
   1150 
   1151       if (internal_state > 0)
   1152       {
   1153         prev_cost = current_token->cost[internal_state-1];
   1154                 /* a duration model can be applied here,
   1155                    by changing the prev_cost according to some function of
   1156                      the current_token->duration[internal_state-1] rec->avg_state_durations[ prev_model_index] */
   1157         if (prev_cost < current_prune_thresh)
   1158         {
   1159 	  modelID prev_model_index;
   1160           prev_cost = (costdata)(prev_cost + (costdata) model_cost);
   1161           /* apply duration model for "next" transition, note that it's nice
   1162              to access the duration model (avg_state_durations) somehwere
   1163              other than the acoustic models which could be far away in memory
   1164              arrive penalty would be applied here too if it was reqd */
   1165 
   1166           prev_model_index = hmm_info->state_indices[internal_state-1];
   1167           prev_cost = (costdata)(prev_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[ prev_model_index],
   1168                                  current_token->duration[internal_state-1]));
   1169 
   1170         }
   1171       }
   1172       else
   1173       {
   1174         prev_cost = MAXcostdata;
   1175       }
   1176 
   1177       self_loop_cost = current_token->cost[internal_state];
   1178                 /* a duration model can be applied here,
   1179                    by changing the self_loop_cost according to some function of
   1180                      the current_token->duration[internal_state] rec->avg_state_durations[ prev_model_index] */
   1181       if (self_loop_cost < current_prune_thresh)
   1182       {
   1183         self_loop_cost = (costdata)(self_loop_cost + (costdata) model_cost);
   1184         /* apply duration model for "loop" transition */
   1185 
   1186         self_loop_cost = (costdata)(self_loop_cost + (costdata) duration_penalty_loop(rec->avg_state_durations[ model_index],
   1187                                     current_token->duration[internal_state]));
   1188 
   1189       }
   1190 
   1191       if (prev_cost < self_loop_cost)
   1192       {
   1193         current_token->cost[internal_state] = prev_cost;
   1194         current_token->word_backtrace[internal_state] = current_token->word_backtrace[internal_state-1];
   1195         current_token->word[internal_state] = current_token->word[internal_state-1];
   1196                 current_token->duration[internal_state] = 1;
   1197         if (current_token->word[internal_state-1] != MAXwordID)
   1198         {
   1199           if (current_token->aword_backtrace[internal_state] != AWTNULL)
   1200             free_altword_token_batch(rec,
   1201                                      current_token->aword_backtrace[internal_state]);
   1202           current_token->aword_backtrace[internal_state] = refcopy_altwords(rec, current_token->aword_backtrace[internal_state-1]);
   1203           /*print_fsmarc_token(rec, current_token_index, "123c");*/
   1204         }
   1205         else
   1206         {
   1207           /* if there's no top choice, there shouldn't be alternatives! */
   1208           ASSERT(current_token->aword_backtrace[internal_state] == AWTNULL);
   1209           ASSERT(current_token->aword_backtrace[internal_state-1] == AWTNULL);
   1210         }
   1211       }
   1212       else
   1213       {
   1214         current_token->cost[internal_state] = self_loop_cost;
   1215                 current_token->duration[internal_state]++;
   1216       }
   1217 
   1218       if (current_token->cost[internal_state] < current_prune_thresh)
   1219       {
   1220         any_alive = 1;
   1221         if (current_token->cost[internal_state] < current_best_cost)
   1222         {
   1223           current_best_cost = current_token->cost[internal_state];
   1224           current_prune_thresh = current_best_cost + current_prune_delta;
   1225         }
   1226       }
   1227     }
   1228     current_token_index = current_token->next_token_index;
   1229   }
   1230   *pcurrent_best_cost = current_best_cost;
   1231   *pcurrent_prune_delta = current_prune_delta;
   1232 }
   1233 
   1234 
   1235 
   1236 
   1237 static int GetNumArcsArrivingClip2(srec_context* context, FSMnode* fsm_node)
   1238 {
   1239   arcID fpa = fsm_node->first_prev_arc;
   1240   FSMarc* arc;
   1241 
   1242   if (fpa == MAXarcID)
   1243     return 0;
   1244   arc = &context->FSMarc_list[fpa];
   1245   if (arc->linkl_prev_arc == MAXarcID)
   1246     return 1;
   1247   else
   1248     return 2;
   1249 }
   1250 
   1251 static int update_from_hmms_to_fsmnodes(srec *rec, costdata prune_delta, costdata best_cost)
   1252 {
   1253   stokenID current_token_index;
   1254   fsmarc_token *current_token;
   1255   int end_state;
   1256   costdata end_cost;
   1257   costdata current_prune_thresh;
   1258   costdata current_prune_delta;  /*may get tighter to keep num fsmnodes under control*/
   1259   // vFSMarc vfsm_arc;
   1260   FSMarc* fsm_arc;
   1261   FSMnode* fsm_node;
   1262   // vFSMnode vfsm_node;
   1263   arcID fsm_arc_index;
   1264   nodeID to_node_index;
   1265   ftokenID new_ftoken_index;
   1266   fsmnode_token *ftoken;
   1267   modelID end_model_index;
   1268   labelID ilabel;
   1269   short end_cost_equality_hack;
   1270   HMMInfo* hmm_info;
   1271   altword_token *awtoken, *q;
   1272   int num_fsmnode_updates = 0;
   1273 
   1274   current_prune_delta = prune_delta;
   1275   current_prune_thresh = best_cost + current_prune_delta;
   1276   current_token_index = rec->active_fsmarc_tokens;
   1277 
   1278   for (ilabel = 0; ilabel < NODE_INFO_NUMS; ilabel++)
   1279   {
   1280     rec->current_best_ftoken_cost[ilabel] = MAXcostdata / 2;
   1281     rec->current_best_ftoken_index[ilabel] = MAXftokenID;
   1282   }
   1283   sanity_check_altwords(rec, rec->altword_token_freelist);
   1284 
   1285   while (current_token_index != MAXstokenID)
   1286   {
   1287     current_token = &(rec->fsmarc_token_array[current_token_index]);
   1288 
   1289     /*propagate from end of state token to new FSM node*/
   1290     end_state = (char) current_token->num_hmm_states - 1;
   1291 
   1292     ASSERT((current_token->aword_backtrace[end_state] == AWTNULL)
   1293            || (current_token->word[end_state] != MAXwordID));
   1294     end_cost = current_token->cost[end_state];
   1295 
   1296     /* anticipated repruning: make sure there is enough space before
   1297        beginning complex computation */
   1298     if (rec->word_priority_q->max_in_q>1 && rec->altword_token_freelist_len < 3*rec->word_priority_q->max_in_q)
   1299       reprune_altword_tokens(rec);
   1300 
   1301     if (end_cost < current_prune_thresh)
   1302     {
   1303       num_fsmnode_updates++;
   1304       fsm_arc_index = current_token->FSMarc_index;
   1305       fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
   1306 
   1307       hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
   1308 
   1309       end_model_index = hmm_info->state_indices[end_state];
   1310 
   1311       end_cost = (costdata)(end_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[end_model_index],
   1312                             current_token->duration[end_state]));
   1313       to_node_index = fsm_arc->to_node;
   1314       new_ftoken_index = rec->best_token_for_node[to_node_index];
   1315       if (new_ftoken_index == MAXftokenID)
   1316       {
   1317         /*we need to make sure there is room in the new_states array
   1318           and there are free state tokens*/
   1319         if (rec->fsmnode_token_freelist == MAXftokenID)
   1320         {
   1321           /*make sure there is room for another FSMnode token
   1322             - if not, prune until there is room*/
   1323           current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, MAXftokenID);
   1324           current_prune_thresh = best_cost + current_prune_delta;
   1325         }
   1326 
   1327         /*because of the above check, this should always succeed*/
   1328         new_ftoken_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS);
   1329 
   1330         ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
   1331         ftoken->FSMnode_index = to_node_index;
   1332         ftoken->next_token_index = rec->active_fsmnode_tokens;
   1333         ftoken->cost = end_cost;
   1334         ftoken->word_backtrace = current_token->word_backtrace[end_state];
   1335         ftoken->word = current_token->word[end_state];
   1336         if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word)
   1337         {
   1338           ftoken->silence_duration = current_token->duration[end_state];
   1339         }
   1340         else
   1341         {
   1342           ftoken->silence_duration = 0;
   1343         }
   1344         if (ftoken->word != MAXwordID)
   1345         {
   1346           arcID narr;
   1347           fsm_node = &rec->context->FSMnode_list[ fsm_arc->to_node];
   1348           /* when there is only one arc arriving, a refcopy is good enough
   1349              and saves memory */
   1350           narr = (arcID) GetNumArcsArrivingClip2(rec->context, fsm_node);
   1351           if (narr > 1)
   1352             ftoken->aword_backtrace = copy_altwords(rec, current_token->aword_backtrace[end_state], 0);
   1353           else
   1354             ftoken->aword_backtrace = refcopy_altwords(rec, current_token->aword_backtrace[end_state]);
   1355         }
   1356         else
   1357         {
   1358           /* if there's no top choice, there shouldn't be alternatives! */
   1359           ASSERT(current_token->aword_backtrace[end_state] == AWTNULL);
   1360           ftoken->aword_backtrace = AWTNULL;
   1361         }
   1362         rec->active_fsmnode_tokens = new_ftoken_index;
   1363         rec->best_token_for_node[to_node_index] = new_ftoken_index;
   1364       }
   1365       else /* a token already exists, use it! */
   1366       {
   1367 
   1368         ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
   1369         ASSERT( ((current_token->word[end_state] == MAXwordID) && (ftoken->word == MAXwordID))
   1370              || ((current_token->word[end_state] != MAXwordID) && (ftoken->word != MAXwordID)) );
   1371 
   1372         /* this is a hack for preferring the shorter of the backtrace words
   1373            when scores are equal, used to prefer longer pau2 word */
   1374         end_cost_equality_hack = 0;
   1375         if (end_cost == ftoken->cost)
   1376         {
   1377           if (current_token->word_backtrace[end_state] != ftoken->word_backtrace
   1378               && current_token->word_backtrace[end_state] != MAXwtokenID)
   1379           {
   1380             frameID ct_end_time = MAXframeID, et_end_time = 0;
   1381             if (current_token->word_backtrace[end_state] != MAXwtokenID)
   1382               ct_end_time = rec->word_token_array[current_token->word_backtrace[end_state]].end_time;
   1383             if (ftoken->word_backtrace != MAXwtokenID)
   1384               et_end_time = rec->word_token_array[ftoken->word_backtrace].end_time;
   1385             if (ct_end_time < et_end_time)
   1386               end_cost_equality_hack = 1;
   1387           }
   1388         }
   1389 
   1390         if (end_cost < ftoken->cost || end_cost_equality_hack)
   1391         {
   1392           /* new one coming in is better, so push the current state down */
   1393           /* ftoken info goes into awtoken */
   1394           if (ftoken->word != MAXwordID)
   1395           {
   1396             /* copy_altwords() */
   1397             awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
   1398             if (awtoken != AWTNULL)
   1399             {
   1400               awtoken->costdelta = ftoken->cost - end_cost;
   1401               awtoken->word_backtrace = ftoken->word_backtrace;
   1402               awtoken->word = ftoken->word;
   1403 
   1404               /* ensure full ownership! */
   1405               q = ftoken->aword_backtrace;
   1406               if (q != AWTNULL && q->refcount > 1)
   1407               {
   1408                 awtoken->next_token = copy_altwords(rec, ftoken->aword_backtrace, ftoken->cost - end_cost);
   1409                 free_altword_token_batch(rec, ftoken->aword_backtrace);
   1410                 /* reversed order above here !! */
   1411               }
   1412               else
   1413               {
   1414                 awtoken->next_token = ftoken->aword_backtrace;
   1415 				count_altword_token( rec, awtoken);
   1416                 for (q = awtoken->next_token; q; q = q->next_token)
   1417                   q->costdelta += ftoken->cost - end_cost;
   1418               }
   1419               ftoken->aword_backtrace = awtoken;
   1420               ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
   1421 			  if( (q=ftoken->aword_backtrace)!=AWTNULL) {
   1422                 for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ;
   1423                 q->next_token = copy_altwords(rec, current_token->aword_backtrace[end_state], 0);
   1424                 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
   1425                 /* awtoken->costbasis = &ftoken->cost; */
   1426                 ftoken->aword_backtrace->refcount = 1;
   1427 			  }
   1428             }
   1429           }
   1430           else
   1431           {
   1432             /* if there's no top choice, there shouldn't be alternatives! */
   1433             ASSERT(ftoken->aword_backtrace == AWTNULL);
   1434           }
   1435           /* and stoken info goes into ftoken */
   1436           ftoken->cost = end_cost;
   1437           ftoken->word_backtrace = current_token->word_backtrace[end_state];
   1438           ftoken->word = current_token->word[end_state];
   1439           if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word)
   1440           {
   1441             ftoken->silence_duration = current_token->duration[end_state];
   1442           }
   1443           else
   1444           {
   1445             ftoken->silence_duration = 0;
   1446           }
   1447         }
   1448         else
   1449         {
   1450                     /* new arc arriving is worse */
   1451           /* print_fsmarc_token(rec, current_token_index, "new_arc_arriving worse");
   1452              print_fsmnode_token(rec, new_ftoken_index, "new_arc_arriving tonode");*/
   1453           /* append it to the alt list */
   1454           /* stoken info goes into the awtoken, ftoken unchanged */
   1455           if (ftoken->word != MAXwordID)
   1456           {
   1457             /* copy_altwords() */
   1458             awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
   1459             if (awtoken != AWTNULL)
   1460             {
   1461               awtoken->costdelta = end_cost - ftoken->cost;
   1462               awtoken->word = current_token->word[end_state];
   1463               awtoken->word_backtrace = current_token->word_backtrace[end_state];
   1464 
   1465               if (current_token->aword_backtrace[end_state] != AWTNULL)
   1466                 awtoken->next_token = copy_altwords(rec,
   1467                                                     current_token->aword_backtrace[end_state],
   1468                                                     awtoken->costdelta);
   1469               else
   1470                 awtoken->next_token = AWTNULL;
   1471 
   1472               /* ensure full ownership!, this is new here! */
   1473               q = ftoken->aword_backtrace;
   1474               if (q != AWTNULL && q->refcount > 1)
   1475               {
   1476                 q = copy_altwords(rec, ftoken->aword_backtrace, 0);
   1477                 free_altword_token_batch(rec, ftoken->aword_backtrace);
   1478                 ftoken->aword_backtrace = q;
   1479               }
   1480             }
   1481             if (ftoken->aword_backtrace)
   1482             {
   1483               for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ;
   1484               q->next_token = awtoken;
   1485             }
   1486             else
   1487             {
   1488               ftoken->aword_backtrace = awtoken;
   1489             }
   1490 			if (ftoken->aword_backtrace!=AWTNULL) {
   1491 				ftoken->aword_backtrace->refcount = 1;
   1492 				ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
   1493 			}
   1494           }
   1495         }
   1496         /*print_fsmnode_token(rec, new_ftoken_index, "123e reused-token ");*/
   1497       }
   1498       ilabel = rec->context->FSMnode_info_list[ ftoken->FSMnode_index];
   1499       ASSERT(ilabel < NODE_INFO_NUMS);
   1500       if (ftoken->cost < rec->current_best_ftoken_cost[ilabel])
   1501       {
   1502         rec->current_best_ftoken_cost[ilabel]  = ftoken->cost;
   1503         rec->current_best_ftoken_index[ilabel] = new_ftoken_index;
   1504       }
   1505       if (ftoken->cost < rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN])
   1506       {
   1507         rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN]  = ftoken->cost;
   1508         rec->current_best_ftoken_index[NODE_INFO_UNKNOWN] = new_ftoken_index;
   1509       }
   1510       ASSERT(ftoken->word != MAXwordID || ftoken->aword_backtrace == AWTNULL);
   1511     }
   1512     current_token_index = current_token->next_token_index;
   1513   }
   1514   sanity_check_altwords(rec, rec->altword_token_freelist);
   1515   return num_fsmnode_updates;
   1516 }
   1517 
   1518 static int update_from_current_fsm_nodes_into_new_HMMs(srec* rec,
   1519     costdata *pcurrent_prune_delta,
   1520     costdata *pcurrent_best_cost,
   1521     costdata *precomputed_model_scores)
   1522 {
   1523   costdata prev_cost;
   1524   FSMnode* fsm_node;
   1525   FSMarc*  fsm_arc;
   1526   arcID fsm_arc_index;
   1527   HMMInfo *hmm_info;
   1528   modelID model_index;
   1529   fsmarc_token *token;
   1530   stokenID new_token_index = MAXstokenID;
   1531   costdata cost;
   1532   costdata current_prune_thresh;
   1533   costdata current_prune_delta = *pcurrent_prune_delta;
   1534   costdata current_best_cost = *pcurrent_best_cost;
   1535   ftokenID ftoken_index;
   1536   ftokenID old_ftoken_index;
   1537   fsmnode_token *fsmnode_token;
   1538   int num_fsm_nodes_updated = 0;
   1539   costdata orig_prune_delta;
   1540 
   1541   ftoken_index = rec->active_fsmnode_tokens;
   1542 
   1543   current_prune_thresh = *pcurrent_best_cost + *pcurrent_prune_delta;
   1544   orig_prune_delta = *pcurrent_prune_delta;
   1545 
   1546   sanity_check_altwords(rec, rec->altword_token_freelist);
   1547 
   1548   while (ftoken_index != MAXftokenID)
   1549   {
   1550     fsmnode_token = &rec->fsmnode_token_array[ftoken_index];
   1551 
   1552     prev_cost = fsmnode_token->cost; /*get last state of token*/
   1553     if (fsmnode_token->FSMnode_index == rec->context->end_node)
   1554     {
   1555       prev_cost = MAXcostdata;
   1556     }
   1557 
   1558     if (prev_cost < current_prune_thresh)
   1559     {
   1560       num_fsm_nodes_updated++;
   1561 
   1562       fsm_node = &rec->context->FSMnode_list[fsmnode_token->FSMnode_index];
   1563 
   1564       /* loop over arcs leaving this fsm_node */
   1565       for (fsm_arc_index = fsm_node->un_ptr.first_next_arc;
   1566            fsm_arc_index != MAXarcID;
   1567            fsm_arc_index = fsm_arc->linkl_next_arc)
   1568       {
   1569         labelID ilabel;
   1570         wordID olabel;
   1571         nodeID nextnode;
   1572 
   1573         fsm_arc = &rec->context->FSMarc_list[  fsm_arc_index];
   1574 
   1575         ilabel = fsm_arc->ilabel;
   1576         olabel = fsm_arc->olabel;
   1577         nextnode = fsm_arc->to_node;
   1578 
   1579         if (ilabel >= EPSILON_OFFSET)
   1580         {
   1581                     /*so, not an epsilon arc*/
   1582           hmm_info = &rec->context->hmm_info_for_ilabel[ilabel];
   1583 
   1584           model_index = hmm_info->state_indices[0];
   1585 
   1586           cost = prev_cost + precomputed_model_scores[model_index];
   1587           cost = (costdata)(cost + (costdata) fsm_arc->cost);
   1588 
   1589           if (cost < current_prune_thresh)
   1590           {
   1591             /*new node to keep*/
   1592 
   1593             /* look for the fsmarc_token* token, into which to maximize, else create new one */
   1594             if (rec->best_token_for_arc[fsm_arc_index] == MAXstokenID)
   1595             {
   1596 
   1597               /*make sure there is room for another state token - if not, prune
   1598               until there is room*/
   1599               /*we need to make sure there is room in the new_states array and
   1600               there are free state tokens*/
   1601               if (rec->fsmarc_token_freelist == MAXstokenID)
   1602               {
   1603                 current_prune_delta = reprune_new_states(rec, current_best_cost, current_prune_delta);
   1604               }
   1605 
   1606               /* because of the above check, this should always succeed */
   1607               new_token_index = setup_free_fsmarc_token(rec, fsm_arc, fsm_arc_index, EXIT_IF_NO_TOKENS);
   1608 
   1609               token = &(rec->fsmarc_token_array[new_token_index]);
   1610 
   1611               token->next_token_index = rec->active_fsmarc_tokens;
   1612               rec->active_fsmarc_tokens = new_token_index;
   1613               rec->num_new_states++;
   1614 
   1615               rec->best_token_for_arc[fsm_arc_index] = new_token_index;
   1616               token->cost[0] = MAXcostdata;
   1617             }
   1618             else
   1619             {
   1620               new_token_index = rec->best_token_for_arc[fsm_arc_index];
   1621               token = &(rec->fsmarc_token_array[ new_token_index]);
   1622             }
   1623 
   1624             if (cost < token->cost[0])
   1625             {
   1626               token->cost[0] = cost;
   1627                             token->duration[0] = 1;
   1628               token->word_backtrace[0] = fsmnode_token->word_backtrace;
   1629               if (token->aword_backtrace[0] != AWTNULL)
   1630                 free_altword_token_batch(rec, token->aword_backtrace[0]);
   1631               token->aword_backtrace[0] = AWTNULL;
   1632               token->aword_backtrace[0] = refcopy_altwords(rec, fsmnode_token->aword_backtrace);
   1633 
   1634               if (olabel != WORD_EPSILON_LABEL)
   1635               {
   1636                 token->word[0] = olabel;
   1637                 //ASSERT(token->aword_backtrace[0] == AWTNULL);
   1638               }
   1639               else
   1640               {
   1641                 token->word[0] = fsmnode_token->word;
   1642               }
   1643               ASSERT(token->word[0] != MAXwordID
   1644                      || token->aword_backtrace[0] == AWTNULL);
   1645               if (cost < current_best_cost)
   1646               {
   1647                 current_best_cost = cost;
   1648                 current_prune_delta = orig_prune_delta;  /*if we have a new best cost, the prune delta could go back up*/
   1649                 current_prune_thresh = cost + current_prune_delta;
   1650                 ASSERT((float)cost + (float)current_prune_delta < (float)USHRT_MAX);
   1651               }
   1652             }
   1653           }
   1654         }
   1655       }
   1656     }
   1657     rec->best_token_for_node[fsmnode_token->FSMnode_index] = MAXftokenID; /*done with this node - remove it from the array*/
   1658     old_ftoken_index = ftoken_index;
   1659 
   1660     ftoken_index = fsmnode_token->next_token_index;
   1661     free_fsmnode_token(rec, old_ftoken_index); /*done with this node - free the token*/
   1662     rec->active_fsmnode_tokens = ftoken_index; /*needed for sanity_check_altwords*/
   1663   }
   1664   /*done with all the tokens, set active tokens to NULL*/
   1665   rec->active_fsmnode_tokens = MAXftokenID;
   1666   sanity_check_altwords(rec, rec->altword_token_freelist);
   1667 
   1668   *pcurrent_best_cost = current_best_cost;
   1669   *pcurrent_prune_delta = current_prune_delta;
   1670 
   1671   return num_fsm_nodes_updated;
   1672 }
   1673 
   1674 #if USE_COMP_STATS
   1675 void start_front_end_clock(void)
   1676 {
   1677   if (!comp_stats)
   1678     comp_stats = init_comp_stats();
   1679   start_cs_clock(&comp_stats->front_end);
   1680 }
   1681 void stop_front_end_clock(void)
   1682 {
   1683   end_cs_clock(&comp_stats->front_end, 1);
   1684 }
   1685 #endif
   1686 
   1687 
   1688 /*---------------------------------------------------------------------------*
   1689  *                                                                           *
   1690  * begin and end                                                             *
   1691  *                                                                           *
   1692  *---------------------------------------------------------------------------*/
   1693 
   1694 /*gets things started for the viterbi search - sets up things for frame 0*/
   1695 
   1696 int srec_begin(srec *rec, int begin_syn_node)
   1697 {
   1698   FSMnode* fsm_node;
   1699   fsmnode_token *token;
   1700   stokenID new_token_index;
   1701   nodeID node_index;
   1702   arcID arc_index;
   1703 
   1704   if (!rec || !rec->context)
   1705   {
   1706     log_report("Error: bad inputs to srec_begin()\n");
   1707     return 1;
   1708   }
   1709   if (!rec->context->whether_prepared)
   1710   {
   1711     log_report("srec_begin: Grammar not prepared. Compiling!\n");
   1712     FST_PrepareContext(rec->context);
   1713 
   1714     if (!rec->context->whether_prepared)
   1715     {
   1716       PLogError("ESR_INVALID_STATE: Grammar can not be compiled (FST_PrepareContext failed)");
   1717       return ESR_INVALID_STATE ;
   1718     }
   1719   }
   1720 
   1721 #if USE_COMP_STATS
   1722   if (comp_stats == NULL)
   1723     comp_stats = init_comp_stats();
   1724 #endif
   1725   /*initialize token storage - not clear we really need this - as long as they
   1726   are managed correctly, we should be able to do this on startup - not each utt*/
   1727   initialize_free_fsmarc_tokens(rec);
   1728   initialize_free_word_tokens(rec);
   1729   initialize_free_fsmnode_tokens(rec);
   1730   initialize_word_lattice(rec->word_lattice);
   1731   initialize_free_altword_tokens(rec);
   1732 
   1733   if (rec->context->num_nodes > rec->max_fsm_nodes)
   1734   {
   1735     log_report("Error: srec_begin failing due to too many grammar nodes\n");
   1736     return 1;
   1737   }
   1738   for (node_index = 0;node_index < rec->context->num_nodes;node_index++)
   1739   {
   1740     rec->best_token_for_node[node_index] = MAXftokenID;
   1741   }
   1742   if (rec->context->num_arcs > rec->max_fsm_arcs)
   1743   {
   1744     log_report("Error: srec_begin failing due to too many grammar arcs\n");
   1745     return 1;
   1746   }
   1747   for (arc_index = 0;arc_index < rec->context->num_arcs;arc_index++)
   1748   {
   1749     rec->best_token_for_arc[arc_index] = MAXstokenID;
   1750   }
   1751   rec->srec_ended = 0;
   1752   rec->num_new_states = 0;
   1753   rec->current_best_cost = 0;
   1754   rec->current_prune_delta = rec->prune_delta;
   1755 
   1756   /*need help from johan - does ths FSM only have one start node?
   1757   Which one is it?   assume just one and it is node 0*/
   1758 
   1759   fsm_node =  &rec->context->FSMnode_list[ rec->context->start_node];
   1760   node_index = (nodeID) rec->context->start_node;
   1761   /* node_index is still 0 at this point */
   1762 
   1763   /*now we just need to setup an initial fsmnode token (for begin FSM node) and then do epsilon updates*/
   1764 
   1765   rec->active_fsmarc_tokens = MAXstokenID;
   1766 
   1767   new_token_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS);
   1768 
   1769   token = &(rec->fsmnode_token_array[new_token_index]);
   1770   token->word_backtrace = MAXwtokenID; /* real value set below*/
   1771   token->cost = 0;
   1772   token->word = MAXwordID;
   1773   token->FSMnode_index = node_index;
   1774   token->next_token_index = MAXftokenID;
   1775   token->aword_backtrace = AWTNULL;
   1776 
   1777   rec->best_token_for_node[node_index] = new_token_index;
   1778   rec->active_fsmnode_tokens = new_token_index;
   1779   rec->current_search_frame = 0;
   1780 
   1781   do_epsilon_updates(rec, rec->prune_delta, 0);
   1782   return 0;
   1783 }
   1784 
   1785 void srec_force_the_end(srec* rec, frameID end_frame, wordID end_word)
   1786 {
   1787   srec_word_lattice* wl = rec->word_lattice;
   1788   wtokenID wtoken_index, tmp;
   1789   frameID frame;
   1790   wtoken_index = wl->words_for_frame[end_frame];
   1791   if (wtoken_index == MAXwtokenID)
   1792   {
   1793     for (frame = end_frame - 1; frame > 20; frame--)
   1794     {
   1795       if (wl->words_for_frame[frame] != MAXwtokenID)
   1796       {
   1797         word_token* wtoken;
   1798         wl->words_for_frame[end_frame] = wl->words_for_frame[frame];
   1799         wl->words_for_frame[frame] = MAXwtokenID;
   1800         for (tmp = wl->words_for_frame[end_frame]; tmp != MAXwtokenID;
   1801              tmp = wtoken->next_token_index)
   1802         {
   1803           wtoken = &rec->word_token_array[tmp];
   1804           wtoken->end_time = frame;
   1805           wtoken->word = end_word;
   1806           wtoken->end_node = rec->context->end_node;
   1807         }
   1808 #ifdef _WIN32
   1809         PLogError(L("Forced an end path at end frame %d/%d)\n"), frame, end_frame);
   1810 #endif
   1811         break;
   1812       }
   1813     }
   1814   }
   1815 }
   1816 
   1817 /* when there are no more frames of input, this functions
   1818    kills all paths not ending at the end node and
   1819    creates a word linked list even though there is no WORD_BOUNDARY ilabel */
   1820 
   1821 void srec_no_more_frames(srec* rec)
   1822 {
   1823 #if USE_COMP_STATS
   1824   frameID end_frame = rec->current_search_frame;
   1825 #endif
   1826   nodeID  end_node;
   1827   fsmnode_token* ftoken;
   1828   ftokenID current_token_index;
   1829   costdata current_word_threshold = MAXcostdata;
   1830   wtokenID word_token_index;
   1831   int any_nodes_blocked = 0;
   1832   altword_token* awtoken;
   1833 
   1834   /* this is just for sanity checking, to find out what the state was
   1835      at the end of input */
   1836   srec_check_end_of_speech_end(rec);
   1837 
   1838   if (rec->srec_ended) return;
   1839   rec->srec_ended = 1;
   1840 
   1841 #if USE_COMP_STATS
   1842   comp_stats->total_time += (float)(end_frame / 50.0f);
   1843   dump_comp_stats(comp_stats, PSTDOUT);
   1844 #endif
   1845 
   1846   end_node = rec->context->end_node;
   1847   /*remove all word paths from the priority_q which do not end at end_node
   1848     to make space for those being added below */
   1849   remove_non_end_word_from_q(rec, rec->word_priority_q, rec->word_token_array,
   1850                              end_node);
   1851 
   1852   if (rec->current_search_frame == 0)
   1853     return;
   1854 
   1855   rec->accumulated_cost_offset[ rec->current_search_frame] =
   1856     rec->accumulated_cost_offset[ rec->current_search_frame-1];
   1857   rec->cost_offset_for_frame[ rec->current_search_frame] = 0;
   1858 
   1859   /* watch out if using the best_token_for_node[] array here
   1860      is it valid? not if multiple recognizers, maybe we
   1861      should remember best_token_for_end_node separately */
   1862 
   1863   current_token_index = rec->active_fsmnode_tokens;
   1864   while (current_token_index != MAXftokenID)
   1865   {
   1866     ftoken = &rec->fsmnode_token_array[current_token_index];
   1867     if (ftoken->FSMnode_index == end_node)
   1868     {
   1869       /* print_fsmnode_token(rec, current_token_index, "fsmnode_token at end_node "); */
   1870       word_token_index = srec_process_word_boundary_nbest(rec,
   1871                          ftoken->FSMnode_index,
   1872                          ftoken->word,
   1873                          ftoken->word_backtrace,
   1874                           ftoken->cost, &current_word_threshold, &any_nodes_blocked);
   1875       if (word_token_index != MAXwtokenID)
   1876       {
   1877         WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+word_token_index),
   1878           rec->word_token_array[word_token_index].end_time - ftoken->silence_duration);
   1879       }
   1880       /* now also dump alternatives at this last frame, sep19'03 fixed */
   1881       awtoken = ftoken->aword_backtrace;
   1882       for (; awtoken != AWTNULL; awtoken = awtoken->next_token)
   1883       {
   1884         srec_process_word_boundary_nbest(rec,
   1885                                          ftoken->FSMnode_index,
   1886                                          awtoken->word,
   1887                                          awtoken->word_backtrace,
   1888                                          ftoken->cost + awtoken->costdelta,
   1889                                          &current_word_threshold,
   1890                                          &any_nodes_blocked);
   1891       }
   1892     }
   1893     current_token_index = ftoken->next_token_index;
   1894   }
   1895 
   1896   /* we clobber the word_lattice at the last frame that was created
   1897      in do_epsilon_updates() */
   1898   word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
   1899   lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
   1900 
   1901   if (FST_IsVoiceEnrollment(rec->context) && word_token_index == MAXwtokenID)
   1902   {
   1903     srec_force_the_end(rec, rec->current_search_frame, rec->context->end_silence_word);
   1904   }
   1905 
   1906   /* find the current_best_cost for this recognizer ... at the end node,
   1907      it will be used to decide which recognizer wins! */
   1908   rec->current_best_cost = lattice_best_cost_to_frame(rec->word_lattice,
   1909                            rec->word_token_array,
   1910                            rec->current_search_frame);
   1911 
   1912 }
   1913 
   1914 void srec_terminate(srec* rec)
   1915 {
   1916   frameID ifr;
   1917   stokenID stoken_index, next_stoken_index;
   1918   fsmarc_token* stoken;
   1919   ftokenID ftoken_index, next_ftoken_index;
   1920   fsmnode_token* ftoken;
   1921   wtokenID wtoken_index, next_wtoken_index;
   1922   word_token* wtoken;
   1923 
   1924   /* release all state tokens */
   1925   for (stoken_index = rec->active_fsmarc_tokens; stoken_index != MAXstokenID;
   1926        stoken_index = next_stoken_index)
   1927   {
   1928     stoken = &rec->fsmarc_token_array[ stoken_index];
   1929     next_stoken_index = stoken->next_token_index;
   1930     free_fsmarc_token(rec, stoken_index);
   1931   }
   1932   rec->active_fsmarc_tokens = MAXstokenID;
   1933 
   1934   /* release all fsmnode tokens */
   1935   for (ftoken_index = rec->active_fsmnode_tokens; ftoken_index != MAXftokenID;
   1936        ftoken_index = next_ftoken_index)
   1937   {
   1938     ftoken = &rec->fsmnode_token_array[ ftoken_index];
   1939     next_ftoken_index = ftoken->next_token_index;
   1940     free_fsmnode_token(rec, ftoken_index);
   1941   }
   1942   rec->active_fsmnode_tokens = MAXftokenID;
   1943 
   1944   /* release all word tokens */
   1945   for (ifr = 0; ifr < rec->current_search_frame; ifr++)
   1946   {
   1947     for (wtoken_index = rec->word_lattice->words_for_frame[ifr];
   1948          wtoken_index != MAXwtokenID; wtoken_index = next_wtoken_index)
   1949     {
   1950       wtoken = &rec->word_token_array[wtoken_index];
   1951       next_wtoken_index = wtoken->next_token_index;
   1952       free_word_token(rec, wtoken_index);
   1953     }
   1954     rec->word_lattice->words_for_frame[ifr] = MAXwtokenID;
   1955   }
   1956   rec->current_model_scores[SILENCE_MODEL_INDEX] = DO_NOT_COMPUTE_MODEL;
   1957   rec->current_best_cost = MAXcostdata;
   1958   rec->srec_ended = 1;
   1959 }
   1960 /*------------------------------------------------------------------------*
   1961  *                                                                        *
   1962  * main work of the viterbi search                                        *
   1963  *                                                                        *
   1964  *------------------------------------------------------------------------*/
   1965 
   1966 /*with new update to FSM node scheme, the sequence of operation is:
   1967 
   1968   for each frame:
   1969 
   1970   1. Handle all internal HMM updates based on new frame observations.  This is
   1971   done in place with the current list of HMM tokens.
   1972 
   1973   2. For each current active FSM node (from previous frame), activate update
   1974   into state 0 (either for existing HMM tokens or for new HMM tokens) by going
   1975   through an observation frame (so, only go from an FSM node to a new HMM
   1976   token if the first observation frame gets a score above the current pruning
   1977   threshold).  FSM nodes are freed as this is done.  So, no FSMnode tokens are left
   1978   at the end of this.
   1979 
   1980   3. Prune.  Note that the best score will have already been established for
   1981   this frame (so therefore the pruning threshold will not change).
   1982 
   1983   4. reset best cost to 0 (to keep scores in range).  We can do this here since we already  know the best score.
   1984 
   1985   5. For end hmm states which are above the pruning threshold, create new
   1986   FSMnode_tokens.
   1987 
   1988   6. update epsilons, including word boundary arcs (which put words onto the word lattice).
   1989   epsilon updates go from FSM node to FSM node.
   1990 
   1991   repeat for next frame based on new FSM nodes and current HMMs
   1992 
   1993 */
   1994 
   1995 void srec_viterbi_part1(srec *rec,
   1996                         const SWIModel *acoustic_models,
   1997                         pattern_info *pattern,
   1998                         costdata silence_model_cost);
   1999 
   2000 void srec_viterbi_part2(srec *rec);
   2001 
   2002 int multi_srec_viterbi(multi_srec *recm,
   2003                        srec_eos_detector_parms* eosd,
   2004                        pattern_info *pattern,
   2005                        utterance_info* utt_not_used)
   2006 {
   2007   EOSrc eosrc1 = SPEECH_ENDED, eosrc2 = SPEECH_ENDED;
   2008 #if DO_ALLOW_MULTIPLE_MODELS
   2009   ASSERT(recm->num_activated_recs == recm->num_swimodels);
   2010     if (recm->num_activated_recs == 1)
   2011   {
   2012 #endif
   2013     srec* rec1 = &recm->rec[0];
   2014 #if USE_COMP_STATS
   2015     start_cs_clock1(&comp_stats->overall_search);
   2016 #endif
   2017     if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
   2018       return 1;
   2019     srec_viterbi_part1(&recm->rec[0], recm->swimodel[0], pattern, DO_NOT_COMPUTE_MODEL);
   2020     reset_best_cost_to_zero(rec1, rec1->current_best_cost);
   2021     reset_cost_offsets(recm, rec1->current_search_frame, rec1->current_best_cost);
   2022     rec1->current_prune_delta = rec1->prune_delta;
   2023     rec1->current_best_cost   = 0;
   2024     srec_viterbi_part2(&recm->rec[0]);
   2025     eosrc1 = srec_check_end_of_speech(eosd, &recm->rec[0]);
   2026 #if USE_COMP_STATS
   2027     end_cs_clock1(&comp_stats->overall_search, 1);
   2028 #endif
   2029 
   2030     SREC_STATS_UPDATE(&recm->rec[0]);
   2031     recm->eos_status = eosrc1;
   2032 #if DO_ALLOW_MULTIPLE_MODELS
   2033     }
   2034   else if (recm->num_activated_recs == 2)
   2035   {
   2036     srec* rec1 = &recm->rec[0];
   2037     srec* rec2 = &recm->rec[1];
   2038     const SWIModel* acoustic_models1 = recm->swimodel[0];
   2039     const SWIModel* acoustic_models2 = recm->swimodel[1];
   2040     costdata diff;
   2041     costdata current_best_cost;
   2042 
   2043     ASSERT(rec1->prune_delta == rec2->prune_delta);
   2044     /* in part 1 we need to operate by adjusting the prune delta, 'cuz we want
   2045        to operate on scores after consumption of a frame */
   2046     if ((rec1->current_best_cost > MAXcostdata / 2 && !rec1->srec_ended) ||
   2047         (rec2->current_best_cost > MAXcostdata / 2 && !rec2->srec_ended))
   2048     {
   2049       printf("hey %d %d\n", rec1->current_best_cost, rec2->current_best_cost);
   2050     }
   2051 
   2052     /* figure out the prune_delta for the different genders, we
   2053        want that pruning should be joint (i.e. prune male and
   2054        female relative to overall best).  Before part1 we don't
   2055        yet know the overall best, so we use the gender score gap
   2056        from the last frame, and make the prune the worse gender
   2057        accordingly more aggressive */
   2058 
   2059     if (!rec2->srec_ended && rec1->current_best_cost < rec2->current_best_cost)
   2060     {
   2061       diff = rec2->current_best_cost - rec1->current_best_cost;
   2062       if (rec2->current_search_frame >= (rec2->word_lattice->max_frames - 1))
   2063       {
   2064         return 1;
   2065       }
   2066       if (diff > rec2->prune_delta)
   2067       {
   2068         srec_terminate(rec2);
   2069 #ifdef SREC_ENGINE_VERBOSE_LOGGING
   2070         PLogMessage("T: terminate_viterbi(rec2) @%d", rec2->current_search_frame);
   2071 #endif
   2072       }
   2073       else
   2074         rec2->current_prune_delta = rec2->prune_delta - diff;
   2075       rec1->current_prune_delta = rec1->prune_delta;
   2076     }
   2077     else if (!rec1->srec_ended)
   2078     {
   2079       if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
   2080       {
   2081         return 1;
   2082       }
   2083       diff = rec1->current_best_cost - rec2->current_best_cost;
   2084       if (diff > rec1->prune_delta)
   2085       {
   2086         srec_terminate(rec1);
   2087 #ifdef SREC_ENGINE_VERBOSE_LOGGING
   2088         PLogMessage("T: terminate_viterbi(rec1) @%d", rec1->current_search_frame);
   2089 #endif
   2090       }
   2091       else
   2092         rec1->current_prune_delta = rec1->prune_delta - diff;
   2093       rec2->current_prune_delta = rec2->prune_delta;
   2094     }
   2095 
   2096     /* now run part1 for each gender */
   2097     if (!rec1->srec_ended)
   2098     {
   2099       srec_viterbi_part1(rec1, acoustic_models1, pattern, DO_NOT_COMPUTE_MODEL);
   2100       SREC_STATS_UPDATE(rec1);
   2101     }
   2102 
   2103     if (!rec2->srec_ended)
   2104     {
   2105       srec_viterbi_part1(rec2, acoustic_models2, pattern, rec1->current_model_scores[SILENCE_MODEL_INDEX]);
   2106       SREC_STATS_UPDATE(rec2);
   2107     }
   2108 
   2109     /* now adjust score offsets, score offsets are shared across genders */
   2110 
   2111     if (rec1->current_best_cost <= rec2->current_best_cost)
   2112     {
   2113       /* am1 is winning, prune 2 harder */
   2114       current_best_cost = rec1->current_best_cost;
   2115       reset_cost_offsets(recm, rec1->current_search_frame, current_best_cost);
   2116     }
   2117     else
   2118     {
   2119       /* am2 is winning, prune 1 harder */
   2120       current_best_cost = rec2->current_best_cost;
   2121       reset_cost_offsets(recm, rec2->current_search_frame, current_best_cost);
   2122     }
   2123 
   2124     /* jean: some cleanup needed here */
   2125     /** best_token_for_arc = rec1->best_token_for_arc;
   2126       rec1->best_token_for_arc = 0; **/
   2127     if (!rec1->srec_ended)
   2128     {
   2129       reset_best_cost_to_zero(rec1, current_best_cost);
   2130       rec1->current_best_cost = (costdata)(rec1->current_best_cost - (costdata) current_best_cost);
   2131       srec_viterbi_part2(rec1);
   2132       if (rec1->active_fsmnode_tokens == MAXftokenID)
   2133         srec_terminate(rec1);
   2134       if (!rec1->srec_ended)
   2135         eosrc1 = srec_check_end_of_speech(eosd, rec1);
   2136     }
   2137     /** rec1->best_token_for_arc = best_token_for_arc;
   2138       best_token_for_arc = rec2->best_token_for_arc;
   2139       rec2->best_token_for_arc = 0; **/
   2140     if (!rec2->srec_ended)
   2141     {
   2142       reset_best_cost_to_zero(rec2, current_best_cost);
   2143       rec2->current_best_cost = (costdata)(rec2->current_best_cost - (costdata) current_best_cost);
   2144       srec_viterbi_part2(rec2);
   2145       if (rec2->active_fsmnode_tokens == MAXftokenID)
   2146         srec_terminate(rec2);
   2147       if (!rec2->srec_ended)
   2148         eosrc2 = srec_check_end_of_speech(eosd, rec2);
   2149     }
   2150     /** rec2->best_token_for_arc = best_token_for_arc; **/
   2151     SREC_STATS_UPDATE(rec1);
   2152     SREC_STATS_UPDATE(rec2);
   2153     recm->eos_status = eosrc1;
   2154     if (rec1->current_best_cost > rec2->current_best_cost)
   2155       recm->eos_status = eosrc2;
   2156   }
   2157 #endif
   2158     return 0;
   2159 }
   2160 
   2161 
   2162 void srec_viterbi_part1(srec *rec,
   2163                         const SWIModel *acoustic_models,
   2164                         pattern_info *pattern,
   2165                         costdata silence_model_cost)
   2166 {
   2167   costdata current_best_cost;
   2168   /*  costdata current_prune_thresh; */
   2169   costdata current_prune_delta;
   2170   /* the score difference for pruning - can get adjusted below if
   2171      pruning gets tighted to keep array sizes in check*/
   2172   costdata *current_model_scores;
   2173   int num_models_computed;
   2174   nodeID num_fsm_nodes_updated;
   2175 
   2176 #if USE_COMP_STATS
   2177   start_cs_clock(&comp_stats->models);
   2178 #endif
   2179 
   2180   /*first go ahead and compute scores for all models which are needed by the search at this point*/
   2181 
   2182 
   2183   find_which_models_to_compute(rec, acoustic_models);
   2184   /* communication happens via rec->current_model_scores */
   2185 #define SCORE_FIRST_SILENCE_ONLY
   2186 #ifdef SCORE_FIRST_SILENCE_ONLY
   2187   if (silence_model_cost != DO_NOT_COMPUTE_MODEL)
   2188     rec->current_model_scores[SILENCE_MODEL_INDEX] = silence_model_cost;
   2189 #endif
   2190   num_models_computed = compute_model_scores(rec->current_model_scores, acoustic_models, pattern, rec->current_search_frame);
   2191   rec->best_model_cost_for_frame[rec->current_search_frame] = best_uint16(rec->current_model_scores, acoustic_models->num_hmmstates);
   2192 
   2193 #if USE_COMP_STATS
   2194   end_cs_clock(&comp_stats->models, num_models_computed);
   2195   start_cs_clock(&comp_stats->internal_hmm);
   2196 #endif
   2197 
   2198   /*get some things out of the rec structure to make things a bit faster*/
   2199   current_model_scores = rec->current_model_scores;
   2200 
   2201   /*update search to next frame*/
   2202   current_best_cost = MAXcostdata - ((costdata)2) * rec->prune_delta; /*to avoid overflows, must clean up later */
   2203   /* current_prune_thresh = MAXcostdata; */
   2204   current_prune_delta = rec->current_prune_delta;
   2205 
   2206   /* srec_stats_update(rec, "(...0) "); */
   2207   /*------------------------------------------------------------------------*
   2208     1. Handle all internal HMM updates based on new frame observations.  This is
   2209     done in place with the current list of HMM tokens.
   2210    *------------------------------------------------------------------------*/
   2211 
   2212   update_internal_hmm_states(rec, &current_prune_delta, &current_best_cost, current_model_scores);
   2213 
   2214   /*  check_if_any_token_better_than_best_cost(rec, rec->active_fsmarc_tokens, current_best_cost, "after update into new");*/
   2215 
   2216 #if USE_COMP_STATS
   2217   end_cs_clock(&comp_stats->internal_hmm, rec->num_new_states);
   2218   start_cs_clock(&comp_stats->fsm_to_hmm);
   2219 #endif
   2220 
   2221   /* srec_stats_update(rec, "(...1) "); */
   2222   /*------------------------------------------------------------------------*
   2223     2. For each current active FSM node (from previous frame), activate update
   2224     into state 0 (either for existing HMM tokens or for new HMM tokens) by going
   2225     through an observation frame (so, only go from an FSM node to a new HMM
   2226     token if the first observation frame gets a score above the current pruning
   2227     threshold).  FSM nodes are freed as this is done.  So, no FSMnode tokens are left
   2228     at the end of this.
   2229    *------------------------------------------------------------------------*/
   2230 
   2231   num_fsm_nodes_updated = (nodeID) update_from_current_fsm_nodes_into_new_HMMs(rec, &current_prune_delta, &current_best_cost, current_model_scores);
   2232   /* srec_stats_update(rec, "(...2) "); */
   2233   /*------------------------------------------------------------------------*
   2234     3. Prune.  Note that the best score will have already been established for
   2235     this frame (so therefore the pruning threshold will not change).
   2236    *------------------------------------------------------------------------*/
   2237 
   2238 #if USE_COMP_STATS
   2239   end_cs_clock(&comp_stats->fsm_to_hmm, num_fsm_nodes_updated);
   2240   start_cs_clock(&comp_stats->prune);
   2241 #endif
   2242 
   2243   prune_new_tokens(rec, (costdata)(current_best_cost + current_prune_delta));
   2244 
   2245   /* it's nice to do word token pruning here 'cuz we only need to traceback
   2246      the active_fsmarc_tokens, active_fsmnode_tokens are propogated thereto */
   2247 
   2248   reprune_word_tokens_if_necessary(rec);
   2249 
   2250   rec->current_prune_delta = current_prune_delta;
   2251   rec->current_best_cost = current_best_cost;
   2252   /* srec_stats_update(rec, "(...3) "); */
   2253 #if USE_COMP_STATS
   2254   end_cs_clock(&comp_stats->prune, rec->num_new_states);
   2255 #endif
   2256 }
   2257 
   2258 void srec_viterbi_part2(srec *rec)
   2259 {
   2260   wtokenID word_token_index;
   2261   nodeID inode, num_fsm_nodes_updated;
   2262   costdata current_prune_delta = rec->current_prune_delta;
   2263   costdata current_best_cost = rec->current_best_cost;
   2264   ftokenID* ftmp;
   2265   int num_updates;
   2266 
   2267   /* first we clear the best_token_for_node array, there are no live
   2268      fsmnode_tokens at this point, and we don't want leftovers from
   2269      the last frame */
   2270   ftmp = rec->best_token_for_node;
   2271   for (inode = 0; inode < rec->context->num_nodes; inode++)
   2272     *ftmp++ = MAXftokenID;
   2273 
   2274   /*------------------------------------------------------------------------*
   2275     4. reset best cost to 0 (to keep scores in range).  We can do this here
   2276     since we already know the best score.  This is done here so that
   2277     no fsmnode tokens (there are none active now) need updating.  This is also
   2278     done here before epsilons - that way we don't need to update the word
   2279     tokens .
   2280 
   2281     We assume this was done just before part2.
   2282    *------------------------------------------------------------------------*/
   2283 
   2284 #if USE_COMP_STATS
   2285   start_cs_clock(&comp_stats->hmm_to_fsm);
   2286 #endif
   2287 
   2288   /*------------------------------------------------------------------------*
   2289     5. For end hmm states which are above the pruning threshold, create new
   2290     FSMnode_tokens.
   2291    *------------------------------------------------------------------------*/
   2292 
   2293   num_updates = update_from_hmms_to_fsmnodes(rec, current_prune_delta, current_best_cost);
   2294   if (num_updates == 0)
   2295   {
   2296     num_updates = update_from_hmms_to_fsmnodes(rec, 2 * current_prune_delta, current_best_cost);
   2297     SREC_STATS_INC_FORCED_UPDATES();
   2298   }
   2299   SREC_STATS_UPDATE(rec);
   2300 
   2301 #if USE_COMP_STATS
   2302   end_cs_clock(&comp_stats->hmm_to_fsm, rec->num_new_states);
   2303   start_cs_clock(&comp_stats->epsilon);
   2304 #endif
   2305 
   2306   /* srec_stats_update(rec, "(...5) "); */
   2307 
   2308   /*------------------------------------------------------------------------*
   2309     6. update epsilons, including word boundary arcs (which put words onto the word lattice).
   2310     epsilon updates go from FSM node to FSM node.
   2311    *------------------------------------------------------------------------*/
   2312 
   2313   /*clear priority_q for this frame*/
   2314   clear_priority_q(rec->word_priority_q);
   2315 
   2316   num_fsm_nodes_updated = (nodeID) do_epsilon_updates(rec, current_prune_delta, current_best_cost);
   2317 
   2318 #if USE_COMP_STATS
   2319   end_cs_clock(&comp_stats->epsilon, num_fsm_nodes_updated);
   2320 #endif
   2321 
   2322   /* srec_stats_update(rec, "(...6) "); */
   2323   rec->current_search_frame++;
   2324 
   2325   /* no need to prune again after epsilons since they add no new cost - if we
   2326      add costs to epsilon arcs (at word boundaries for example), add another
   2327      pruning stage */
   2328 
   2329   word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
   2330   lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
   2331 }
   2332 
   2333 /* get the top choice, trace it back, and find out where speech starts
   2334    and ends.  this is used for channel normalization */
   2335 
   2336 static srec* WHICH_RECOG(multi_srec* rec)
   2337 {
   2338 #if DO_ALLOW_MULTIPLE_MODELS
   2339   srec* return_rec = NULL;
   2340   costdata current_best_cost = MAXcostdata;
   2341   int i = 0;
   2342   for (i = 0; i < rec->num_activated_recs; i++)
   2343   {
   2344     if (current_best_cost > rec->rec[i].current_best_cost)
   2345     {
   2346       current_best_cost = rec->rec[i].current_best_cost;
   2347       return_rec = &rec->rec[i];
   2348     }
   2349   }
   2350   return return_rec;
   2351 #else
   2352     return &rec->rec[0];
   2353 #endif
   2354 }
   2355 
   2356 void multi_srec_get_speech_bounds(multi_srec* recm, frameID* start_frame, frameID* end_frame)
   2357 {
   2358   frameID csf;
   2359   wtokenID token_index;
   2360   wordID last_word;
   2361   srec* rec = WHICH_RECOG(recm);
   2362 
   2363   *start_frame = *end_frame = 0;
   2364 
   2365   if (!rec)
   2366     return;
   2367   csf = rec->current_search_frame;
   2368   token_index = rec->word_lattice->words_for_frame[csf];
   2369   last_word = MAXwordID;
   2370   while (token_index != MAXwtokenID)
   2371   {
   2372     word_token* wtoken = &rec->word_token_array[token_index];
   2373     word_token* next_wtoken;
   2374 
   2375     if (wtoken->word == rec->context->beg_silence_word)
   2376     {
   2377       if (*start_frame == 0) *start_frame = wtoken->end_time;
   2378     }
   2379     if (wtoken->word == rec->context->hack_silence_word)
   2380     {
   2381       if (wtoken->backtrace != MAXwtokenID)
   2382       {
   2383         next_wtoken = &rec->word_token_array[wtoken->backtrace];
   2384         if (next_wtoken->word == rec->context->beg_silence_word)
   2385           *start_frame = wtoken->end_time;
   2386       }
   2387     }
   2388 
   2389     if (last_word == rec->context->end_silence_word)
   2390     {
   2391       *end_frame = wtoken->end_time;
   2392       if (wtoken->word ==  rec->context->hack_silence_word
   2393           && wtoken->backtrace != MAXwtokenID)
   2394       {
   2395         next_wtoken = &rec->word_token_array[wtoken->backtrace];
   2396         *end_frame = WORD_TOKEN_GET_WD_ETIME( next_wtoken);
   2397       }
   2398     }
   2399     if (token_index ==  wtoken->backtrace)
   2400     {
   2401       /* infinite loop! */
   2402       PLogError ("warning: breaking infinite loop\n");
   2403       *end_frame = 0;
   2404       break;
   2405     }
   2406     token_index = wtoken->backtrace;
   2407     last_word = wtoken->word;
   2408   }
   2409 }
   2410 
   2411 int multi_srec_get_eos_status(multi_srec* rec)
   2412 {
   2413   int rc;
   2414   ASSERT(rec);
   2415   rc = (int)rec->eos_status;
   2416   if (rc < 0) rc = 0;
   2417   return rc;
   2418 }
   2419 
   2420   /*
   2421      ToDo List:
   2422 
   2423      end-pointing
   2424      duration
   2425      channel normalization
   2426      re-use and appropriate killing of word_tokens
   2427      pruning fsmnode_tokens
   2428      astar backward for alternative choices
   2429 
   2430      minimized graphs and word merging
   2431      Johans idea:
   2432      When propagating a fsmarc_token, we need to remember the word.id when it
   2433      is observed.  Let's continue to use fsmarc_token->word[] to remember those.
   2434      When merging 2+ fsmarc_tokens into a fsmnode_token, we need remember
   2435      both histories, not just the best. All histories and maintained on a linked
   2436      list, with word_token->next_token_index serving as links, somehow we also
   2437      remember the cost offset from one link to the next and keep track of that.
   2438      Try to create the word_token as late a possible, so as to keep usage down.
   2439      The list should be sorted so that we can drop things off the end, Ie. don't
   2440      need to keep all word, a max of 10 is fine cuz that's the most we'll need
   2441      to drop off at a .wb anyways!
   2442 
   2443      altwords .. working .. now cpu optimize ... ideas
   2444      use only the head refcount, #define the refcopy, not a function
   2445      free_altword_token_batch() should not double check for AWTNULL
   2446      BUILD & BUILD_DEBUG in selected areas
   2447      reprune_altword_token_batch ... change costbasis to a tag ... to say (already repruned)
   2448 
   2449 
   2450 
   2451      endpointing
   2452      at grammar prepare ...
   2453      get the list of endnodes ... get the list of opendnodes
   2454      ... start from the graph's endnode, walk backwards on all null or silence arcs, find the nodes which have a silence or null path to the end: those are sinknodes
   2455      ... sinknodes are endnodes or opendnodes ... the sinknodesO are the sinknodes that do go to speech arcs .. the sinknodes1 are the sinknodes that do not go to any speech arcs
   2456      ... walkforward all sinknodes0 through iwt arcs, those are openendnodes
   2457      ... walkforward all sinknodes1 through iwt arcs, those are endnodes
   2458      get the top score fsmnode_token ...
   2459      ... is it on an endnode ... has this been the top choice for the last 30 frames
   2460      ... is it on an optional endnode ... has this neen the top choice for the last 50 frames?
   2461 
   2462   */
   2463