Home | History | Annotate | Download | only in crec
      1 /*---------------------------------------------------------------------------*
      2  *  priority_q.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include <math.h>
     23 #include "passert.h"
     24 
     25 #include "portable.h"
     26 
     27 #include "hmm_desc.h"
     28 #include "utteranc.h"
     29 #include "hmmlib.h"
     30 
     31 #include "srec_sizes.h"
     32 #include "search_network.h"
     33 #include "srec.h"
     34 #include "word_lattice.h"
     35 
     36 #define PRINT_SEARCH_DETAILS 0
     37 
     38 /*this is just implemented as a list so far - FIX this!!*/
     39 
     40 /*allocates priority_q to han le max_n entries*/
     41 priority_q* allocate_priority_q(int max_n)
     42 {
     43   priority_q *pq;
     44 
     45   pq = (priority_q*) CALLOC(1, sizeof(priority_q), "search.srec.priority_q");
     46   pq->max_cost_in_q = MAXcostdata;
     47   pq->word_token_list = MAXwordID;
     48   pq->max_in_q = (miscdata)max_n;
     49   pq->num_in_q = 0;
     50   return pq;
     51 }
     52 
/* Frees the priority_q structure itself.
   NOTE(review): word tokens still linked on pq->word_token_list are not
   released here — presumably they belong to the recognizer's word token
   pool and are recycled elsewhere; confirm callers do not rely on this
   function to free them. */
void free_priority_q(priority_q* pq)
{
  FREE(pq);
}
     57 
     58 /*empties out priority_q*/
     59 
     60 void clear_priority_q(priority_q *pq)
     61 {
     62   pq->max_cost_in_q = MAXcostdata;
     63   pq->word_token_list = MAXwordID;
     64   pq->num_in_q = 0;
     65   /* Jean: what about the list of free tokens? */
     66 }
/* Returns the head of a linked list of all word tokens in the priority_q.
   Returns MAXwtokenID if the list is empty.
   (word_token_array is unused here; presumably kept for signature symmetry
   with the other queue accessors.) */

wtokenID get_word_token_list(priority_q *pq, word_token *word_token_array)
{
  return pq->word_token_list;
}
     74 
/* Removes (and frees) every word token in the queue whose end_node differs
   from the given end_node, keeping only word endings at that node.
   num_in_q and max_cost_in_q are recomputed over the surviving tokens as
   the list is filtered in place. */
void remove_non_end_word_from_q(srec *rec, priority_q *pq, word_token *word_token_array, nodeID end_node)
{
  word_token *token;
  wtokenID *ptoken_index;     /* points at the link to the current element */
  wtokenID old_token_index;

  /* start the bookkeeping from "empty" and rebuild it during the walk */
  pq->max_cost_in_q = MAXcostdata;
  pq->num_in_q = 0;
  ptoken_index = &(pq->word_token_list);

  while (*ptoken_index != MAXwtokenID)
  {
    token = &(word_token_array[*ptoken_index]);
    if (token->end_node != end_node)
    {
      /* unlink and recycle this token; ptoken_index is not advanced because
         the next element has just been spliced into *ptoken_index */
      old_token_index = *ptoken_index;
      *ptoken_index = token->next_token_index;
      free_word_token(rec, old_token_index);
      /* NOTE(review): this reset discards any max computed from earlier
         survivors; harmless if the list stays sorted worst-cost-first as
         add_word_token_to_priority_q's insertion implies — confirm */
      pq->max_cost_in_q = MAXcostdata; /* fix: sep9 */
    }
    else
    {
      pq->num_in_q++;
      /* track the worst (highest) cost seen among surviving tokens */
      if ((pq->max_cost_in_q == MAXcostdata) || (token->cost > pq->max_cost_in_q))
      {
        pq->max_cost_in_q = token->cost;
      }
      ptoken_index = &(token->next_token_index);
    }
  }
}
    106 
    107 int compare_histories(word_token* token1, word_token* token2,
    108                       word_token* word_token_array)
    109 {
    110   int history_for_token1 = 0;
    111   int history_for_token2 = 0;
    112 
    113   /* compare_histories() was an attempt to be smart about the priority_q,
    114      in that we don't need to store two word_tokens when the two tokens
    115      are the same word (obviously ending at the same frame), and with the
    116      same word history.  This happens for a digit that has multiple end nodes
    117      due to context-dependency.  When "history_for_token" ignores the end_node,
    118      then we're all clear to save just 1 word_token, but continue propagating
    119      all paths from the end nodes.  That bit of "continue propagating" is not
    120      done. THE OTHER PROBLEM is that the two nodes may NOT be
    121      simply different CD end models, they may be different from digit shifting!
    122      We're screwed if we drop the path, unless we compare all the way back to
    123      the start of utterance. */
    124 
    125   if (token1->word != token2->word)
    126     return 1;
    127   if (token1->end_node != token2->end_node)
    128     return 1;
    129 
    130   if (token1->backtrace != MAXwordID)
    131   {
    132     history_for_token1 += token1->end_node * 1000000;
    133     history_for_token1 += word_token_array[token1->backtrace].word * 10000;
    134     history_for_token1 += word_token_array[token1->backtrace].end_time;
    135   }
    136 
    137   if (token2->backtrace != MAXwordID)
    138   {
    139     history_for_token2 += token2->end_node * 1000000;
    140     history_for_token2 += word_token_array[token2->backtrace].word * 10000;
    141     history_for_token2 += word_token_array[token2->backtrace].end_time;
    142   }
    143 
    144 #if PRINT_SEARCH_DETAILS
    145   printf("comparing history_for_token1 %d history_for_token2 %d\n",
    146          history_for_token1, history_for_token2);
    147 #endif
    148 
    149   if (history_for_token1 == history_for_token2)
    150   {
    151     return 0;
    152   }
    153   else
    154   {
    155     return 1;
    156   }
    157 }
    158 
#if PRINT_SEARCH_DETAILS
/* Debug-only consistency check: verifies that num_in_q matches the actual
   linked-list length, and that when the queue is full max_cost_in_q equals
   the cost of the head token (the worst-scoring entry).
   NOTE(review): the loop compares a wtokenID against MAXwordID — presumably
   the two sentinels share a value; confirm against srec_sizes.h. */
void sanity_check_priority_q(priority_q* pq, word_token *word_token_array)
{
  int n = 0;
  wtokenID token_index;
  word_token* token;
  n = 0;
  token_index = pq->word_token_list;
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    token_index = token->next_token_index;
    n++;
  }
  ASSERT(n == pq->num_in_q);
  if (pq->num_in_q == pq->max_in_q)
  {
    token = &(word_token_array[pq->word_token_list]);
    ASSERT(pq->max_cost_in_q == token->cost);
  }
}
#endif
    181 
    182 /*adds a word token to the priority_q.  Returns the index of the word to
    183   remove.
    184   if nothing needs to be removed, returns MAXwtokenID.
    185   if no room on priority_q, returns the one being put on */
    186 
    187 wtokenID add_word_token_to_priority_q(priority_q *pq, wtokenID token_index_to_add, word_token *word_token_array)
    188 {
    189   word_token *token;
    190   word_token *token_to_add;
    191   wtokenID token_index, return_token_index;
    192   wordID word_to_add;
    193   costdata cost_to_add;
    194   wtokenID *ptoken_index;
    195   wtokenID *pplace_to_add;
    196   wtokenID *pdelete_index;
    197   word_token *token_to_delete;
    198 
    199   token_to_add = &(word_token_array[token_index_to_add]);
    200   cost_to_add = token_to_add->cost;
    201 
    202 #if PRINT_SEARCH_DETAILS
    203   printf("WORDADD PQ tokenid %d cost %d\n", token_index_to_add, cost_to_add);
    204   token_index = pq->word_token_list;
    205   while (token_index != MAXwordID)
    206   {
    207     token = &(word_token_array[token_index]);
    208     printf("WORDADD PQ token %d word %d cost %d\n", token_index, token->word, token->cost);
    209     token_index = token->next_token_index;
    210   }
    211 #endif
    212 
    213   if (cost_to_add >= pq->max_cost_in_q && pq->num_in_q >= pq->max_in_q)
    214   {
    215 #if PRINT_SEARCH_DETAILS
    216     printf("WORDADD PQ - rejecting because cost too high cost_to_add(%d) max_cost_in_q(%d) num_in_q(%d)\n",
    217            cost_to_add, pq->max_cost_in_q, pq->num_in_q);
    218 #endif
    219 #if PRINT_SEARCH_DETAILS
    220     printf("WORDADD PQ (D) returning %d\n", token_index_to_add);
    221     sanity_check_priority_q(pq, word_token_array);
    222 #endif
    223     return token_index_to_add;
    224   }
    225 
    226   word_to_add = token_to_add->word;
    227   /* search for duplicate words first */
    228   ptoken_index = &(pq->word_token_list);
    229   pplace_to_add = NULL;
    230   pdelete_index = NULL;
    231   while ((*ptoken_index) != MAXwordID)
    232   {
    233     token = &word_token_array[(*ptoken_index)];
    234 
    235     if (token->word == token_to_add->word
    236         && !compare_histories(token, token_to_add, word_token_array))
    237     {
    238       if (token->cost < cost_to_add)
    239       {
    240         /* don't bother adding, there's another like it on the list!
    241            with a better score! */
    242 #if PRINT_SEARCH_DETAILS
    243         printf("WORDADD PQ - rejecting because another like it is on the list\n");
    244 #endif
    245         /* TODO: when returning back on the basis that something else is better,
    246            we should let the caller know what to use instead, ie, make the
    247            distinction between no-space and something-else-better */
    248         token = &word_token_array[ token_index_to_add];
    249         token->next_token_index = (*ptoken_index);
    250         return token_index_to_add;
    251       }
    252       else
    253       {
    254         /* ok, replace the one on the list with this better scoring one! */
    255         pdelete_index = ptoken_index;
    256       }
    257     }
    258     if (token->cost < cost_to_add && pplace_to_add == NULL)
    259     {
    260       pplace_to_add = ptoken_index;
    261       /* do not break, 'cuz we're still searching for a possible duplicates */
    262     }
    263     ptoken_index = &(token->next_token_index);
    264   }
    265   if (!pplace_to_add)
    266     pplace_to_add = ptoken_index;
    267 
    268   /* add the token by inserting in the linked list */
    269   token_index = *pplace_to_add;
    270   *pplace_to_add = token_index_to_add;
    271   token_to_add->next_token_index = token_index;
    272   pq->num_in_q++;
    273   if (pplace_to_add == &pq->word_token_list && pq->num_in_q >= pq->max_in_q)
    274     pq->max_cost_in_q = cost_to_add;
    275 
    276   /* now delete any duplicate that was found */
    277   if (pdelete_index)
    278   {
    279     token_index = *pdelete_index;
    280     token_to_delete = &word_token_array[  token_index];
    281     *pdelete_index = token_to_delete->next_token_index;
    282     pq->num_in_q--;
    283 #if PRINT_SEARCH_DETAILS
    284     printf("WORDADD PQ (B) returning %d\n", token_index);
    285 #endif
    286     return_token_index = token_index;
    287   }
    288 
    289   /* now check for max length in the queue */
    290   if (pq->num_in_q > pq->max_in_q)
    291   { /* really expecting just 1 over */
    292     token_index = pq->word_token_list;
    293     token = &(word_token_array[ token_index]);
    294     pq->num_in_q--;
    295     pq->word_token_list = token->next_token_index;
    296 #if PRINT_SEARCH_DETAILS
    297     printf("WORDADD PQ (C) returning %d\n", token_index);
    298 #endif
    299     return_token_index = token_index;
    300   }
    301   else
    302   {
    303     return_token_index = MAXwtokenID;
    304   }
    305   if (pq->num_in_q >= pq->max_in_q)
    306   {
    307     token_index = pq->word_token_list;
    308     token = &(word_token_array[token_index]);
    309     pq->max_cost_in_q = token->cost;
    310   }
    311   else
    312   { /* pq->num_in_q < pq->max_in_q, fixed sep9 */
    313     pq->max_cost_in_q = MAXcostdata;
    314   }
    315 #if PRINT_SEARCH_DETAILS
    316   printf("WORDADD PQ (A) returning %d\n", token_index);
    317   sanity_check_priority_q(pq, word_token_array);
    318 #endif
    319   return return_token_index;
    320 }
    321 
    322 
/*returns the cost threshold for the end of the priority queue.
  If words have greater cost than this, no need to try to put them on the
  queue.  (word_token_array is unused here; presumably kept for signature
  symmetry with the other queue accessors.)*/

costdata get_priority_q_threshold(priority_q *pq, word_token *word_token_array)
{
  return pq->max_cost_in_q;
}
    331 
    332 
    333 
    334 
    335 
    336