Home | History | Annotate | Download | only in crec
      1 /*---------------------------------------------------------------------------*
      2  *  srec_initialize.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #ifndef _RTT
     21 #include "pstdio.h"
     22 #endif
     23 #include <stdlib.h>
     24 #include <string.h>
     25 #include <math.h>
     26 #include "passert.h"
     27 
     28 #include "portable.h"
     29 
     30 #include "hmm_desc.h"
     31 #include "utteranc.h"
     32 #include "hmmlib.h"
     33 
     34 #include "srec_sizes.h"
     35 #include "srec.h"
     36 #include "word_lattice.h"
     37 #include "swimodel.h"
     38 
     39 #include "c42mul.h"
     40 
     41 /*this file contains code which handles the initialization of the srec data structures*/
     42 
     43 /*allocates an srec -
     44 
     45 input args come from config and are:
     46 
     47     int         viterbi_prune_thresh;  score-based pruning threshold - only keep paths within this delta of best cost
     48 
    int         max_hmm_tokens;       controls the maximum number of HMMs alive in any frame.  If this number
     is exceeded, pruning gets tightened.  So, this threshold can be used
     to trade off accuracy for computation and memory
    int         max_fsmnode_tokens;   controls the maximum number of FSMs alive in any frame.  If this number
     is exceeded, pruning gets tightened.  So, this threshold can be used
     to trade off accuracy for computation and memory
    int         max_word_tokens;      controls the maximum number of word tokens kept in the word lattice.
     If this number is exceeded, the word lattice is pruned more tightly (fewer word
     ends per frame)
     58 
     59     int         max_altword_tokens;     controls the maximum number of alternative paths to propagate for proper nbest
     60 
     61     int         num_wordends_per_frame; controls the size of the word lattice - the number of word ends to
     62        keep at each time frame
     63     int         max_fsm_nodes;        allocation size of a few arrays in the search - needs to be big enough
     64      to handle any grammar that the search needs to run.  Initialization fails
     65      if num exceeded
     66     int         max_fsm_arcs;         allocation size of a few arrays in the search - needs to be big enough
     67      to handle any grammar that the search needs to run.  Initialization fails
     68      if num exceeded
     69 
     70 */
     71 
     72 static void allocate_recognition1(srec *rec,
     73                                   int viterbi_prune_thresh,  /*score-based pruning threshold - only keep paths within this delta of best cost*/
     74                                   int max_hmm_tokens,
     75                                   int max_fsmnode_tokens,
     76                                   int max_word_tokens,
     77                                   int max_altword_tokens,
     78                                   int num_wordends_per_frame,
     79                                   int max_frames,
     80                                   int max_model_states)
     81 {
     82 #ifdef SREC_ENGINE_VERBOSE_LOGGING
     83   PLogMessage("allocating recognition arrays2 prune %d max_hmm_tokens %d max_fsmnode_tokens %d max_word_tokens %d max_altword_tokens %d max_wordends_per_frame %d\n",
     84               viterbi_prune_thresh,
     85               max_hmm_tokens,
     86               max_fsmnode_tokens,
     87               max_word_tokens,
     88               max_altword_tokens,
     89               num_wordends_per_frame);
     90 #endif
     91   rec->current_model_scores = (costdata*) CALLOC_CLR(max_model_states, sizeof(costdata), "search.srec.current_model_scores"); /*FIX - either get NUM_MODELS from acoustic models, or check this someplace to make sure we have enough room*/
     92   rec->num_model_slots_allocated = (modelID)max_model_states;
     93 
     94   rec->fsmarc_token_array_size = (stokenID)max_hmm_tokens;
     95 
     96   rec->fsmarc_token_array = (fsmarc_token*) CALLOC_CLR(rec->fsmarc_token_array_size , sizeof(fsmarc_token), "search.srec.fsmarc_token_array");
     97   rec->max_new_states = (stokenID)max_hmm_tokens;
     98 
     99   rec->word_token_array = (word_token*) CALLOC_CLR(max_word_tokens, sizeof(word_token), "search.srec.word_token_array");
    100   rec->word_token_array_size = (wtokenID)max_word_tokens;
    101   /* todo: change this to a bit array later */
    102   rec->word_token_array_flags = (asr_int16_t*) CALLOC_CLR(max_word_tokens, sizeof(asr_int16_t), "search.srec.word_token_array_flags");
    103 
    104   rec->fsmnode_token_array = (fsmnode_token*) CALLOC_CLR(max_fsmnode_tokens, sizeof(fsmnode_token), "search.srec.fsmnode_token_array");
    105   rec->fsmnode_token_array_size = (ftokenID)max_fsmnode_tokens;
    106 
    107   rec->altword_token_array = (altword_token*) CALLOC_CLR(max_altword_tokens, sizeof(altword_token), "search.srec.altword_token_array");
    108   rec->altword_token_array_size = (wtokenID)max_altword_tokens;
    109 
    110   rec->prune_delta = (costdata)viterbi_prune_thresh;
    111 
    112   rec->max_frames   = (frameID)max_frames;
    113   rec->best_model_cost_for_frame = (costdata*)CALLOC_CLR(max_frames, sizeof(costdata), "search.srec.best_model_cost_for_frame");
    114   rec->word_lattice = allocate_word_lattice((frameID)max_frames);
    115 
    116   rec->word_priority_q = allocate_priority_q(num_wordends_per_frame);
    117   rec->best_fsmarc_token = MAXstokenID;
    118 
    119 #define ASTAR_NBEST_LEN 10
    120   rec->astar_stack = astar_stack_make(rec, ASTAR_NBEST_LEN);
    121   rec->context = NULL;
    122 }
    123 
    124 static int check_parameter_range(int parval, int parmin, int parmax, const char* parname)
    125 {
    126   if (parval > parmax)
    127   {
    128     log_report("Error: %s value %d is out-of-range [%d,%d]\n", parname,
    129                parval, parmin, parmax);
    130     return 1;
    131   }
    132   else
    133   {
    134     return 0;
    135   }
    136 }
    137 
    138 int allocate_recognition(multi_srec *rec,
    139                          int viterbi_prune_thresh,  /*score-based pruning threshold - only keep paths within this delta of best cost*/
    140                          int max_hmm_tokens,
    141                          int max_fsmnode_tokens,
    142                          int max_word_tokens,
    143                          int max_altword_tokens,
    144                          int num_wordends_per_frame,
    145                          int max_fsm_nodes,
    146                          int max_fsm_arcs,
    147                          int max_frames,
    148                          int max_model_states,
    149                          int max_searches)
    150 {
    151   int i;
    152 
    153   if (check_parameter_range(max_fsm_nodes, 1, MAXnodeID, "max_fsm_nodes"))
    154     return 1;
    155   if (check_parameter_range(max_fsm_arcs, 1, MAXarcID, "max_fsm_arcs"))
    156     return 1;
    157   if (check_parameter_range(max_frames, 1, MAXframeID, "max_frames"))
    158     return 1;
    159   if (check_parameter_range(max_model_states, 1, MAXmodelID, "max_model_states"))
    160     return 1;
    161   if (check_parameter_range(max_hmm_tokens, 1, MAXstokenID, "max_hmm_tokens"))
    162     return 1;
    163   if (check_parameter_range(max_fsmnode_tokens, 1, MAXftokenID, "max_fsmnode_tokens"))
    164     return 1;
    165   if (check_parameter_range(viterbi_prune_thresh, 1, MAXcostdata, "viterbi_prune_thresh"))
    166     return 1;
    167   if (check_parameter_range(max_altword_tokens, 0, MAXftokenID, "max_altword_tokens"))
    168     return 1;
    169   if (check_parameter_range(max_searches, 1, 2, "max_searches"))
    170     return 1;
    171 
    172   rec->rec = (srec*)CALLOC_CLR(max_searches, sizeof(srec), "search.srec.base");
    173   rec->num_allocated_recs = max_searches;
    174   rec->num_swimodels      = 0;
    175 
    176   /* best_token_for_arc and best_token_for_node are shared across
    177      multiple searches */
    178   rec->best_token_for_arc = (stokenID*)CALLOC_CLR(max_fsm_arcs, sizeof(stokenID), "search.srec.best_token_for_arc");
    179   rec->max_fsm_arcs = (arcID)max_fsm_arcs;
    180 
    181   rec->best_token_for_node = (ftokenID*)CALLOC_CLR(max_fsm_nodes, sizeof(ftokenID), "search.srec.best_token_for_node");
    182   rec->max_fsm_nodes = (nodeID)max_fsm_nodes;
    183 
    184   /* cost offsets and accumulated cost offsets are pooled for all
    185      different searches, this saves memory and enables each search
    186      to know it's total scores */
    187   rec->cost_offset_for_frame = (costdata*)CALLOC_CLR(max_frames, sizeof(costdata), "search.srec.current_best_costs");
    188   rec->accumulated_cost_offset = (bigcostdata*)CALLOC_CLR(max_frames, sizeof(bigcostdata), "search.srec.accumulated_cost_offset");
    189   rec->max_frames = (frameID)max_frames;
    190   for (i = 0; i < max_frames; i++)
    191     rec->accumulated_cost_offset[i] = 0;
    192 
    193   /* now copy the shared data down to individual recogs */
    194   for (i = 0; i < rec->num_allocated_recs; i++)
    195   {
    196     allocate_recognition1(&rec->rec[i], viterbi_prune_thresh, max_hmm_tokens, max_fsmnode_tokens, max_word_tokens, max_altword_tokens, num_wordends_per_frame, max_frames, max_model_states);
    197     rec->rec[i].best_token_for_node     = rec->best_token_for_node;
    198     rec->rec[i].max_fsm_nodes           = rec->max_fsm_nodes;
    199     rec->rec[i].best_token_for_arc      = rec->best_token_for_arc;
    200     rec->rec[i].max_fsm_arcs            = rec->max_fsm_arcs;
    201     rec->rec[i].max_frames              = rec->max_frames;
    202     rec->rec[i].cost_offset_for_frame   = rec->cost_offset_for_frame;
    203     rec->rec[i].accumulated_cost_offset = rec->accumulated_cost_offset;
    204     rec->rec[i].id = (asr_int16_t)i;
    205   }
    206   rec->eos_status = VALID_SPEECH_NOT_YET_DETECTED;
    207   return 0;
    208 }
    209 
    210 
    211 static void free_recognition1(srec *rec)
    212 {
    213   FREE(rec->current_model_scores);
    214   FREE(rec->fsmarc_token_array);
    215   FREE(rec->word_token_array);
    216   FREE(rec->word_token_array_flags);
    217   FREE(rec->fsmnode_token_array);
    218   FREE(rec->altword_token_array);
    219   FREE(rec->best_model_cost_for_frame);
    220   destroy_word_lattice(rec->word_lattice);
    221   free_priority_q(rec->word_priority_q);
    222   astar_stack_destroy(rec);
    223 }
    224 
    225 void free_recognition(multi_srec *rec)
    226 {
    227   int i;
    228   for (i = 0; i < rec->num_allocated_recs; i++)
    229     free_recognition1(&rec->rec[i]);
    230   FREE(rec->accumulated_cost_offset);
    231   FREE(rec->cost_offset_for_frame);
    232   FREE(rec->best_token_for_node);
    233   FREE(rec->best_token_for_arc);
    234   FREE(rec->rec);
    235 }
    236 
    237