Home | History | Annotate | Download | only in clib
      1 /*---------------------------------------------------------------------------*
      2  *  srec_arb.c                                                               *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include "pstdio.h"
     21 #include "passert.h"
     22 #include "portable.h"
     23 
     24 #include<string.h>
     25 
     26 #include"portable.h"
     27 
     28 #include"sizes.h"
     29 #include"hmm_desc.h"
     30 #include"search_network.h"     /* for EPSILON_OFFSET */
     31 #include"srec_arb.h"
     32 
     33 #define DEBUG_PRONS       0
     34 #define IF_DEBUG_PRONS(X)
     35 
     36 static const char *rcsid = 0 ? (const char *) &rcsid :
     37 "$Id: srec_arb.c,v 1.27.4.15 2007/12/14 22:03:51 dahan Exp $";
     38 
     39 int question_check(srec_question* quest, phonemeID lphon, phonemeID cphon, phonemeID rphon)
     40 {
     41   asr_int16_t a = 0, b = 0;
     42   /* phon = a*16+b */
     43   if (quest->qtype == QUESTION_LEFT)
     44   {
     45     BIT_ADDRESS(lphon, a, b);
     46   }
     47 #if USE_WWTRIPHONE
     48   else if(quest->qtype == QUESTION_WBLEFT) {
     49     if( lphon == WBPHONEME_CODE) return ANSWER_PASS;
     50     else return ANSWER_FAIL;
     51   }
     52   else if(quest->qtype == QUESTION_WBRIGHT) {
     53     if( rphon == WBPHONEME_CODE) return ANSWER_PASS;
     54     else return ANSWER_FAIL;
     55   }
     56 #endif
     57   else
     58   {
     59     ASSERT(quest->qtype == QUESTION_RIGHT);
     60     BIT_ADDRESS(rphon, a, b);
     61   }
     62   return (quest->membership_bits[a] & b ? ANSWER_PASS : ANSWER_FAIL);
     63 }
     64 
     65 /* get model id for phoneme in context */
     66 int get_modelid_for_pic(srec_arbdata* allotree, phonemeID lphon, phonemeID cphon, phonemeID rphon)
     67 {
     68   int ans;
     69   tree_node* tnode = allotree->pdata[cphon].model_nodes;
     70   while (tnode->node.quest_index >= 0)
     71   {
     72     ans = question_check(&allotree->questions[tnode->node.quest_index],
     73                          lphon, cphon, rphon);
     74     tnode = (ans == ANSWER_FAIL ? (tree_node*)tnode->node.fail : (tree_node*)tnode->node.pass);
     75   }
     76   return tnode->term.pelid;
     77 }
     78 
     79 void read_questions(srec_question** pquestions, asr_int16_t num_questions, char **buffer, PFile *fp)
     80 {
     81   srec_question *q;
     82 
     83   q = *pquestions = (srec_question*)(*buffer);
     84 
     85   *buffer += num_questions * sizeof(srec_question);
     86   while (num_questions-- > 0)
     87   {
     88     pfread(&(q->qtype), sizeof(asr_uint16_t), 1, fp);
     89     pfread(&(q->membership_bits), sizeof(asr_uint16_t), PSET_BIT_ARRAY_SIZE, fp);
     90     q++;
     91   }
     92 }
     93 
/* We need to handle the interword silence here somehow.
   Proposal: we create one supermodel which combines the model
   preceding silence and the model that follows silence, so for
   "boston&mass" we'll have "n&m" as a single "supermodel".
   We'll put that supermodel in the graph but then overlay the
   actual models thereon.  The overlay only needs to be done
   once.  The number of possible supermodels is 113655, which is
   larger than what an ilabel can hold; the solution to that is
   to also encode on the "cost" of the supermodel arc.

   /------SUPER(a&b)---\
   o----a1---o----b1----o
   \--a2--o--#--o--b2--/

   cost is 16bits, ilabel is 16bits
   on ilabel we encode the a1,(a2-a1)
   on cost we encode b1,(b2-b1)
   ... a1,b1 use 9 bits (400 models)
   ... deltas use 6 bits (+/-31 range)
   That leaves 1 bit left over, which is the top bit to signal this encoding,
   and make sure the cost is very high.
*/
    116 
    117 
    118 int get_modelids_for_pron(srec_arbdata* allotree,
    119                           const char* phonemes, int num_phonemes,
    120                           modelID* acoustic_model_ids)
    121 {
    122   int i;
    123   modelID modelid;
    124   phonemeID lphon, cphon, rphon;
    125 
    126   if( allotree == NULL)
    127 	  return 1;
    128 
    129   if (num_phonemes == 0)
    130     return 0;
    131 
    132   IF_DEBUG_PRONS(printf("%s get_modelids_for_pron pronunciation %s\n", __FILE__, (char*)phonemes));
    133 
    134 #if !USE_WWTRIPHONE
    135   lphon = (phonemeID)allotree->phoneme_index[ SILENCE_CODE];
    136   cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]];
    137 #else
    138   lphon = WBPHONEME_CODE; //(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE];
    139   cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]];
    140 #endif
    141   if(cphon == MAXphonemeID)
    142     return 1; /* bad phoneme */
    143   for(i=0; i<num_phonemes; i++) {
    144 #if !USE_WWTRIPHONE
    145     rphon = (i==num_phonemes-1 ?
    146 	     (phonemeID)allotree->phoneme_index[ SILENCE_CODE] :
    147 	     (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ;
    148 #else
    149     rphon = (i==num_phonemes-1 ?
    150 	     WBPHONEME_CODE /*(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE] */ :
    151 	     (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ;
    152 #endif
    153     if (rphon == MAXphonemeID)
    154       return 1; /* bad phoneme */
    155 
    156     modelid = (modelID) get_modelid_for_pic(allotree, lphon, cphon, rphon);
    157     acoustic_model_ids[i] = modelid;
    158 #if DEBUG_PRONS
    159     printf("%c%c%c hmm%d states", allotree->pdata[lphon].code,
    160            allotree->pdata[cphon].code, allotree->pdata[rphon].code,
    161            acoustic_model_ids[i]);
    162     for (j = 0; j < allotree->hmm_infos[modelid].num_states; j++)
    163       printf(" %d", allotree->hmm_infos[modelid].state_indices[j]);
    164     printf("\n");
    165 #endif
    166     lphon = cphon;
    167     cphon = rphon;
    168   }
    169   return 0;
    170 }
    171 
    172 /*-----------------------------------------------------------------------*
    173  *                                                                       *
    174  * phoneme data stream functions                                         *
    175  *                                                                       *
    176  *-----------------------------------------------------------------------*/
    177 
    178 tree_node* read_tree_node_f(char **buffer, PFile *fp)
    179 {
    180   tree_node* tnode = (tree_node*) * buffer;
    181   pfread(&(tnode->node.quest_index), sizeof(asr_int16_t), 1, fp);
    182   pfread(&(tnode->term.pelid), sizeof(asr_int16_t), 1, fp);
    183   pfread(&(tnode->node.fail), sizeof(tree_branch_info*), 1, fp);
    184   pfread(&(tnode->node.pass), sizeof(tree_branch_info*), 1, fp);
    185 
    186   /* because tree_node is a union, the actual size maybe large than we have read */
    187   ASSERT(sizeof(asr_int16_t)*2 + sizeof(tree_branch_info *)*2 == sizeof(tree_node));
    188 
    189   *buffer += sizeof(tree_node);
    190   if (tnode->node.quest_index >= 0)
    191   {
    192     tnode->node.fail = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
    193     tnode->node.pass = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
    194   }
    195   return tnode;
    196 }
    197 
    198 void read_phoneme_data(phoneme_data** pdata, asr_int16_t num_phonemes, char **buffer,  PFile *fp)
    199 {
    200   int i, ptr;
    201   phoneme_data *pd;
    202 
    203   pd = *pdata = (phoneme_data*)(*buffer);
    204 
    205   for (i = 0; i < num_phonemes; i++)
    206   {
    207     pfread(&(pd->name), sizeof(char), MAX_PHONEME_NAME_LEN, fp);
    208     pfread(&(pd->code), sizeof(asr_uint16_t), 1, fp);
    209     pfread(&ptr, sizeof(asr_int16_t), 1, fp);
    210     pfread(&(pd->model_nodes), sizeof(tree_node *), 1, fp);
    211     pfread(&(pd->num_states), sizeof(asr_uint16_t), 1, fp);
    212     pfread(&ptr, sizeof(asr_int16_t), 1, fp);
    213     pfread(&(pd->state_nodes), sizeof(tree_node *), MAX_PHONE_STATES, fp);
    214     pd++;
    215   }
    216   ASSERT(sizeof(phoneme_data) == MAX_PHONEME_NAME_LEN + sizeof(asr_int16_t)*4 + sizeof(tree_node *)*(1 + MAX_PHONE_STATES));
    217   (*buffer) += num_phonemes * sizeof(phoneme_data) / BYTES_PER_ATOM;
    218   ASSERT((char *)pd == *buffer);
    219 
    220   for (i = 0; i < num_phonemes; i++)
    221   {
    222 #if STATE_NODES_NEEDED_AT_RUNTIME
    223     for (j = 0; j < (*pdata)[i].num_states; j++)
    224       (*pdata)[i].state_nodes[j] = read_tree_node_f(buffer);
    225 #endif
    226     (*pdata)[i].model_nodes = read_tree_node_f(buffer, fp);
    227   }
    228 }
    229 
    230 /*-----------------------------------------------------------------------*
    231  *                                                                       *
    232  * hmm info stream functions                                             *
    233  *                                                                       *
    234  *-----------------------------------------------------------------------*/
    235 
    236 void read_hmminfos(srec_arbdata* allotree, char** buffer, PFile *fp)
    237 {
    238   int i, offset, num_atoms, num_hmms = allotree->num_hmms, ptr;
    239   HMMInfo* hmm_infos;
    240   hmm_infos = (HMMInfo*) * buffer;
    241   num_atoms = sizeof(HMMInfo) * num_hmms / BYTES_PER_ATOM;
    242   (*buffer) += num_atoms;
    243   for (i = 0; i < num_hmms; i++)
    244   {
    245     pfread(&hmm_infos[i].name[0], sizeof(char), MAX_PHONEME_NAME_LEN, fp);
    246     pfread(&(hmm_infos[i].num_states), sizeof(asr_int16_t), 1, fp);
    247     pfread(&ptr, sizeof(asr_int16_t), 1, fp);
    248     pfread(&(hmm_infos[i].state_indices), sizeof(asr_int16_t*), 1, fp);
    249   }
    250 
    251   /* through this and comments below, I was trying to keep the state_indices
    252      self-contained, to calculate offsets from saved pointers, but it doesn't
    253      appear to work;  so we resort to recovering state offsets from num_states
    254      state_indices = hmm_infos[0].state_indices; */
    255   pfread(*buffer, sizeof(asr_int16_t), allotree->num_states, fp);
    256 
    257   hmm_infos[0].state_indices = (asr_int16_t*) * buffer;
    258   num_atoms = sizeof(hmm_infos[0].state_indices[0]) * allotree->num_states / BYTES_PER_ATOM;
    259   (*buffer) += num_atoms;
    260 
    261   for (i = 0, offset = 0; i < num_hmms; i++)
    262   {
    263     /* int j,offset2 = hmm_infos[i].state_indices - state_indices; */
    264     hmm_infos[i].state_indices = hmm_infos[0].state_indices + offset;
    265     if (i >= HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1)
    266       offset += hmm_infos[i].num_states;
    267     /* printf("offset %d %d offset2 %d\n", i, offset, offset2);
    268        printf("hmm %d %x states", i, hmm_infos[i].state_indices);
    269        for(j=0; j<hmm_infos[i].num_states; j++)
    270        printf(" %d", hmm_infos[i].state_indices[j]);
    271        printf("\n"); */
    272 
    273   }
    274   allotree->hmm_infos = hmm_infos;
    275 }
    276 
    277 /*-----------------------------------------------------------------------*
    278  *                                                                       *
    279  * top level stream functions                                            *
    280  *                                                                       *
    281  *-----------------------------------------------------------------------*/
    282 
    283 int read_arbdata_from_stream(srec_arbdata** pallotree, char* filename, int buffer_size)
    284 {
    285   char* pbuf;
    286   srec_arbdata* allotree;
    287   int ptr;
    288 
    289   PFile* fp;
    290   long fpos;
    291   char* buffer;
    292 
    293   fp = file_must_open(NULL, (char*)filename, L("rb"), ESR_TRUE);
    294   if(!fp) {
    295     *pallotree = NULL;
    296     return 0;
    297   }
    298   pfseek(fp, 0, SEEK_END);
    299   fpos = pftell(fp);
    300   buffer = (char*)CALLOC_CLR(fpos, sizeof(char), "srec.arbdata");
    301   pfseek(fp, 0, SEEK_SET);
    302 
    303   buffer_size = fpos;
    304   pbuf = buffer;
    305 
    306   allotree = (srec_arbdata*)buffer;
    307   /* ASSERT(allotree->image_size == buffer_size); hack for now */
    308 
    309   /* read structure arbdata from file */
    310   pfread(&allotree->image, sizeof(char *), 1, fp);             /* image */
    311   pfread(&allotree->image_size, sizeof(asr_uint16_t), 1, fp);       /* image_szie */
    312   pfread(&allotree->num_phonemes, sizeof(asr_int16_t), 1, fp);      /* num_phonemes */
    313   pfread(&allotree->pdata, sizeof(phoneme_data *), 1, fp);     /* pdate */
    314   pfread(&allotree->num_questions, sizeof(asr_int16_t), 1, fp);     /* num_questions */
    315 
    316   pfread(&ptr, sizeof(asr_int16_t), 1, fp);     /* alignment problem */
    317 
    318   pfread(&allotree->questions, sizeof(srec_question *), 1, fp);/* questions */
    319   pfread(&allotree->num_states, sizeof(asr_int16_t), 1, fp);        /* num_states */
    320   pfread(&allotree->num_hmms, sizeof(asr_int16_t), 1, fp);          /* num_hmms */
    321   pfread(&allotree->hmm_infos, sizeof(HMMInfo *), 1, fp);      /* hmm_infos */
    322   pfread(allotree->phoneme_index, sizeof(asr_uint16_t), NUM_PHONEME_INDICES, fp); /* phoneme_index */
    323 
    324   allotree->image = buffer;
    325 
    326   pbuf += sizeof(*allotree) / BYTES_PER_ATOM;
    327   pbuf -= sizeof(void*); // PCPinfo
    328 
    329   ASSERT(pftell(fp) == pbuf - buffer);
    330 
    331 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    332   PLogMessage("read allotree done %d\n", (int)(pbuf - buffer));
    333 #endif
    334 
    335   allotree->questions = (srec_question *)pbuf;
    336   read_questions(&allotree->questions, allotree->num_questions, &pbuf, fp);
    337 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    338   PLogMessage("read_questions done %d\n", (int)(pbuf - buffer));
    339 #endif
    340   ASSERT(pftell(fp) == pbuf - buffer);
    341 
    342   /* readme phoneme_data */
    343   read_phoneme_data(&allotree->pdata, allotree->num_phonemes, &pbuf, fp);
    344 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    345   PLogMessage("read_phoneme_data done %d\n", (int)(pbuf - buffer));
    346 #endif
    347   ASSERT(pftell(fp) == pbuf - buffer);
    348 
    349   read_hmminfos(allotree, &pbuf, fp);
    350 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    351   PLogMessage("read_hmminfos done %d\n", (int)(pbuf - buffer));
    352 #endif
    353   ASSERT(pftell(fp) == pbuf - buffer);
    354 
    355   *pallotree = allotree;
    356 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    357   PLogMessage("read arbdata image size %d\n", allotree->image_size);
    358 #endif
    359   ASSERT(pbuf - buffer == buffer_size);
    360 
    361   pfclose(fp);
    362 
    363   return 0;
    364 }
    365 
    366 /**
    367  * @todo document
    368  */
    369 typedef struct
    370 {
    371   unsigned short nnodes;
    372   unsigned long  size;
    373   long    phoneme;
    374   unsigned short node_pos;
    375   unsigned long  node_off;
    376   short    low_genone_no;
    377   short    high_genone_no;
    378   short    low_pel_no;
    379   short    high_pel_no;
    380 }
    381 tree_head;
    382 
    383 static int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes)
    384 {
    385   if (!node) return 0; /* should not happen */
    386   if (!tree_topo) return 0; /* should not happen */
    387   if (tree_topo->nnodes > 255)
    388     return 0; /* should not happen, might indicate infinite looping */
    389 
    390   tree_topo->nnodes++;
    391 
    392   if (node->node.quest_index < 0)
    393   {
    394     if (num_terminal_nodes)
    395     {
    396       (*num_terminal_nodes)++;
    397     }
    398     if (node->term.pelid < tree_topo->low_pel_no)
    399     {
    400       tree_topo->low_pel_no = node->term.pelid;
    401       tree_topo->low_genone_no = node->term.pelid;
    402     }
    403     if (node->term.pelid > tree_topo->high_pel_no)
    404     {
    405       tree_topo->high_pel_no = node->term.pelid;
    406       tree_topo->high_genone_no = node->term.pelid;
    407     }
    408   }
    409   else
    410   {
    411     traverse_tree((tree_node*)node->node.fail, tree_topo, num_terminal_nodes);
    412     traverse_tree((tree_node*)node->node.pass, tree_topo, num_terminal_nodes);
    413   }
    414   return 0;
    415 
    416 }
    417 
    418 #if 0
    419 static int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes)
    420 {
    421   tree_head topo;
    422   *num_terminal_nodes = 0;
    423   topo.nnodes = 0;
    424   traverse_tree(node, &topo, num_terminal_nodes);
    425   return topo.nnodes;
    426 }
    427 #endif
    428 
    429 static unsigned int version_arbdata_add(unsigned int ics, int data)
    430 {
    431   unsigned int ocs = ((ics << 3) | (ics >> 29)) + data;
    432   /* if(debug)printf("ocs %d ics %d data %d\n", ocs, ics, data);*/
    433   return ocs;
    434 }
    435 
    436 
    437 unsigned int version_arbdata_models(srec_arbdata* a)
    438 {
    439   int i, num_hmms_in_phoneme;
    440 
    441   tree_head topo;
    442   unsigned int checksum = 0;
    443   /* if(debug)printf("num_hmms %d\n", a->num_hmms); */
    444   /* if(debug)printf("num_phonemes %d\n", a->num_phonemes); */
    445   for (i = 0; i < a->num_phonemes; i++)
    446   {
    447     num_hmms_in_phoneme = 0;
    448     topo.low_pel_no = 32567;
    449     topo.high_pel_no = 0;
    450     topo.nnodes = 0;
    451     traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme);
    452     /* if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme,
    453     topo.low_pel_no, topo.high_pel_no); */
    454     if (topo.nnodes == 256) return 0;
    455     checksum = version_arbdata_add(checksum, topo.low_pel_no);
    456   }
    457   return checksum;
    458 }
    459 
    460 
    461 
    462 
    463