1 /*---------------------------------------------------------------------------* 2 * srec_arb.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include "pstdio.h" 21 #include "passert.h" 22 #include "portable.h" 23 24 #include<string.h> 25 26 #include"portable.h" 27 28 #include"sizes.h" 29 #include"hmm_desc.h" 30 #include"search_network.h" /* for EPSILON_OFFSET */ 31 #include"srec_arb.h" 32 33 #define DEBUG_PRONS 0 34 #define IF_DEBUG_PRONS(X) 35 36 static const char *rcsid = 0 ? (const char *) &rcsid : 37 "$Id: srec_arb.c,v 1.27.4.15 2007/12/14 22:03:51 dahan Exp $"; 38 39 int question_check(srec_question* quest, phonemeID lphon, phonemeID cphon, phonemeID rphon) 40 { 41 asr_int16_t a = 0, b = 0; 42 /* phon = a*16+b */ 43 if (quest->qtype == QUESTION_LEFT) 44 { 45 BIT_ADDRESS(lphon, a, b); 46 } 47 #if USE_WWTRIPHONE 48 else if(quest->qtype == QUESTION_WBLEFT) { 49 if( lphon == WBPHONEME_CODE) return ANSWER_PASS; 50 else return ANSWER_FAIL; 51 } 52 else if(quest->qtype == QUESTION_WBRIGHT) { 53 if( rphon == WBPHONEME_CODE) return ANSWER_PASS; 54 else return ANSWER_FAIL; 55 } 56 #endif 57 else 58 { 59 ASSERT(quest->qtype == QUESTION_RIGHT); 60 BIT_ADDRESS(rphon, a, b); 61 } 62 return (quest->membership_bits[a] & b ? ANSWER_PASS : ANSWER_FAIL); 63 } 64 65 /* get model id for phoneme in context */ 66 int get_modelid_for_pic(srec_arbdata* allotree, phonemeID lphon, phonemeID cphon, phonemeID rphon) 67 { 68 int ans; 69 tree_node* tnode = allotree->pdata[cphon].model_nodes; 70 while (tnode->node.quest_index >= 0) 71 { 72 ans = question_check(&allotree->questions[tnode->node.quest_index], 73 lphon, cphon, rphon); 74 tnode = (ans == ANSWER_FAIL ? (tree_node*)tnode->node.fail : (tree_node*)tnode->node.pass); 75 } 76 return tnode->term.pelid; 77 } 78 79 void read_questions(srec_question** pquestions, asr_int16_t num_questions, char **buffer, PFile *fp) 80 { 81 srec_question *q; 82 83 q = *pquestions = (srec_question*)(*buffer); 84 85 *buffer += num_questions * sizeof(srec_question); 86 while (num_questions-- > 0) 87 { 88 pfread(&(q->qtype), sizeof(asr_uint16_t), 1, fp); 89 pfread(&(q->membership_bits), sizeof(asr_uint16_t), PSET_BIT_ARRAY_SIZE, fp); 90 q++; 91 } 92 } 93 94 /* we need to handle the interword silence here somehow, 95 proposal: we create one supermodel which combines the 96 the model preceding silence and that follows silence, so 97 "boston&mass" .. we'll have "n&m" as a single "supermodel", 98 we'll put that supermodel in the graph but then overlay the 99 actual models there on. the overlay only needs to be done 100 once. The number of possible supermodels is 113655 which is 101 larger than what an ilabel can hold, the solution to that is 102 to encode also on the "cost" of the supermodel arc. 103 104 /------SUPER(a&b)---\ 105 o----a1---o----b1----o 106 \--a2--o--#--o--b2--/ 107 108 cost is 16bits, ilabel is 16bits 109 on ilabel we encode the a1,(a2-a1) 110 on cost we encode b1,(b2-b1) 111 ... a1,b1 use 9 bits (400 models) 112 ... deltas use 6 bits (+/-31 range) 113 That leaves 1 bit left over, which is the top bit to signal this encoding, 114 and make sure the cost is very high. 115 */ 116 117 118 int get_modelids_for_pron(srec_arbdata* allotree, 119 const char* phonemes, int num_phonemes, 120 modelID* acoustic_model_ids) 121 { 122 int i; 123 modelID modelid; 124 phonemeID lphon, cphon, rphon; 125 126 if( allotree == NULL) 127 return 1; 128 129 if (num_phonemes == 0) 130 return 0; 131 132 IF_DEBUG_PRONS(printf("%s get_modelids_for_pron pronunciation %s\n", __FILE__, (char*)phonemes)); 133 134 #if !USE_WWTRIPHONE 135 lphon = (phonemeID)allotree->phoneme_index[ SILENCE_CODE]; 136 cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]]; 137 #else 138 lphon = WBPHONEME_CODE; //(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE]; 139 cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]]; 140 #endif 141 if(cphon == MAXphonemeID) 142 return 1; /* bad phoneme */ 143 for(i=0; i<num_phonemes; i++) { 144 #if !USE_WWTRIPHONE 145 rphon = (i==num_phonemes-1 ? 146 (phonemeID)allotree->phoneme_index[ SILENCE_CODE] : 147 (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ; 148 #else 149 rphon = (i==num_phonemes-1 ? 150 WBPHONEME_CODE /*(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE] */ : 151 (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ; 152 #endif 153 if (rphon == MAXphonemeID) 154 return 1; /* bad phoneme */ 155 156 modelid = (modelID) get_modelid_for_pic(allotree, lphon, cphon, rphon); 157 acoustic_model_ids[i] = modelid; 158 #if DEBUG_PRONS 159 printf("%c%c%c hmm%d states", allotree->pdata[lphon].code, 160 allotree->pdata[cphon].code, allotree->pdata[rphon].code, 161 acoustic_model_ids[i]); 162 for (j = 0; j < allotree->hmm_infos[modelid].num_states; j++) 163 printf(" %d", allotree->hmm_infos[modelid].state_indices[j]); 164 printf("\n"); 165 #endif 166 lphon = cphon; 167 cphon = rphon; 168 } 169 return 0; 170 } 171 172 /*-----------------------------------------------------------------------* 173 * * 174 * phoneme data stream functions * 175 * * 176 *-----------------------------------------------------------------------*/ 177 178 tree_node* read_tree_node_f(char **buffer, PFile *fp) 179 { 180 tree_node* tnode = (tree_node*) * buffer; 181 pfread(&(tnode->node.quest_index), sizeof(asr_int16_t), 1, fp); 182 pfread(&(tnode->term.pelid), sizeof(asr_int16_t), 1, fp); 183 pfread(&(tnode->node.fail), sizeof(tree_branch_info*), 1, fp); 184 pfread(&(tnode->node.pass), sizeof(tree_branch_info*), 1, fp); 185 186 /* because tree_node is a union, the actual size maybe large than we have read */ 187 ASSERT(sizeof(asr_int16_t)*2 + sizeof(tree_branch_info *)*2 == sizeof(tree_node)); 188 189 *buffer += sizeof(tree_node); 190 if (tnode->node.quest_index >= 0) 191 { 192 tnode->node.fail = (struct tree_branch_info*)read_tree_node_f(buffer, fp); 193 tnode->node.pass = (struct tree_branch_info*)read_tree_node_f(buffer, fp); 194 } 195 return tnode; 196 } 197 198 void read_phoneme_data(phoneme_data** pdata, asr_int16_t num_phonemes, char **buffer, PFile *fp) 199 { 200 int i, ptr; 201 phoneme_data *pd; 202 203 pd = *pdata = (phoneme_data*)(*buffer); 204 205 for (i = 0; i < num_phonemes; i++) 206 { 207 pfread(&(pd->name), sizeof(char), MAX_PHONEME_NAME_LEN, fp); 208 pfread(&(pd->code), sizeof(asr_uint16_t), 1, fp); 209 pfread(&ptr, sizeof(asr_int16_t), 1, fp); 210 pfread(&(pd->model_nodes), sizeof(tree_node *), 1, fp); 211 pfread(&(pd->num_states), sizeof(asr_uint16_t), 1, fp); 212 pfread(&ptr, sizeof(asr_int16_t), 1, fp); 213 pfread(&(pd->state_nodes), sizeof(tree_node *), MAX_PHONE_STATES, fp); 214 pd++; 215 } 216 ASSERT(sizeof(phoneme_data) == MAX_PHONEME_NAME_LEN + sizeof(asr_int16_t)*4 + sizeof(tree_node *)*(1 + MAX_PHONE_STATES)); 217 (*buffer) += num_phonemes * sizeof(phoneme_data) / BYTES_PER_ATOM; 218 ASSERT((char *)pd == *buffer); 219 220 for (i = 0; i < num_phonemes; i++) 221 { 222 #if STATE_NODES_NEEDED_AT_RUNTIME 223 for (j = 0; j < (*pdata)[i].num_states; j++) 224 (*pdata)[i].state_nodes[j] = read_tree_node_f(buffer); 225 #endif 226 (*pdata)[i].model_nodes = read_tree_node_f(buffer, fp); 227 } 228 } 229 230 /*-----------------------------------------------------------------------* 231 * * 232 * hmm info stream functions * 233 * * 234 *-----------------------------------------------------------------------*/ 235 236 void read_hmminfos(srec_arbdata* allotree, char** buffer, PFile *fp) 237 { 238 int i, offset, num_atoms, num_hmms = allotree->num_hmms, ptr; 239 HMMInfo* hmm_infos; 240 hmm_infos = (HMMInfo*) * buffer; 241 num_atoms = sizeof(HMMInfo) * num_hmms / BYTES_PER_ATOM; 242 (*buffer) += num_atoms; 243 for (i = 0; i < num_hmms; i++) 244 { 245 pfread(&hmm_infos[i].name[0], sizeof(char), MAX_PHONEME_NAME_LEN, fp); 246 pfread(&(hmm_infos[i].num_states), sizeof(asr_int16_t), 1, fp); 247 pfread(&ptr, sizeof(asr_int16_t), 1, fp); 248 pfread(&(hmm_infos[i].state_indices), sizeof(asr_int16_t*), 1, fp); 249 } 250 251 /* through this and comments below, I was trying to keep the state_indices 252 self-contained, to calculate offsets from saved pointers, but it doesn't 253 appear to work; so we resort to recovering state offsets from num_states 254 state_indices = hmm_infos[0].state_indices; */ 255 pfread(*buffer, sizeof(asr_int16_t), allotree->num_states, fp); 256 257 hmm_infos[0].state_indices = (asr_int16_t*) * buffer; 258 num_atoms = sizeof(hmm_infos[0].state_indices[0]) * allotree->num_states / BYTES_PER_ATOM; 259 (*buffer) += num_atoms; 260 261 for (i = 0, offset = 0; i < num_hmms; i++) 262 { 263 /* int j,offset2 = hmm_infos[i].state_indices - state_indices; */ 264 hmm_infos[i].state_indices = hmm_infos[0].state_indices + offset; 265 if (i >= HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1) 266 offset += hmm_infos[i].num_states; 267 /* printf("offset %d %d offset2 %d\n", i, offset, offset2); 268 printf("hmm %d %x states", i, hmm_infos[i].state_indices); 269 for(j=0; j<hmm_infos[i].num_states; j++) 270 printf(" %d", hmm_infos[i].state_indices[j]); 271 printf("\n"); */ 272 273 } 274 allotree->hmm_infos = hmm_infos; 275 } 276 277 /*-----------------------------------------------------------------------* 278 * * 279 * top level stream functions * 280 * * 281 *-----------------------------------------------------------------------*/ 282 283 int read_arbdata_from_stream(srec_arbdata** pallotree, char* filename, int buffer_size) 284 { 285 char* pbuf; 286 srec_arbdata* allotree; 287 int ptr; 288 289 PFile* fp; 290 long fpos; 291 char* buffer; 292 293 fp = file_must_open(NULL, (char*)filename, L("rb"), ESR_TRUE); 294 if(!fp) { 295 *pallotree = NULL; 296 return 0; 297 } 298 pfseek(fp, 0, SEEK_END); 299 fpos = pftell(fp); 300 buffer = (char*)CALLOC_CLR(fpos, sizeof(char), "srec.arbdata"); 301 pfseek(fp, 0, SEEK_SET); 302 303 buffer_size = fpos; 304 pbuf = buffer; 305 306 allotree = (srec_arbdata*)buffer; 307 /* ASSERT(allotree->image_size == buffer_size); hack for now */ 308 309 /* read structure arbdata from file */ 310 pfread(&allotree->image, sizeof(char *), 1, fp); /* image */ 311 pfread(&allotree->image_size, sizeof(asr_uint16_t), 1, fp); /* image_szie */ 312 pfread(&allotree->num_phonemes, sizeof(asr_int16_t), 1, fp); /* num_phonemes */ 313 pfread(&allotree->pdata, sizeof(phoneme_data *), 1, fp); /* pdate */ 314 pfread(&allotree->num_questions, sizeof(asr_int16_t), 1, fp); /* num_questions */ 315 316 pfread(&ptr, sizeof(asr_int16_t), 1, fp); /* alignment problem */ 317 318 pfread(&allotree->questions, sizeof(srec_question *), 1, fp);/* questions */ 319 pfread(&allotree->num_states, sizeof(asr_int16_t), 1, fp); /* num_states */ 320 pfread(&allotree->num_hmms, sizeof(asr_int16_t), 1, fp); /* num_hmms */ 321 pfread(&allotree->hmm_infos, sizeof(HMMInfo *), 1, fp); /* hmm_infos */ 322 pfread(allotree->phoneme_index, sizeof(asr_uint16_t), NUM_PHONEME_INDICES, fp); /* phoneme_index */ 323 324 allotree->image = buffer; 325 326 pbuf += sizeof(*allotree) / BYTES_PER_ATOM; 327 pbuf -= sizeof(void*); // PCPinfo 328 329 ASSERT(pftell(fp) == pbuf - buffer); 330 331 #ifdef SREC_ENGINE_VERBOSE_LOGGING 332 PLogMessage("read allotree done %d\n", (int)(pbuf - buffer)); 333 #endif 334 335 allotree->questions = (srec_question *)pbuf; 336 read_questions(&allotree->questions, allotree->num_questions, &pbuf, fp); 337 #ifdef SREC_ENGINE_VERBOSE_LOGGING 338 PLogMessage("read_questions done %d\n", (int)(pbuf - buffer)); 339 #endif 340 ASSERT(pftell(fp) == pbuf - buffer); 341 342 /* readme phoneme_data */ 343 read_phoneme_data(&allotree->pdata, allotree->num_phonemes, &pbuf, fp); 344 #ifdef SREC_ENGINE_VERBOSE_LOGGING 345 PLogMessage("read_phoneme_data done %d\n", (int)(pbuf - buffer)); 346 #endif 347 ASSERT(pftell(fp) == pbuf - buffer); 348 349 read_hmminfos(allotree, &pbuf, fp); 350 #ifdef SREC_ENGINE_VERBOSE_LOGGING 351 PLogMessage("read_hmminfos done %d\n", (int)(pbuf - buffer)); 352 #endif 353 ASSERT(pftell(fp) == pbuf - buffer); 354 355 *pallotree = allotree; 356 #ifdef SREC_ENGINE_VERBOSE_LOGGING 357 PLogMessage("read arbdata image size %d\n", allotree->image_size); 358 #endif 359 ASSERT(pbuf - buffer == buffer_size); 360 361 pfclose(fp); 362 363 return 0; 364 } 365 366 /** 367 * @todo document 368 */ 369 typedef struct 370 { 371 unsigned short nnodes; 372 unsigned long size; 373 long phoneme; 374 unsigned short node_pos; 375 unsigned long node_off; 376 short low_genone_no; 377 short high_genone_no; 378 short low_pel_no; 379 short high_pel_no; 380 } 381 tree_head; 382 383 static int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes) 384 { 385 if (!node) return 0; /* should not happen */ 386 if (!tree_topo) return 0; /* should not happen */ 387 if (tree_topo->nnodes > 255) 388 return 0; /* should not happen, might indicate infinite looping */ 389 390 tree_topo->nnodes++; 391 392 if (node->node.quest_index < 0) 393 { 394 if (num_terminal_nodes) 395 { 396 (*num_terminal_nodes)++; 397 } 398 if (node->term.pelid < tree_topo->low_pel_no) 399 { 400 tree_topo->low_pel_no = node->term.pelid; 401 tree_topo->low_genone_no = node->term.pelid; 402 } 403 if (node->term.pelid > tree_topo->high_pel_no) 404 { 405 tree_topo->high_pel_no = node->term.pelid; 406 tree_topo->high_genone_no = node->term.pelid; 407 } 408 } 409 else 410 { 411 traverse_tree((tree_node*)node->node.fail, tree_topo, num_terminal_nodes); 412 traverse_tree((tree_node*)node->node.pass, tree_topo, num_terminal_nodes); 413 } 414 return 0; 415 416 } 417 418 #if 0 419 static int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes) 420 { 421 tree_head topo; 422 *num_terminal_nodes = 0; 423 topo.nnodes = 0; 424 traverse_tree(node, &topo, num_terminal_nodes); 425 return topo.nnodes; 426 } 427 #endif 428 429 static unsigned int version_arbdata_add(unsigned int ics, int data) 430 { 431 unsigned int ocs = ((ics << 3) | (ics >> 29)) + data; 432 /* if(debug)printf("ocs %d ics %d data %d\n", ocs, ics, data);*/ 433 return ocs; 434 } 435 436 437 unsigned int version_arbdata_models(srec_arbdata* a) 438 { 439 int i, num_hmms_in_phoneme; 440 441 tree_head topo; 442 unsigned int checksum = 0; 443 /* if(debug)printf("num_hmms %d\n", a->num_hmms); */ 444 /* if(debug)printf("num_phonemes %d\n", a->num_phonemes); */ 445 for (i = 0; i < a->num_phonemes; i++) 446 { 447 num_hmms_in_phoneme = 0; 448 topo.low_pel_no = 32567; 449 topo.high_pel_no = 0; 450 topo.nnodes = 0; 451 traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme); 452 /* if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme, 453 topo.low_pel_no, topo.high_pel_no); */ 454 if (topo.nnodes == 256) return 0; 455 checksum = version_arbdata_add(checksum, topo.low_pel_no); 456 } 457 return checksum; 458 } 459 460 461 462 463