1 /*---------------------------------------------------------------------------* 2 * test_g2g.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #include "pstdio.h" 23 #include "pmemory.h" 24 #include "plog.h" 25 #include "HashMap.h" 26 #include "SR_Grammar.h" 27 #include "SR_Vocabulary.h" 28 #include "SR_SemanticResult.h" 29 #include "ESR_Session.h" 30 #include "ESR_Locale.h" 31 #include "ESR_CommandLine.h" 32 #include "LCHAR.h" 33 34 #include "PFileSystem.h" 35 #include "PANSIFileSystem.h" 36 37 #include "SR_GrammarImpl.h" 38 39 #include "simapi.h" 40 #include "srec_context.h" 41 #include "srec_arb.h" 42 43 /** 44 * @todo document 45 */ 46 typedef struct 47 { 48 unsigned short nnodes; 49 unsigned long size; 50 long phoneme; 51 unsigned short node_pos; 52 unsigned long node_off; 53 short low_genone_no; 54 short high_genone_no; 55 short low_pel_no; 56 short high_pel_no; 57 } 58 tree_head; 59 60 61 int usage(LCHAR* exename) 62 { 63 pfprintf(PSTDOUT,"usage: %s -base <basefilename> \n",exename); 64 pfprintf(PSTDOUT,"<basefilename> can be a file.g2g or @g2gfilelist\n"); 65 pfprintf(PSTDOUT,"[-checkword id] .. also checks word id in the file\n"); 66 pfprintf(PSTDOUT,"[-swiarb esr/config/lang/models/generic.swiarb] ... enables word check\n"); 67 return 1; 68 } 69 70 /* protos */ 71 ESR_ReturnCode find_phonemes_for_ihmms( CA_Arbdata* ca_arbdata, modelID* ihmms, int num_hmms); 72 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout); 73 int CheckG2G(CA_Arbdata* arbdata, int* p4pTable, const char* base, int wordid, char* outbase); 74 void load_filelist(char* filelist, char*** pfiles, int *pnum_files); 75 int *phonemecode_for_pel_table(CA_Arbdata* arbdata); 76 77 int debug = 0; 78 #define MAX_LINE_LENGTH 256 79 #define MAX_STR_LENGTH 80 80 #define MAX_SEM_RESULTS 3 81 #define MAX_KEYS 30 82 83 /* main */ 84 85 int main (int argc, char **argv) 86 { 87 ESR_ReturnCode rc; 88 LCHAR base[P_PATH_MAX] = L(""); 89 int i; 90 CA_Arbdata* ca_arbdata; 91 char* arbfile = NULL; 92 char** g2glist; 93 int g2glist_len; 94 char* outbase = NULL; 95 int *p4pTable; 96 int wordid = 0; 97 int log_level = 0; 98 99 /* 100 * Initialize portable library. 101 */ 102 CHKLOG(rc, PMemInit()); 103 /* CHKLOG(rc, PFileSystemCreate()); 104 CHKLOG(rc, PANSIFileSystemCreate()); 105 CHKLOG(rc, PANSIFileSystemAddPath(L("/dev/ansi"), L("/")));*/ 106 107 /* Set ANSI file-system as default file-system */ 108 /* CHKLOG(rc, PANSIFileSystemSetDefault(ESR_TRUE));*/ 109 /* Set virtual current working directory to native current working directory */ 110 /* len = P_PATH_MAX; 111 CHKLOG(rc, PANSIFileSystemGetcwd(cwd, &len)); 112 CHKLOG(rc, PFileSystemChdir(cwd));*/ 113 114 if( argc <= 1) 115 { 116 usage(argv[0]); 117 exit(EXIT_FAILURE); 118 } 119 120 for (i = 1; i < argc; ++i) 121 { 122 if(!LSTRCMP(argv[i], L("-base"))) 123 { 124 ++i; 125 LSTRCPY(base, argv[i]); 126 } 127 else if(!LSTRCMP(argv[i],L("-out"))) 128 { 129 outbase = argv[++i]; 130 } 131 else if(!LSTRCMP(argv[i],L("-swiarb"))) 132 { 133 arbfile = argv[++i]; 134 } 135 else if(!LSTRCMP(argv[i],L("-checkword"))) 136 { 137 wordid = atoi(argv[++i]); 138 } 139 else if(!LSTRCMP(argv[i],L("-log"))) 140 { 141 log_level = 10; 142 } 143 else 144 { 145 printf("unrecog'd argument %s\n", argv[i]); 146 exit(1); 147 } 148 } 149 150 CHK(rc, PLogInit(NULL, log_level)); 151 152 if(arbfile) { 153 ca_arbdata = CA_LoadArbdata(arbfile); 154 if(!ca_arbdata) { 155 pfprintf(PSTDOUT, "Error: loading arbfile %s\n", arbfile); 156 goto CLEANUP; 157 } 158 pfprintf(PSTDOUT, "arbdata done\n"); 159 p4pTable = phonemecode_for_pel_table(ca_arbdata); 160 pfprintf(PSTDOUT, "p4pTable done\n"); 161 } else { 162 ca_arbdata = 0; 163 p4pTable = 0; 164 } 165 166 if(base[0] == '@') { 167 load_filelist(base+1, &g2glist, &g2glist_len); 168 pfprintf(PSTDOUT, "g2glist %s .. %d entries\n", g2glist_len); 169 for(i=0; i<g2glist_len; i++) 170 CheckG2G( ca_arbdata, p4pTable, g2glist[i], wordid, outbase); 171 } 172 else { 173 CheckG2G( ca_arbdata, p4pTable, base, wordid, outbase); 174 } 175 176 CLEANUP: 177 PLogShutdown(); 178 /* PANSIFileSystemDestroy(); 179 PFileSystemDestroy();*/ 180 PMemSetLogFile(PSTDOUT); 181 PMemDumpLogFile(); 182 PMemShutdown(); 183 return rc; 184 } 185 186 int CheckG2G(CA_Arbdata* ca_arbdata, int* p4pTable, const char* base, int wordid, char* outbase) 187 { 188 ESR_ReturnCode rc; 189 SR_GrammarImpl *grammarImpl; 190 SR_Grammar* grammar = NULL; 191 srec_context* fst; 192 CA_Syntax* syntax; 193 modelID ilabels_preceding[64], num_ilabels_preceding; 194 modelID ilabels_following[64], num_ilabels_following; 195 modelID ilabels[128], num_ilabels; 196 int i,j; 197 unsigned long g2gsize; 198 199 if(1) { 200 FILE* fp; 201 fp = fopen(base, "rb"); 202 if(!fp) g2gsize = 0; 203 else { 204 fseek(fp, 0, SEEK_END); 205 g2gsize = ftell(fp); 206 fclose(fp); 207 } 208 } 209 210 rc = SR_GrammarLoad(base, &grammar); 211 if(rc != ESR_SUCCESS) { 212 pfprintf(PSTDOUT, "%s failed at load\n", base); 213 goto CLEANUP; 214 } 215 216 grammarImpl = (SR_GrammarImpl*)grammar; 217 syntax = grammarImpl->syntax; 218 if(outbase) { 219 CA_DumpSyntax( syntax, outbase); 220 } 221 222 fst = syntax->synx; 223 pfprintf(PSTDOUT, "%s %d arcs %d/%d/%d nodes %d/%d/%d words %d/%d chars %d/%d modelver %d\n", 224 base, g2gsize, 225 fst->num_arcs, fst->num_base_arcs, fst->FSMarc_list_len, 226 fst->num_nodes, fst->num_base_nodes, fst->FSMnode_list_len, 227 fst->olabels->num_words, fst->olabels->max_words, 228 fst->olabels->next_chars-fst->olabels->chars, 229 fst->olabels->max_chars, 230 #ifdef IMAGE_FORMAT_V2 231 fst->modelid 232 #else 233 -1 234 #endif 235 ); 236 237 if(wordid == 0 || ca_arbdata == 0) 238 goto CLEANUP; 239 240 if(wordid >= fst->olabels->num_words) { 241 pfprintf(PSTDOUT, "%s failed 'cuz numwords(%d) < %d\n", base, 242 fst->olabels->num_words, wordid); 243 goto CLEANUP; 244 } 245 246 for(i=0; i<fst->num_arcs; i++) { 247 if(fst->FSMarc_list[i].olabel == wordid) { 248 FSMnode* node; 249 FSMarc* arc = &fst->FSMarc_list[i]; 250 nodeID fr_node = arc->fr_node; 251 arcID iarc; 252 ilabels_following[0] = arc->ilabel; 253 num_ilabels_following = 1; 254 num_ilabels_preceding = 0; 255 for( ; fr_node!=fst->start_node; fr_node=arc->fr_node) { 256 node = &fst->FSMnode_list[fr_node]; 257 iarc = node->first_prev_arc; 258 for( ; iarc!=MAXarcID; iarc=arc->linkl_prev_arc) { 259 arc = &fst->FSMarc_list[iarc]; 260 if(arc->fr_node != fr_node) break; 261 } 262 if(iarc == MAXarcID) { 263 pfprintf(PSTDOUT, "%s failed at 11\n", base); 264 goto CLEANUP; 265 } 266 if(arc->ilabel == WORD_BOUNDARY) break; 267 ilabels_preceding[num_ilabels_preceding++] = arc->ilabel; 268 } 269 arc = &fst->FSMarc_list[i]; 270 fr_node = arc->to_node; 271 for( ; fr_node!=fst->end_node; fr_node=arc->to_node) { 272 node = &fst->FSMnode_list[fr_node]; 273 iarc = node->un_ptr.first_next_arc; 274 for( ; iarc!=MAXarcID; iarc=arc->linkl_next_arc) { 275 arc = &fst->FSMarc_list[iarc]; 276 if(arc->to_node != fr_node) break; 277 } 278 if(iarc == MAXarcID) { 279 pfprintf(PSTDOUT, "%s failed at 12\n", base); 280 goto CLEANUP; 281 } 282 ilabels_following[num_ilabels_following++] = arc->ilabel; 283 if(arc->ilabel == WORD_BOUNDARY) break; 284 } 285 num_ilabels = 0; 286 for(j=0; j<num_ilabels_preceding; j++) 287 ilabels[num_ilabels++] = ilabels_preceding[num_ilabels_preceding-1-j]; 288 for(j=0; j<num_ilabels_following; j++) 289 ilabels[num_ilabels++] = ilabels_following[j]; 290 if(ilabels[num_ilabels-1] == WORD_BOUNDARY) 291 num_ilabels--; 292 for(j=0; j<num_ilabels; j++) { 293 if(ilabels[j]<fst->hmm_ilabel_offset) { 294 pfprintf(PSTDOUT, "%s failed at 15\n", base); 295 goto CLEANUP; 296 } else 297 ilabels[j] = ilabels[j] - (labelID)fst->hmm_ilabel_offset; 298 } 299 pfprintf(PSTDOUT, "%s (W%d) ihmms ", fst->olabels->words[wordid], wordid); 300 for(j=0;j<num_ilabels;j++) 301 pfprintf(PSTDOUT, " %d", ilabels[j]); 302 pfprintf(PSTDOUT, "\n"); 303 if(num_ilabels < 2) { 304 pfprintf(PSTDOUT, "%s failed at 1\n", base); 305 goto CLEANUP; 306 } 307 if(p4pTable) 308 rc = find_phonemes_for_ihmms( ca_arbdata, ilabels, num_ilabels); 309 else { 310 rc = ESR_SUCCESS; 311 for(j=0; j<num_ilabels; j++) { 312 if(p4pTable[ ilabels[j]]<0) { 313 rc = ESR_NO_MATCH_ERROR; 314 ilabels[j] = MAXmodelID; 315 } else { 316 ilabels[j] = (modelID)p4pTable[ ilabels[j]]; 317 } 318 } 319 } 320 321 if(rc) { 322 pfprintf(PSTDOUT, "%s failed at 2\n", base); 323 goto CLEANUP; 324 } 325 pfprintf(PSTDOUT, "%s ", fst->olabels->words[wordid]); 326 for(j=0;j<num_ilabels;j++) pfprintf(PSTDOUT, "%c", ilabels[j]); 327 pfprintf(PSTDOUT, "\n"); 328 rc = Parse( grammar, fst->olabels->words[wordid], PSTDOUT); 329 if(rc) { 330 pfprintf(PSTDOUT, "%s failed at 3\n", base); 331 goto CLEANUP; 332 } 333 pfprintf(PSTDOUT, "%s PASSED (on %s)\n", base, fst->olabels->words[wordid]); 334 break; 335 } 336 } 337 338 return 0; 339 CLEANUP: 340 if(grammar) SR_GrammarDestroy(grammar); 341 return 1; 342 343 } 344 345 346 int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes) 347 { 348 if(node) 349 tree_topo->nnodes++; 350 351 if(node->node.quest_index < 0) { 352 if(num_terminal_nodes) 353 (*num_terminal_nodes)++; 354 if( node->term.pelid < tree_topo->low_pel_no) 355 tree_topo->low_pel_no = tree_topo->low_genone_no = node->term.pelid; 356 if( node->term.pelid > tree_topo->high_pel_no) 357 tree_topo->high_pel_no = tree_topo->high_genone_no = node->term.pelid; 358 } else { 359 traverse_tree( (tree_node*)node->node.fail, tree_topo, num_terminal_nodes); 360 traverse_tree( (tree_node*)node->node.pass, tree_topo, num_terminal_nodes); 361 } 362 return 0; 363 364 } 365 366 int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes) 367 { 368 tree_head topo; 369 *num_terminal_nodes = 0; 370 topo.nnodes = 0; 371 traverse_tree(node, &topo, num_terminal_nodes); 372 return topo.nnodes; 373 } 374 375 ESR_ReturnCode find_phonemes_for_ihmms( CA_Arbdata* ca_arbdata, modelID* ihmms, int num_ihmms) 376 { 377 int ii, i; 378 int num_hmms_in_phoneme; 379 tree_head topo; 380 srec_arbdata* a = (srec_arbdata*)ca_arbdata; 381 int num_phonemes_for_ihmms = 0; 382 383 for(ii=0; ii<num_ihmms; ii++) { 384 for(i=0; i<a->num_phonemes; i++) { 385 num_hmms_in_phoneme = 0; 386 topo.low_pel_no = 32567; 387 topo.high_pel_no = 0; 388 traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme); 389 if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme, 390 topo.low_pel_no, topo.high_pel_no); 391 if(ihmms[ii] >= topo.low_pel_no && ihmms[ii]<= topo.high_pel_no) { 392 ihmms[ii] = (modelID)i; 393 num_phonemes_for_ihmms++; 394 break; 395 } 396 } 397 if( i==a->num_phonemes) { 398 if(ihmms[ii]<=5) { 399 ihmms[ii] = 0; 400 num_phonemes_for_ihmms++; 401 } else { 402 PLogError("error: could not find hmm%d under any phoneme! ",ihmms[ii]); 403 } 404 } 405 406 } 407 if(num_phonemes_for_ihmms != num_ihmms) 408 return ESR_INVALID_ARGUMENT; 409 else { 410 for(ii=0; ii<num_ihmms; ii++) ihmms[ii] = a->pdata[ ihmms[ii]].code; 411 return ESR_SUCCESS; 412 } 413 } 414 415 void display_results(SR_SemanticResult *result, PFile* fout) 416 { 417 size_t i, size, len; 418 LCHAR* keys[MAX_KEYS]; /* array of pointers to strings */ 419 LCHAR value[MAX_STR_LENGTH]; 420 ESR_ReturnCode rc; 421 422 size = MAX_KEYS; 423 rc = result->getKeyList(result, (LCHAR**) &keys, &size); /* get the key list */ 424 if(rc == ESR_SUCCESS) 425 { 426 for(i=0; i<size; i++) 427 { 428 len = MAX_STR_LENGTH; 429 if ((rc = result->getValue(result,keys[i],value,&len)) == ESR_SUCCESS) 430 pfprintf(fout,"{%s : %s}\n",keys[i],value); 431 else 432 pfprintf(fout,"Error: %s\n",ESR_rc2str(rc)); 433 } 434 } 435 else 436 pfprintf(fout,"Error: %s\n",ESR_rc2str(rc)); 437 } 438 439 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout) 440 { 441 ESR_ReturnCode rc; 442 int i, result_count; 443 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS]; 444 445 result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */ 446 for(i =0; i<result_count; i++) 447 SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */ 448 449 lstrtrim(trans); 450 451 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) &result_count); 452 if(rc != ESR_SUCCESS) 453 return rc; 454 455 if(result_count < 1) 456 { 457 pfprintf(fout,"no parse\n\n"); 458 return ESR_NO_MATCH_ERROR; 459 } 460 else 461 { 462 pfprintf(fout,"parse ok (%d results)\n", result_count); 463 for(i=0; i < result_count; i++) 464 display_results(semanticResults[i],fout); 465 466 for(i=0; i < MAX_SEM_RESULTS; i++) 467 { 468 rc = semanticResults[i]->destroy(semanticResults[i]); 469 if(rc != ESR_SUCCESS) 470 return rc; 471 } 472 return ESR_SUCCESS; 473 } 474 } 475 476 void load_filelist(char* filelist, char*** pfiles, int *pnum_files) 477 { 478 int i = 0; 479 FILE* fp; 480 char line[512]; 481 char **files = 0, *file; 482 int num_files = 0; 483 484 fp = fopen(filelist, "r"); 485 if(!fp) { 486 pfprintf(PSTDOUT, "failed to open %s\n", filelist); 487 goto DONE; 488 } 489 490 while( fgets(line, sizeof(line), fp)) { 491 if(line[0] == '#') continue; 492 i++; 493 } 494 fclose(fp); 495 496 num_files = i; 497 *files = CALLOC( num_files, sizeof(char*), __FILE__); 498 fp = fopen(filelist, "r"); 499 for(i=0; fgets(line,sizeof(line),fp) && i<num_files; i++) { 500 if(line[0] == '#') continue; 501 strtok(line,"\n\r\t"); 502 file = files[i++] = CALLOC(strlen(line)+1,sizeof(char),__FILE__); 503 strcpy( file, line); 504 } 505 fclose(fp); 506 num_files = i; 507 508 DONE: 509 *pfiles = files; 510 *pnum_files = num_files; 511 } 512 513 int* phonemecode_for_pel_table(CA_Arbdata* ca_arbdata) 514 { 515 static int table[2048]; 516 int i,j; 517 tree_head topo; 518 srec_arbdata* a = (srec_arbdata*)ca_arbdata; 519 int num_hmms_in_phoneme; 520 521 for(j=0; j< (int)(sizeof(table)/sizeof(int)); j++) 522 table[j] = 0; 523 524 for(i=0; i<a->num_phonemes; i++) { 525 num_hmms_in_phoneme = 0; 526 topo.low_pel_no = 32567; 527 topo.high_pel_no = 0; 528 traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme); 529 if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme, 530 topo.low_pel_no, topo.high_pel_no); 531 532 for(j=topo.low_pel_no; j<=topo.high_pel_no; j++) 533 table[j] = a->pdata[i].code; 534 } 535 return &table[0]; 536 } 537