1 /*---------------------------------------------------------------------------* 2 * grxmlcompile.cpp * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include "ptypes.h" 21 22 #include <ext/hash_map> 23 24 #include "fst/lib/fst.h" 25 #include "fst/lib/fstlib.h" 26 #include "fst/lib/arc.h" 27 #include "fst/lib/fst-decl.h" 28 #include "fst/lib/vector-fst.h" 29 #include "fst/lib/arcsort.h" 30 #include "fst/lib/invert.h" 31 32 #include "fst-io.h" 33 34 #include "ESR_Locale.h" 35 #include "LCHAR.h" 36 #include "pstdio.h" 37 #include "PFileSystem.h" 38 #include "PANSIFileSystem.h" 39 #include "plog.h" 40 #include "pmemory.h" 41 #include "ESR_Session.h" 42 #include "SR_Session.h" 43 #include "SR_Vocabulary.h" 44 #include "srec_arb.h" // for EPSILON_LABEL etc 45 #include <fstream> 46 #include <iostream> 47 #include "tinyxml.h" 48 #include "grxmldoc.h" 49 50 #ifdef MEMTRACE 51 #include <mcheck.h> 52 #endif 53 54 #define OPENFST_ACKNOWLEDGEMENT \ 55 "This tool uses the OpenFst library. 
\n" \ 56 "Licensed under the Apache License, Version 2.0 (the \"License\");\n" \ 57 " you may not use this file except in compliance with the License.\n" \ 58 " You may obtain a copy of the License at" \ 59 "\n" \ 60 " http://www.apache.org/licenses/LICENSE-2.0\n" \ 61 "\n" \ 62 " Unless required by applicable law or agreed to in writing, software\n" \ 63 " distributed under the License is distributed on an \"AS IS\" BASIS,\n" \ 64 " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \ 65 " See the License for the specific language governing permissions and\n" \ 66 " limitations under the License.\n" \ 67 "\n" \ 68 " This library was developed at Google Research (M. Riley, J. Schalkwyk, W. Skut) and NYU's Courant Institute (C. Allauzen, M. Mohri). It is intended to be comprehensive, flexible, efficient and scale well to large problems. It is an open source project distributed under the Apache license. \n" 69 70 71 #define TINYXML_ACKNOWLEDGEMENT \ 72 "This tool uses the tinyxml library. \n" \ 73 "Copyright (c) 2007 Project Admins: leethomason \n" \ 74 "The TinyXML software is provided 'as-is', without any express or implied\n" \ 75 "warranty. 
In no event will the authors be held liable for any damages\n" \ 76 "arising from the use of this software.\n" \ 77 "\n" \ 78 "Permission is granted to anyone to use this software for any purpose,\n" \ 79 "including commercial applications, and to alter it and redistribute it\n" \ 80 "freely, subject to the following restrictions:\n" 81 82 #define NUANCE_COPYRIGHT \ 83 "// grxmlcompile\n" \ 84 "//\n" \ 85 "// Licensed under the Apache License, Version 2.0 (the \"License\");\n" \ 86 "// you may not use this file except in compliance with the License.\n" \ 87 "// You may obtain a copy of the License at\n" \ 88 "//\n" \ 89 "// http://www.apache.org/licenses/LICENSE-2.0\n" \ 90 "//\n" \ 91 "// Unless required by applicable law or agreed to in writing, software\n" \ 92 "// distributed under the License is distributed on an \"AS IS\" BASIS,\n" \ 93 "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \ 94 "// See the License for the specific language governing permissions and\n" \ 95 "// limitations under the License.\n" \ 96 "//\n" \ 97 "// This program compiles a .grxml grammar into the graphs needed for \n" \ 98 "// decoding with SREC\n" \ 99 "// \n" 100 101 #define MAX_LINE_LENGTH 256 102 #define MAX_PATH_NAME 512 103 #define MAX_PRONS_LENGTH 1024 104 #define SILENCE_PREFIX_WORD "-pau-" 105 #define SILENCE_SUFFIX_WORD "-pau2-" 106 #define SLOT_SUFFIX "__" 107 #define SLOT_PREFIX "__" 108 #define MAX_NUM_SLOTS 12 /* must agree with srec_context.h */ 109 #define EXTRA_EPSILON_LABEL 39999 // must be higher than the number of models 110 #define DEFAULT_WB_COST 40 111 #define DEFAULT_WB_COST_STR "40" 112 #define SLOT_COUNTER_OFFSET 30000 // must be higher than the number of models 113 #define NOISE_PHONEME_CODE 'J' 114 115 static int debug = 0; 116 static int verbose = 0; 117 118 using namespace std; 119 120 ESR_ReturnCode make_openfst_graphs(GRXMLDoc* pDoc, /* for metas */ 121 const std::string& grxmlBasename, 122 const char* vocabFilename, 123 
const char* cfstFilename, 124 const char* modelmapFilename); 125 126 const char* showline(const char* fn, int line_num) 127 { 128 static char line[8096] = { 0 }; 129 int line_count = 0; 130 ifstream strm(fn); 131 while (strm && strm.getline(line, sizeof(line))) 132 if(line_count++ == line_num) break; 133 return &line[0]; 134 } 135 136 std::string ExtractFileName(const std::string& full) 137 { 138 std::string::size_type idx = full.find_last_of("/"); 139 140 if (idx != std::string::npos) 141 return full.substr(idx+1); 142 else 143 return full; 144 } 145 146 /*-----------------------------------------------------------------------* 147 * * 148 * * 149 *-----------------------------------------------------------------------*/ 150 151 int usage_error(const char* prgname) 152 { 153 printf("USAGE: -par <par file> -grxml <grxml grammar file> -vocab <dictionary file (.ok)> [-outdir <output directory>]\n"); 154 return (int)ESR_INVALID_ARGUMENT; 155 } 156 157 int main(int argc, char* argv[]) 158 { 159 ESR_ReturnCode status = ESR_SUCCESS; 160 char *parfile = NULL; 161 char *grxmlfile = NULL; 162 char *cmdline_vocfile = NULL; 163 std::string outdir("."); // default output dir is current directory 164 /* for now, assume char and LCHAR are the same, else fail to compile! 
*/ 165 { char zzz[ 1 - (sizeof(LCHAR)!=sizeof(char))]; zzz[0] = 0; } 166 167 #ifdef MEMTRACE 168 mtrace(); 169 #endif 170 171 #if defined(GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT) 172 cout << OPENFST_ACKNOWLEDGEMENT <<std::endl; 173 cout << TINYXML_ACKNOWLEDGEMENT <<std::endl; 174 cout << NUANCE_COPYRIGHT <<std::endl; 175 #endif 176 177 // Process all XML files given on command line 178 179 if(argc<5){ 180 return usage_error(argv[0]); 181 } 182 183 for(int i=1;i<argc;i++) 184 { 185 if(!strcmp(argv[i],"-grxml")) 186 grxmlfile = argv[++i]; 187 else if(!strcmp(argv[i],"-debug")) 188 debug++; 189 else if(!strcmp(argv[i],"-verbose")) 190 verbose++; 191 else if(!strcmp(argv[i],"-par") || !strcmp(argv[i],"-parfile")) 192 parfile = argv[++i]; 193 else if(!strcmp(argv[i],"-vocab")) 194 cmdline_vocfile = argv[++i]; 195 else if(!strcmp(argv[i],"-outdir")) 196 outdir = std::string(argv[++i]); 197 else { 198 printf("error_usage: argument [%s]\n", argv[i]); 199 return usage_error(argv[0]); 200 return (int)ESR_INVALID_ARGUMENT; 201 } 202 } 203 204 //process_xml( std::string(grxmlfile), parfile ); 205 std::string filename = std::string(grxmlfile); 206 207 /*************************** 208 process xml 209 ***************************/ 210 211 cout << "processing [" << filename << "] ..." 
<< endl; 212 213 TiXmlDocument node; 214 bool bLoadedOK = node.LoadFile( filename.c_str() ); 215 if(!bLoadedOK || node.Error()) { 216 std::cout << "Error: while creating TiXmlDocument from " << filename << std::endl; 217 std::cout << "Error: " << node.Error() << " id " << node.ErrorId() << " row " << node.ErrorRow() << " col " << node.ErrorCol() << std::endl; 218 std::cout << "Error: " << node.ErrorDesc() << std::endl; 219 std::cout << "Error: near " << showline( filename.c_str(), node.ErrorRow()) << std::endl; 220 return (int)ESR_INVALID_ARGUMENT; 221 } 222 223 224 // ************************************************* 225 // Parse the file into a DOM object and create word graph 226 // 227 GRXMLDoc *doc = new (GRXMLDoc); 228 std::string filenameNoPath = ExtractFileName(filename); 229 doc->parseGrammar( node, filenameNoPath ); // THE PARSING AND NETWORK BUILD HAPPENS IN HERE 230 /************************ 231 end of xml processing 232 ************************/ 233 234 // Create grammar network files. Use prefix of input file for output. 
235 std::string s = filename; 236 std::string grxmlbase = outdir + "/" + ExtractFileName(grxmlfile); 237 unsigned int p1 = grxmlbase.find_last_of("."); 238 if ( p1 != string::npos ) 239 grxmlbase.assign( grxmlbase, 0, p1); 240 241 std::string newName; 242 newName = grxmlbase + ".map"; 243 doc->writeMapFile( newName ); 244 newName = grxmlbase + ".script"; 245 doc->writeScriptFile( newName ); 246 247 doc->writeGraphFiles( grxmlbase, false ); 248 249 // 250 // SR initialization 251 // 252 char vocfile[MAX_PATH_NAME]; 253 char cfstfile[MAX_PATH_NAME]; 254 char modelmapfile[MAX_PATH_NAME]; 255 size_t len; 256 257 PMemInit(); 258 printf("info: Using parfile %s\n",parfile); 259 status = SR_SessionCreate((const LCHAR*) parfile); 260 // status = SR_SessionCreate ( parfile ); 261 if ( status != ESR_SUCCESS ) { 262 LPRINTF("Error: SR_SessionCreate(%s) %s\n", parfile, ESR_rc2str(status)); 263 return (int)status; 264 } 265 266 // vocfile 267 if(cmdline_vocfile) { 268 strcpy( vocfile, cmdline_vocfile); 269 } else { 270 len = MAX_PATH_NAME; 271 ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), (LCHAR*)vocfile, &len ); 272 // skip PrefixWithBaseDirectory(), 'tis done inside SR_VocabularyLoad() 273 } 274 printf("info: Using dictionary %s\n",vocfile); 275 276 // modelmapfile 277 len = MAX_PATH_NAME; 278 ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)modelmapfile, &len); 279 len = MAX_PATH_NAME; 280 status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)modelmapfile, &len); 281 char* p = strrchr(modelmapfile,'/'); 282 if(!p) p = strrchr(modelmapfile,'\\'); 283 if(p) strcpy(p, "/models128x.map"); 284 285 // cfstfile 286 len = MAX_PATH_NAME; 287 ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)cfstfile, &len); 288 len = MAX_PATH_NAME; 289 status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)cfstfile, &len); 290 p = strrchr(cfstfile,'/'); 291 if(!p) p = strrchr(cfstfile,'\\'); 292 if(p) strcpy(p, "/generic.C"); 293 294 status = make_openfst_graphs( doc, grxmlbase, (const 
char*)vocfile, (const char*)cfstfile, (const char*)modelmapfile); 295 if(status != ESR_SUCCESS) { 296 LPRINTF("Error: make_openfst_graphs() returned %s\n", ESR_rc2str(status)); 297 } else { 298 /* make_openfst_graphs() can sometimes call doc->setMeta() to put 299 Session parameters into the .params file, so writeParamsFile() 300 should be called after make_openfst_graphs() */ 301 newName = grxmlbase + ".params"; 302 doc->writeParamsFile( newName ); 303 } 304 305 // 306 // SR de-initialization 307 // 308 SR_SessionDestroy(); 309 PMemShutdown(); 310 311 delete doc; 312 return (int)status; 313 } 314 315 /*-----------------------------------------------------------------* 316 * utils * 317 *-----------------------------------------------------------------*/ 318 319 bool is_slot_symbol( const char* sym) 320 { 321 const char* p = strstr(sym,SLOT_PREFIX); 322 int len = strlen(sym); 323 if(len>4 && !strcmp(sym+len-2,SLOT_SUFFIX) && (p-sym)<len-2) { 324 return true; 325 } else 326 return false; 327 } 328 329 int64 StrToId(const char *s, fst::SymbolTable *syms, 330 const char *name) 331 { 332 int64 n; 333 if (syms) { 334 n = syms->Find(s); 335 if (n < 0) { 336 cerr << "FstReader: Symbol \"" << s 337 << "\" is not mapped to any integer " << name 338 << ", symbol table = " << syms->Name(); 339 } 340 } else { 341 char *p; 342 n = strtoll(s, &p, 10); 343 if (p < s + strlen(s) || n < 0) { 344 cerr << "FstReader: Bad " << name << " integer = \"" << s; 345 } 346 } 347 return n; 348 } 349 350 /* FstMergeOLabelsToILabels, FstSplitOLabelsFromILabels 351 are used to make sure the minimization does not go overboard in pushing 352 output labels toward the beginning of the graph. When that happens 353 then the speech recognition decoder fails! 
*/ 354 355 ESR_ReturnCode FstMergeOLabelsToILabels( fst::StdVectorFst& fst_, int max_ilabels ) 356 { 357 fst::StdArc::StateId s = fst_.Start(); 358 if (s == fst::kNoStateId) 359 return ESR_INVALID_ARGUMENT; 360 for (fst::StateIterator< fst::StdVectorFst> siter(fst_); 361 !siter.Done(); siter.Next()) { 362 s = siter.Value(); 363 364 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s); 365 !aiter.Done(); aiter.Next()) { 366 fst::StdArc arc = aiter.Value(); 367 if( arc.ilabel >= max_ilabels || 368 (float)arc.ilabel + ((float)max_ilabels)*arc.olabel > INT_MAX) { 369 std::cout << "Error: internal error in FstMergeOLabelsToILabels() " << std::endl; 370 return ESR_NOT_IMPLEMENTED; 371 } 372 arc.ilabel = arc.ilabel + max_ilabels * arc.olabel; 373 arc.olabel = 0; 374 aiter.SetValue( arc); 375 } 376 } 377 return ESR_SUCCESS; 378 } 379 380 ESR_ReturnCode FstMergeOLabelsToILabels_GetMax( fst::StdVectorFst& fst_, int& max_ilabel ) 381 { 382 if (fst_.Start() == fst::kNoStateId) return ESR_INVALID_ARGUMENT; 383 for (fst::StateIterator< fst::StdVectorFst> siter(fst_); 384 !siter.Done(); siter.Next()) { 385 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, siter.Value()); 386 !aiter.Done(); aiter.Next()) { 387 if( aiter.Value().ilabel > max_ilabel) 388 max_ilabel = aiter.Value().ilabel; 389 } 390 } 391 max_ilabel++; 392 return ESR_SUCCESS; 393 } 394 395 ESR_ReturnCode FstSplitOLabelsFromILabels( fst::StdVectorFst& fst_, int max_ilabels ) 396 { 397 fst::StdArc::StateId s = fst_.Start(); 398 if (s == fst::kNoStateId) 399 return ESR_INVALID_ARGUMENT; 400 for (fst::StateIterator< fst::StdVectorFst> siter(fst_); 401 !siter.Done(); siter.Next()) { 402 s = siter.Value(); 403 404 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s); 405 !aiter.Done(); aiter.Next()) { 406 fst::StdArc arc = aiter.Value(); 407 arc.olabel = arc.ilabel / max_ilabels; 408 arc.ilabel = arc.ilabel - arc.olabel*max_ilabels; 409 aiter.SetValue( arc); 410 } 411 } 412 return ESR_SUCCESS; 
413 } 414 415 /* this is to replace the "fake" extra epsilon input labels, which were 416 put there to disambiguate homonyms */ 417 418 ESR_ReturnCode FstReplaceILabel( fst::StdVectorFst& fst_, int from_ilabel, int into_ilabel) 419 { 420 fst::StdArc::StateId s = fst_.Start(); 421 if (s == fst::kNoStateId) 422 return ESR_INVALID_ARGUMENT; 423 for (fst::StateIterator< fst::StdVectorFst> siter(fst_); 424 !siter.Done(); siter.Next()) { 425 s = siter.Value(); 426 427 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s); 428 !aiter.Done(); aiter.Next()) { 429 fst::StdArc arc = aiter.Value(); 430 if(arc.ilabel == from_ilabel) { 431 arc.ilabel = into_ilabel; 432 aiter.SetValue( arc); 433 } 434 } 435 } 436 return ESR_SUCCESS; 437 } 438 439 /* this pushes the slot labels forward which gives an opportunity for 440 multiple instances of the slot to be merged, eg. lookup NAME 441 vs lookup contact NAME .. if in separate rules, then they will 442 merge thanks to using 3 arcs for the NAME */ 443 444 ESR_ReturnCode FstPushSlotLikeOLabels( fst::StdVectorFst& fst_, int myMin, int myMax) 445 { 446 int i; 447 ESR_ReturnCode rc = ESR_SUCCESS; 448 char done_for_state[2*65536]; // hope this is enough! 
449 memset( &done_for_state[0], 0, sizeof(done_for_state)); 450 451 fst::StdArc::StateId s = fst_.Start(); 452 for (fst::StateIterator< fst::StdVectorFst> siter(fst_); 453 !siter.Done(); siter.Next()) { 454 s = siter.Value(); 455 456 if(done_for_state[ s]) continue; 457 done_for_state[ s]++; 458 459 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s); 460 !aiter.Done(); aiter.Next()) { 461 fst::StdArc arc = aiter.Value(); 462 if(arc.olabel >= myMin && arc.olabel < myMax) { 463 fst::StdArc::StateId s2 = arc.nextstate; 464 int slotId = arc.olabel; 465 466 if(verbose) 467 std::cout << "info: FstPushSlotLikeOLabels() at state " << s << " arc ilabel " << arc.ilabel << " olabel " << arc.olabel << std::endl; 468 469 arc.ilabel = EPSILON_LABEL; 470 arc.olabel = EPSILON_LABEL; 471 arc.weight = 0; // zero weight 472 aiter.SetValue( arc); 473 done_for_state[ s2]++; 474 for(fst::MutableArcIterator<fst::StdVectorFst> aiter2(&fst_, s2); 475 !aiter2.Done(); aiter2.Next()) { 476 fst::StdArc arc2 = aiter2.Value(); 477 if(arc2.ilabel == WORD_BOUNDARY) { 478 std::cout << "Error: FstPushSlotLikeOLabels() failing, there could be confusion between the slot (hack-pron) and a real-pron, the slot olabel may have been pushed by earlier fst operations!" 
<< std::endl; 479 rc = ESR_INVALID_STATE; 480 } else 481 arc2.ilabel = EPSILON_LABEL; 482 arc2.olabel = slotId; 483 aiter2.SetValue( arc2); 484 } 485 } 486 } 487 } 488 489 /* check */ 490 int *num_pclg_arcs_using_slot = new int[myMax]; 491 for(i=0;i<myMax;i++) num_pclg_arcs_using_slot[i] = 0; 492 for (fst::StateIterator< fst::StdVectorFst> siter(fst_); 493 !siter.Done(); siter.Next()) { 494 s = siter.Value(); 495 496 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s); 497 !aiter.Done(); aiter.Next()) { 498 fst::StdArc arc = aiter.Value(); 499 if(arc.olabel >= myMin && arc.olabel < myMax) 500 num_pclg_arcs_using_slot[arc.olabel]++; 501 } 502 } 503 for(i=0; i<myMax; i++) { 504 if(num_pclg_arcs_using_slot[i] > 1) { 505 std::cout << "Error: SREC will not support multiply referred slots." << std::endl; 506 std::cout << "Error: Consider re-working your grammar to merge the references into one rule" << std::endl; 507 std::cout << "Error: or use two different slots" << std::endl; 508 rc = ESR_NOT_SUPPORTED; 509 } 510 } 511 delete [] num_pclg_arcs_using_slot; 512 513 return rc; 514 } 515 516 /* gets the range of slot numbers, myMin inclusive, myMax is exclusive */ 517 518 void get_slot_olabel_range( const fst::SymbolTable* syms, int* myMin, int* myMax) 519 { 520 // assumes slots are at the top of the symbol table 521 fst::SymbolTableIterator iter( *syms); 522 *myMin = *myMax = 0; 523 for(iter.Reset(); !iter.Done(); iter.Next() ) { 524 const char* sym = iter.Symbol(); 525 if ( is_slot_symbol( sym)) { 526 if(! (*myMin)) *myMin = iter.Value(); 527 *myMax = iter.Value()+1; 528 } 529 } 530 } 531 532 /* SLOT_COUNTER_OFFSET 533 The cfst is used to turn phonemes into acoustic models, but we're using 534 special phonemes for the slots, and must here add those as pass through 535 in the Cfst, meaning that the slot marker must be unchanged after 536 composition. To do that we find the places in the Cfst where silence is 537 used, and put the slot marker arcs in parallel. 
This also causes the
   models before the slot to assume silence to the right, and the models after
   the slot to assume silence to the left, both of which are reasonable */

/* For every arc of 'cfst_' whose output label is SILENCE_CODE, adds one arc
 * per slot id i in [myMin, myMax), from the same source state to the same
 * destination, with SLOT_COUNTER_OFFSET+i as both input (model) and output
 * (phoneme) label and weight 0.0.  The arcs are then sorted by output label.
 *
 * Returns ESR_INVALID_ARGUMENT when the FST has no start state or contains
 * no silence arc at all, ESR_SUCCESS otherwise.
 *
 * NOTE(review): arcs are appended to a state while a MutableArcIterator is
 * walking that same state, so the iterator also visits the new arcs (they
 * never match SILENCE_CODE).  This presumably relies on the iterator being
 * index based; confirm against the OpenFst version in use. */
ESR_ReturnCode FstAddSlotMarkersToCFst( fst::StdVectorFst& cfst_, int myMin, int myMax)
{
  typedef fst::StateIterator< fst::StdVectorFst> StateIter;
  typedef fst::MutableArcIterator<fst::StdVectorFst> ArcIter;

  const int silence_label = SILENCE_CODE;
  int silence_arcs_seen = 0;

  if (cfst_.Start() == fst::kNoStateId)
    return ESR_INVALID_ARGUMENT;

  for (StateIter states(cfst_); !states.Done(); states.Next()) {
    const fst::StdArc::StateId from = states.Value();

    for (ArcIter arcs(&cfst_, from); !arcs.Done(); arcs.Next()) {
      /* work on a copy: AddArc() below may move the state's arc storage */
      const fst::StdArc current = arcs.Value();
      if (current.olabel != silence_label)
        continue;

      ++silence_arcs_seen;
      for (int slot = myMin; slot < myMax; ++slot) {
        const int marker = SLOT_COUNTER_OFFSET + slot;
        cfst_.AddArc( from, fst::StdArc( marker /*model*/, marker /*phoneme*/,
                                         0.0, current.nextstate));
      }
    }
  }

  fst::ArcSort(&cfst_, fst::StdOLabelCompare());
  return (silence_arcs_seen == 0) ? ESR_INVALID_ARGUMENT : ESR_SUCCESS;
}

/*
 * make the graphs used by the recognition engine during the search.
573 */ 574 575 ESR_ReturnCode make_openfst_graphs( GRXMLDoc* pDoc, 576 const std::string& grxmlBasename, 577 const char* vocabFilename, 578 const char* cfstFilename, 579 const char* modelmapFilename) 580 { 581 SR_Vocabulary *vocab = 0; 582 ESR_ReturnCode rc; 583 584 fst::StdVectorFst l_fst; // .L file, created from the .map and .ok 585 586 int stateSt, stateEn; 587 size_t len; 588 bool do_skip_interword_silence = false; 589 hash_map<string,int> homonym_count; 590 int word_penalty = 0; 591 592 rc = SR_VocabularyLoad(vocabFilename, &vocab); 593 if (rc != ESR_SUCCESS) { 594 cerr << "Error: " << ESR_rc2str(rc) << endl; 595 return ESR_INVALID_ARGUMENT; // goto CLEANUP; 596 } 597 598 std::string word_penalty_str; 599 if( pDoc->findMeta(std::string("word_penalty"),word_penalty_str)) 600 word_penalty = atoi((const char *)word_penalty_str.c_str()); 601 else { 602 rc = ESR_SessionGetInt( L("CREC.Recognizer.wordpen"), &word_penalty); 603 if(rc != ESR_SUCCESS) 604 word_penalty = DEFAULT_WB_COST; 605 word_penalty_str = DEFAULT_WB_COST_STR; 606 pDoc->setMeta( std::string("word_penalty"), word_penalty_str) ; 607 cout << "using word_penalty " << word_penalty << endl; 608 } 609 610 std::string do_skip_interword_silence_str; 611 if( pDoc->findMeta(std::string("do_skip_interword_silence"), do_skip_interword_silence_str)) 612 do_skip_interword_silence = ((do_skip_interword_silence_str != "true") ? 
false : true); 613 614 /*-----------------------------------------------------------------* 615 * read the .map and .omap created from grxmlcompiler classes * 616 *-----------------------------------------------------------------*/ 617 618 std::string omapFilename = grxmlBasename + std::string(".omap"); 619 std::string imapFilename = grxmlBasename + std::string(".map"); 620 621 cout << "info: reading word symbols " << imapFilename << endl; 622 fst::SymbolTable *word_syms = fst::SymbolTable::ReadText(imapFilename); 623 if(!word_syms) { 624 cerr << "error: reading word_syms" << endl; 625 return ESR_INVALID_ARGUMENT; 626 } 627 cout << "info: reading parser symbols " << omapFilename << endl; 628 fst::SymbolTable *prsr_syms = fst::SymbolTable::ReadText(omapFilename); 629 if(!prsr_syms) { 630 cerr << "error: reading prsr_syms" << endl; 631 return ESR_INVALID_ARGUMENT; 632 } 633 cout << "info: reading model symbols " << modelmapFilename << endl; 634 fst::SymbolTable *model_syms = fst::SymbolTable::ReadText(modelmapFilename); 635 if(!prsr_syms) { 636 cerr << "error: reading prsr_syms" << endl; 637 return ESR_INVALID_ARGUMENT; 638 } 639 int max_model_sym = 0; 640 /* if(1) { 641 fst::SymbolTableIterator iter( *model_syms); 642 for(iter.Reset(); !iter.Done(); iter.Next() ) max_model_sym++; */ 643 644 /*-----------------------------------------------------------------* 645 * create the .L pronunciations transducer * 646 *-----------------------------------------------------------------*/ 647 648 // Adds state 0 to the initially empty FST and make it the start state. 
649 stateSt = l_fst.AddState(); 650 stateEn = l_fst.AddState(); 651 l_fst.SetStart(stateSt); // arg is state ID 652 l_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight 653 l_fst.AddArc(stateEn, fst::StdArc(EPSILON_LABEL,EPSILON_LABEL,0.0,stateSt)); 654 655 int num_slots = 0; 656 fst::SymbolTableIterator iter( *word_syms); 657 for(iter.Reset(); !iter.Done(); iter.Next() ) { 658 ESR_ReturnCode rc; 659 LCHAR prons[MAX_PRONS_LENGTH]; 660 const char* phrase = iter.Symbol(); 661 int wordId = iter.Value(); 662 bool wordId_is_silence = false; 663 bool wordId_is_slot = false; 664 /* script or scope marker, skip it */ 665 /* if( is_scope_marker( phrase) || is_script_marker(phrase)) 666 continue; */ 667 /* epsilon */ 668 if(!strcmp( phrase, SILENCE_PREFIX_WORD) 669 || !strcmp(phrase,SILENCE_SUFFIX_WORD)) 670 wordId_is_silence = true; 671 else if( !strcmp( phrase, "eps") && wordId == 0) 672 continue; 673 /* rule markers */ 674 else if( strstr( phrase, ".grxml@")) 675 continue; 676 /* script markers */ 677 else if( phrase[0]=='_' && strspn(phrase+1,"0123456789")==strlen(phrase+1)) 678 continue; 679 else if(is_slot_symbol(phrase)) { 680 cout << "SLOT>> " << phrase << endl; 681 wordId_is_slot = true; 682 num_slots++; 683 } 684 685 if(num_slots > MAX_NUM_SLOTS) { 686 std::cout << "Error: SREC may have trouble with this many slots! 
(" << num_slots << ")" << std::endl; 687 // return ESR_NOT_SUPPORTED; 688 } 689 690 if(wordId_is_slot) { 691 int stateP = stateSt, statePp1; 692 /* with 2 arcs, we have a better chance to merge the slot if used from 693 different parts of the grammar, see FstPushSlotLikeOLabels elsewhere */ 694 statePp1 = l_fst.AddState(); 695 l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, wordId, 0.0, statePp1)); 696 stateP = statePp1; 697 statePp1 = l_fst.AddState(); 698 l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, EPSILON_LABEL, 0.0, statePp1)); 699 stateP = statePp1; 700 l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn)); 701 } else { 702 size_t len_used; 703 LCHAR *pron = 0, *p; 704 /* word is ok, get the pron */ 705 len = MAX_PRONS_LENGTH; 706 rc = SR_VocabularyGetPronunciation(vocab, phrase, prons, &len); 707 if (rc != ESR_SUCCESS) { 708 LPRINTF( "ERROR: SR_VocabularyGetPronunciation(*,%s,*,*) returned %s\n", phrase, ESR_rc2str(rc)); 709 SR_VocabularyDestroy(vocab); 710 return rc; 711 } 712 for(len_used=0; len_used<len; ) { 713 pron = &prons[0]+len_used; 714 len_used += LSTRLEN(pron)+1; 715 if( *pron == 0) break; 716 int stateP = stateSt, statePp1; 717 int olabel = wordId; 718 LPRINTF("%s : %s\n", phrase, pron); 719 /* main pronunciation */ 720 for(p=pron; *p; p++) { 721 statePp1 = l_fst.AddState(); 722 if(*p == OPTSILENCE_CODE) { 723 l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, olabel, 0.0, statePp1)); 724 l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, olabel, 0.0, statePp1)); 725 } else { 726 l_fst.AddArc(stateP, fst::StdArc( *p, olabel, 0.0, statePp1)); 727 } 728 stateP = statePp1; 729 olabel = EPSILON_LABEL; 730 } 731 /* add epsilons if this is a homonym */ 732 string pron_string = pron; 733 hash_map<string,int>::const_iterator it = homonym_count.find( pron_string); 734 if(it == homonym_count.end()) { 735 homonym_count[ pron_string] = 0; 736 } else { 737 homonym_count[ pron_string] = homonym_count[ 
pron_string]+1; 738 } 739 int extra_epsilons_needed = homonym_count[ pron_string] ; 740 if(wordId_is_silence) extra_epsilons_needed = 0; 741 for(int i=0;i<extra_epsilons_needed;i++) { 742 statePp1 = l_fst.AddState(); 743 l_fst.AddArc(stateP, fst::StdArc( EXTRA_EPSILON_LABEL, olabel, 0.0, statePp1)); 744 stateP = statePp1; 745 } 746 /* add optional silence after each word */ 747 if(!do_skip_interword_silence && !wordId_is_silence && !wordId_is_slot) { 748 statePp1 = l_fst.AddState(); 749 l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, EPSILON_LABEL, 0.0, statePp1)); 750 l_fst.AddArc(statePp1, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn)); 751 l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn)); 752 } else if(wordId_is_silence && !strcmp(phrase, SILENCE_SUFFIX_WORD)) { 753 /* SILENCE_SUFFIX_WORD does not need a terminal .wb */ 754 l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, EPSILON_LABEL, 0.0, stateEn)); 755 } else { 756 l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn)); 757 } 758 } // loop over multiple prons 759 } // slot vs non-slot 760 } /* .map (word_syms) iterator */ 761 762 std::string lfstFilename = grxmlBasename + ".L"; 763 // We can save this FST to a file with: 764 if(debug) l_fst.Write(lfstFilename.c_str()); 765 766 /*-----------------------------------------------------------------* 767 * read the .P.txt created from grxmlcompiler classes * 768 *-----------------------------------------------------------------*/ 769 770 std::string ptxtFilename = grxmlBasename + std::string(".P.txt"); 771 std::ifstream istrm(ptxtFilename.c_str()); 772 if(!istrm) { 773 cerr << "error: reading ptxtFilename" << endl; 774 return ESR_INVALID_ARGUMENT; 775 } 776 777 cout << "info: reading parser from text " << ptxtFilename << endl; 778 fst::FstReader<fst::StdArc> reader( istrm, ptxtFilename, word_syms, prsr_syms, 779 /*state_syms*/ NULL, 780 /*acceptor*/ false, 781 /*ikeep*/ false, 782 /*okeep*/ false, 
783 /*nkeep*/ false); 784 // .P file, created from the .P.txt and .omap 785 const fst::StdVectorFst& p_fst = reader.Fst(); 786 787 /*-----------------------------------------------------------------* 788 * make the helper FSTs * 789 *-----------------------------------------------------------------*/ 790 791 cout << "info: creating helper fsts" << endl; 792 fst::StdVectorFst prefix_fst; 793 fst::StdVectorFst suffix_fst; 794 fst::StdVectorFst eps_fst; 795 // int eps_word = StrToId("eps", word_syms, "arc ilabel"); 796 int pau_word = StrToId(SILENCE_PREFIX_WORD, word_syms, "arc ilabel"); 797 int pau2_word = StrToId(SILENCE_SUFFIX_WORD, word_syms, "arc ilabel"); 798 if(pau_word < 0 || pau2_word < 0) 799 return ESR_INVALID_ARGUMENT; 800 801 stateSt = prefix_fst.AddState(); 802 stateEn = prefix_fst.AddState(); 803 prefix_fst.SetStart(stateSt); // arg is state ID 804 prefix_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight 805 prefix_fst.AddArc(stateSt, fst::StdArc(pau_word, pau_word, 0.0, stateEn)); 806 807 stateSt = suffix_fst.AddState(); 808 stateEn = suffix_fst.AddState(); 809 suffix_fst.SetStart(stateSt); // arg is state ID 810 suffix_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight 811 suffix_fst.AddArc(stateSt, fst::StdArc(pau2_word, pau2_word, 0.0, stateEn)); 812 813 stateSt = eps_fst.AddState(); 814 stateEn = stateSt; // stateEn = eps_fst.AddState(); 815 eps_fst.SetStart(stateSt); // arg is state ID 816 eps_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight 817 // eps_fst.AddArc(stateSt, fst::StdArc(eps_word, eps_word, 0.0, stateEn)); 818 819 /*-----------------------------------------------------------------* 820 * make Grev2.det.txt * 821 *-----------------------------------------------------------------*/ 822 cout << "info: creating reverse g fst" << endl; 823 fst::StdVectorFst g_fst = p_fst; // this is a copy!! 
824 fst::StdVectorFst grev_fst; // reversed 825 fst::StdVectorFst grev_min_fst; // eps removed and minimized 826 fst::StdVectorFst grev_det_fst; 827 828 fst::Project(&g_fst, fst::PROJECT_INPUT); 829 if(debug) g_fst.Write( grxmlBasename + ".G"); 830 fst::Reverse( g_fst, &grev_fst); 831 if(debug) grev_fst.Write( grxmlBasename + ".Grev"); 832 fst::RmEpsilon( &grev_fst, /*connect?*/ true ); 833 if(debug) grev_fst.Write( grxmlBasename + ".Grevrme"); 834 fst::Determinize(grev_fst, &grev_det_fst); 835 if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedet"); 836 if(1) fst::Minimize(&grev_det_fst); 837 if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin"); 838 fst::Concat( &eps_fst, grev_det_fst); 839 grev_det_fst = eps_fst; 840 if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin2"); 841 std::string grevFilename = grxmlBasename + std::string(".Grev2.det.txt"); 842 843 cout << "info: writing reverse G fst as text " << grevFilename << endl; 844 ostream* ostrm1 = new ofstream( grevFilename.c_str(), ios_base::out); 845 fst::FstPrinter<fst::StdArc> printer1( grev_det_fst, 846 word_syms, word_syms, 847 NULL, /*acceptor?*/ true); 848 printer1.Print( ostrm1, grevFilename); 849 delete ostrm1; 850 851 /*-----------------------------------------------------------------* 852 * make PCLG.txt * 853 *-----------------------------------------------------------------*/ 854 855 fst::StdVectorFst* c_fst; 856 fst::StdVectorFst lg_fst; 857 fst::StdVectorFst clg_fst; 858 fst::StdVectorFst clg_det_fst; 859 860 cout << "info: reading model fst " << cfstFilename << endl; 861 c_fst = fst::StdVectorFst::Read( cfstFilename); 862 863 int slot_olabel_min=0, slot_olabel_max=0; // [min,max) .. ie excludes max 864 get_slot_olabel_range( word_syms, &slot_olabel_min, &slot_olabel_max); 865 if(slot_olabel_max > MAX_NUM_SLOTS) 866 std::cout << "Error: SREC may have trouble with this many slots! 
(" << slot_olabel_max << ")" << std::endl; 867 868 /* add slot markers as if they were silence phonemes, this makes the context 869 for them as if the slot were silence, which is reasonable, although another 870 reasonable thing would be to allow all contexts. Adding the true context 871 only would add complexity and slow down word addition too much. */ 872 873 rc = FstAddSlotMarkersToCFst( *c_fst, slot_olabel_min, slot_olabel_max); 874 if(rc) return rc; 875 876 fst::Concat( &g_fst, suffix_fst); 877 fst::Concat( &prefix_fst, g_fst); 878 if(debug) prefix_fst.Write( grxmlBasename + ".G2"); 879 fst::ComposeOptions copts( /*connect?*/ true); 880 881 fst::ArcSort(&l_fst, fst::StdOLabelCompare()); 882 fst::ArcSort(&prefix_fst, fst::StdILabelCompare()); 883 884 fst::Compose(l_fst, prefix_fst, &lg_fst, copts); 885 if(debug) lg_fst.Write( grxmlBasename + ".LG"); 886 fst::ArcSort(&lg_fst, fst::StdILabelCompare()); 887 if(debug) lg_fst.Write( grxmlBasename + ".LG2"); 888 889 fst::RmEpsilon( &lg_fst, /*connect?*/ true ); 890 if(debug) lg_fst.Write( grxmlBasename + ".LGrme"); 891 fst::Determinize( lg_fst, &clg_fst); // clg_fst is really lg_det_fst! 
892 if(debug) clg_fst.Write( grxmlBasename + ".LGrmedet"); 893 rc = FstReplaceILabel( clg_fst, EXTRA_EPSILON_LABEL, EPSILON_LABEL); 894 fst::Compose( *c_fst, clg_fst, &clg_det_fst, copts); 895 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet"); 896 897 rc = FstMergeOLabelsToILabels_GetMax( clg_det_fst, /*int&*/max_model_sym); 898 if(verbose) 899 cout << "info: merging into ilabels I=i+" << max_model_sym << "*o" << endl; 900 rc = FstMergeOLabelsToILabels( clg_det_fst, max_model_sym); 901 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet2"); 902 fst::Minimize( &clg_det_fst); 903 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet3"); 904 if(verbose) 905 cout << "info: splitting from ilabels" << endl; 906 rc = FstSplitOLabelsFromILabels( clg_det_fst, max_model_sym); 907 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet4"); 908 909 rc = FstPushSlotLikeOLabels( clg_det_fst, slot_olabel_min, slot_olabel_max); 910 if(rc != ESR_SUCCESS) 911 std::cout << "Error: FstPushSlotLikeOLabels() failed" << std::endl; 912 if(debug) clg_det_fst.Write( grxmlBasename + ".CLG"); 913 914 std::string pclgFilename = grxmlBasename + ".PCLG.txt"; 915 ostream* ostrm = new ofstream( pclgFilename.c_str(), ios_base::out); 916 fst::FstPrinter<fst::StdArc> printer( clg_det_fst, 917 model_syms, word_syms, 918 NULL, /*acceptor?*/ false); 919 printer.Print( ostrm, pclgFilename); 920 delete ostrm; 921 922 delete c_fst; 923 delete word_syms; word_syms = NULL; 924 delete prsr_syms; prsr_syms = NULL; 925 delete model_syms; model_syms = NULL; 926 927 /*-----------------------------------------------------------------* 928 * cleanup * 929 *-----------------------------------------------------------------*/ 930 931 if(vocab) { 932 SR_VocabularyDestroy(vocab); 933 vocab = NULL; 934 } 935 936 return rc; 937 938 } 939 940 941