1 /*---------------------------------------------------------------------------* 2 * run_seq_lts.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #include <stdlib.h> 23 #include <string.h> 24 #include <math.h> 25 #include <ctype.h> 26 27 #ifndef NO_STDERR 28 #include <stdio.h> 29 #else 30 extern void PrintError(char *msg, unsigned long p1, unsigned long p2, unsigned long p3); 31 #endif 32 33 #include "passert.h" 34 #include "pmemory.h" 35 #include "plog.h" 36 #include "phashtable.h" 37 #include "lts_error.h" 38 #include "lts.h" 39 #include "lts_seq_internal.h" 40 #include "port_fileio.h" 41 #include "platform_utils.h" /* strdup, safe_strtok, etc */ 42 43 #define ASSERT(x) passert(x) 44 45 #ifdef TI_DSP 46 #include "tidsp_defines.h" 47 #endif 48 49 #ifdef _DEBUG 50 #define PRINT_LOAD_TREE_SUMMARY 0 51 #define PRINT_LOAD_TREE 0 52 #define PRINT_CONS_COMB 0 53 #define PRINT_DP_LETTER 0 54 #define PRINT_LTS_WORD 0 55 #define PRINT_DICT_LOOKUP 0 56 #endif 57 58 #define LTS_MARKER_WORD_START "WS" 59 #define LTS_MARKER_PRON_START "PS" 60 #define LTS_MARKER_SYLL_START "SS" 61 #define LTS_MARKER_SYLL_START_DD "SS%d" 62 #define LTS_MARKER_PIPESEP "|" 63 #define LTS_MARKER_PIPESEP_CHAR '|' 64 65 static int load_int(PORT_FILE *fp); 66 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp); 67 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions); 68 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap); 69 static SWIsltsResult free_letter_mapping(LM *lm); 70 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap); 71 static SWIsltsResult free_phone_mapping(PM *pm); 72 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp); 73 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num); 74 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters, 75 LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp); 76 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, LQUESTION **questions, int num_questions, LM *lm); 77 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp); 78 static SWIsltsResult free_allowable_cons_comb(LTS *lts); 79 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp); 80 static SWIsltsResult free_question_strings(LTS* lts); 81 #define find_letter_index( myLet, myLM) (myLM->letter_index_for_letter[ toupper(myLet)]) 82 int find_phone(const char *ph, PM *pm); 83 int find_best_string(const char *str, LTS* lts); 84 int find_best_prefix_string(const char *str, LTS* lts); 85 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone); 86 #define in_list(myV, myQ) (bitarray_read_bit( myQ->membership, myV)) 87 #define qmatches(myQ, myU) (in_list( myU->properties[ myQ->type], myQ)) 88 int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) ; 89 int find_output_for_dp(LTS *lts, int *pbackoff_output); 90 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length); 91 int is_allowable_cons_comb(LTS *lts, const char *cons_string); 92 void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length); 93 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *num_out); 94 95 /*------------ 96 * 97 * bitarray 98 * 99 *-----------*/ 100 101 #define bitarray_read_bit( biTs, iBiT) ( biTs[iBiT/16] & (1<<((iBiT)%16)) ) 102 /* int bitarray_read_bit( unsigned short* bits, int iBit) 103 { // ASSERT( iBit<256); 104 return bits[iBit/16] & (1<<((iBit)%16)); 105 } */ 106 107 void bitarray_write_bit( unsigned short* bits, int iBit, int iVal) 108 { 109 unsigned short sect; 110 ASSERT( iBit<256); 111 sect = bits[iBit/16]; 112 if(iVal) { sect |= (1<<(iBit%16)); } 113 else { sect &= ~(1<<(iBit%16)); } 114 bits[ iBit/16] = sect; 115 } 116 void bitarray_populate_from_list(unsigned short* bits, char* list, int listlen) 117 { 118 unsigned int i; 119 for(i=0; i<UCHAR_MAX/sizeof(unsigned short)/8; i++) 120 bits[i] = 0; 121 for(i=0; i<(unsigned int)listlen; i++) 122 bitarray_write_bit( bits, list[i], 1); 123 } 124 125 /*----------- 126 * 127 * PHashTable 128 * 129 *-----------*/ 130 131 static int HashCmpWord(const LCHAR *key1, const LCHAR *key2) 132 { return strcmp((const char*)key1,(const char*)key2); } 133 static unsigned int HashGetCode(const void *key) 134 { 135 const char* k = (const char*)key; 136 unsigned int i, len, h = 0; 137 len = strlen(k); 138 for (i=0; i<len; i++) h = 31*h + (unsigned int)k[i]; 139 return h; 140 } 141 void* my_PHashTableCreate_FromStrings( const char* strings[], int num_strings, 142 const LCHAR* hashName) 143 { 144 PHashTable* table = NULL; 145 ESR_ReturnCode rc = ESR_SUCCESS; 146 PHashTableArgs hashArgs; 147 int i; 148 hashArgs.capacity = 63; 149 hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION; 150 hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION; 151 hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR; 152 rc = PHashTableCreate( &hashArgs, hashName, &table); 153 for(i=0; i<num_strings; i++) { 154 void* old; 155 /* formerly the code used linear lookup, so let's avoid dups to match up */ 156 rc = PHashTableGetValue( table, strings[i], (void**)&old); 157 if(rc != ESR_SUCCESS) { 158 rc = PHashTablePutValue( table, strings[i], (const void *)i, NULL ); 159 } 160 } 161 return table; 162 } 163 164 /*--------- 165 * 166 * i/o 167 * 168 *---------*/ 169 170 static int load_int(PORT_FILE *fp) 171 { 172 int v; 173 174 PORT_FREAD_INT16((uint16 *)&v, sizeof(int), 1, fp); 175 176 return v; 177 } 178 179 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp) 180 { 181 int i, num_questions; 182 LQUESTION ** questions; 183 SWIsltsResult nRes = SWIsltsSuccess; 184 185 num_questions = load_int(fp); 186 187 #if PRINT_LOAD_TREE_SUMMARY 188 pfprintf(PSTDOUT,"loading %d questions\n", num_questions); 189 #endif 190 191 *pquestions = questions = (LQUESTION**) lts_alloc(num_questions, sizeof(LQUESTION*)); 192 if (questions == NULL) { 193 nRes = SWIsltsErrAllocResource; 194 goto CLEAN_UP; 195 } 196 197 for (i=0;i<num_questions;i++) { 198 questions[i] = (LQUESTION*) lts_alloc(1, sizeof(LQUESTION)); 199 if (questions[i] == NULL) { 200 nRes = SWIsltsErrAllocResource; 201 goto CLEAN_UP; 202 } 203 204 #if PRINT_LOAD_TREE 205 pfprintf(PSTDOUT,"LOAD_TREE: loading question %d\n", i); 206 #endif 207 208 PORT_FREAD_CHAR(&(questions[i]->type), sizeof(char), 1, fp); 209 PORT_FREAD_CHAR(&(questions[i]->num_list), sizeof(char), 1, fp); 210 211 questions[i]->list = (unsigned char*) lts_alloc(questions[i]->num_list, sizeof(unsigned char)); 212 if (questions[i]->list == NULL) { 213 nRes = SWIsltsErrAllocResource; 214 goto CLEAN_UP; 215 } 216 217 PORT_FREAD_CHAR(questions[i]->list, sizeof(char), (questions[i]->num_list), fp); 218 219 bitarray_populate_from_list( questions[i]->membership, (char*) questions[i]->list, questions[i]->num_list); 220 } 221 222 *pnum_questions = num_questions; 223 return SWIsltsSuccess; 224 225 CLEAN_UP: 226 227 free_lquestions(questions, num_questions); 228 *pnum_questions = 0; 229 *pquestions = NULL; 230 return nRes; 231 } 232 233 /* deallocate questions */ 234 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions) 235 { 236 SWIsltsResult nRes = SWIsltsSuccess; 237 int i; 238 239 if (questions) { 240 for (i=0; i<num_questions; i++) { 241 if (questions[i]->list) { 242 FREE(questions[i]->list); 243 questions[i]->list = NULL; 244 } 245 FREE(questions[i]); 246 questions[i] = NULL; 247 } 248 FREE(questions); 249 } 250 return nRes; 251 } 252 253 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap) 254 { 255 SWIsltsResult nRes = SWIsltsSuccess; 256 unsigned char len; 257 LM * lm; 258 int i; 259 260 /* pfprintf(PSTDOUT,"got len %d\n", len);*/ 261 lm = (LM*) lts_alloc(1, sizeof(LM)); 262 if (lm == NULL) { 263 nRes = SWIsltsErrAllocResource; 264 goto CLEAN_UP; 265 } 266 267 PORT_FREAD_CHAR(&len, sizeof(char), 1, fp); 268 lm->num_letters = len; 269 270 lm->letters = (char*) lts_alloc(len, sizeof(char)); 271 if (lm->letters == NULL) { 272 nRes = SWIsltsErrAllocResource; 273 goto CLEAN_UP; 274 } 275 276 lm->type = (char*) lts_alloc(len, sizeof(char)); 277 if (lm->type == NULL) { 278 nRes = SWIsltsErrAllocResource; 279 goto CLEAN_UP; 280 } 281 282 PORT_FREAD_CHAR(lm->letters, sizeof(char), len, fp); 283 PORT_FREAD_CHAR(lm->type, sizeof(char), len, fp); 284 285 { 286 unsigned int letter; 287 for (letter=0; letter <= UCHAR_MAX; letter++) 288 lm->letter_index_for_letter[letter] = LTS_MAXCHAR; 289 } 290 291 for (i=0;i<len;i++) { 292 char letter = toupper(lm->letters[i]); 293 lm->letters[i] = letter; 294 lm->letter_index_for_letter[(unsigned char)letter] = i; 295 } 296 *ppLetterMap = lm; 297 return SWIsltsSuccess; 298 299 CLEAN_UP: 300 free_letter_mapping(lm); 301 *ppLetterMap = NULL; 302 return nRes; 303 } 304 305 /* deallocate letter mapping */ 306 static SWIsltsResult free_letter_mapping(LM *lm) 307 { 308 SWIsltsResult nRes = SWIsltsSuccess; 309 310 if (lm) { 311 if (lm->letters) { 312 FREE(lm->letters); 313 lm->letters = NULL; 314 } 315 if (lm->type) { 316 FREE(lm->type); 317 lm->type = NULL; 318 } 319 lm->num_letters = 0; 320 FREE(lm); 321 } 322 return nRes; 323 } 324 325 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap) 326 { 327 SWIsltsResult nRes = SWIsltsSuccess; 328 PM * pm; 329 int i; 330 unsigned char len; 331 char * ph; 332 333 pm = (PM*) lts_alloc(1, sizeof(PM)); 334 if (pm == NULL) { 335 nRes = SWIsltsErrAllocResource; 336 goto CLEAN_UP; 337 } 338 339 pm->num_phones = load_int(fp); 340 341 pm->phones = (char**) lts_alloc(pm->num_phones, sizeof(char*)); 342 if (pm->phones == NULL) { 343 nRes = SWIsltsErrAllocResource; 344 goto CLEAN_UP; 345 } 346 347 for (i=0;i<pm->num_phones;i++) { 348 PORT_FREAD_CHAR(&len, sizeof(unsigned char), 1, fp); 349 350 pm->phoneH = NULL; 351 pm->phones[i] = ph = (char*) lts_alloc(len+1, sizeof(char)); 352 if (ph == NULL) { 353 nRes = SWIsltsErrAllocResource; 354 goto CLEAN_UP; 355 } 356 357 PORT_FREAD_CHAR(ph, sizeof(char), len, fp); 358 ph[len] = '\0'; 359 } 360 pm->phoneH = my_PHashTableCreate_FromStrings( (const char**)pm->phones, 361 pm->num_phones, 362 L("lts.phoneH")); 363 if(pm->phoneH == NULL) { 364 nRes = SWIsltsErrAllocResource; 365 goto CLEAN_UP; 366 } 367 *ppPhoneMap = pm; 368 return SWIsltsSuccess; 369 370 CLEAN_UP: 371 free_phone_mapping(pm); 372 *ppPhoneMap = NULL; 373 374 return nRes; 375 } 376 377 /* deallocate phone mapping */ 378 static SWIsltsResult free_phone_mapping(PM *pm) 379 { 380 SWIsltsResult nRes = SWIsltsSuccess; 381 int i; 382 383 if (pm) { 384 if (pm->phones) { 385 for (i=0; i<pm->num_phones; i++) { 386 if (pm->phones[i]) { 387 FREE(pm->phones[i]); 388 pm->phones[i] = NULL; 389 } 390 } 391 FREE(pm->phones); 392 pm->phones = NULL; 393 } 394 if(pm->phoneH) 395 PHashTableDestroy( (PHashTable*)pm->phoneH); 396 pm->phoneH = NULL; 397 FREE(pm); 398 } 399 return nRes; 400 } 401 402 403 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp) 404 { 405 SWIsltsResult nRes = SWIsltsSuccess; 406 int i; 407 char ** outputs = NULL; 408 char ** inputs = NULL; 409 int num; 410 unsigned char olen; 411 char * out; 412 unsigned char ilen; 413 char * in; 414 415 num = load_int(fp); 416 417 *poutputs = outputs = (char **) lts_alloc(num, sizeof(char*)); 418 if (outputs == NULL) { 419 nRes = SWIsltsErrAllocResource; 420 goto CLEAN_UP; 421 } 422 423 *pinputs = inputs = (char **) lts_alloc(num, sizeof(char*)); 424 if (inputs == NULL) { 425 nRes = SWIsltsErrAllocResource; 426 goto CLEAN_UP; 427 } 428 429 for (i=0;i<num;i++) { 430 PORT_FREAD_CHAR(&olen, sizeof(char), 1, fp); 431 out = outputs[i] = lts_alloc(olen + 1, sizeof(char)); 432 if (out == NULL) { 433 nRes = SWIsltsErrAllocResource; 434 goto CLEAN_UP; 435 } 436 437 if (olen > 0) { 438 PORT_FREAD_CHAR(out, sizeof(char), olen, fp); 439 } 440 out[olen] = '\0'; 441 PORT_FREAD_CHAR(&ilen, sizeof(char), 1, fp); 442 in = inputs[i] = lts_alloc(ilen + 1, sizeof(char)); 443 if (in == NULL) { 444 nRes = SWIsltsErrAllocResource; 445 goto CLEAN_UP; 446 } 447 448 if (ilen > 0) { 449 PORT_FREAD_CHAR(in, sizeof(char), ilen, fp); 450 } 451 in[ilen] = '\0'; 452 #if PRINT_LOAD_TREE 453 if (ilen > 0) pfprintf(PSTDOUT,"LOAD_TREE: got input %s out %s\n", in, outputs[i]); 454 pfprintf(PSTDOUT,"LOAD_TREE: outputs[%d] len %d out %x out %s\n", i, olen, outputs[i], outputs[i]); 455 #endif 456 } 457 458 *pnum = num; 459 return SWIsltsSuccess; 460 461 CLEAN_UP: 462 463 free_outputs(outputs, inputs, num); 464 *poutputs = NULL; 465 *pinputs = NULL; 466 *pnum = 0; 467 468 return nRes; 469 } 470 471 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num) 472 { 473 SWIsltsResult nRes = SWIsltsSuccess; 474 int i; 475 476 if (outputs) { 477 for (i=0; i<num; i++) { 478 if (outputs[i]) { 479 FREE(outputs[i]); 480 outputs[i] = NULL; 481 } 482 } 483 FREE(outputs); 484 } 485 486 if (inputs) { 487 for (i=0; i<num; i++) { 488 if (inputs[i]) { 489 FREE(inputs[i]); 490 inputs[i] = NULL; 491 } 492 } 493 FREE(inputs); 494 } 495 return nRes; 496 } 497 498 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters, 499 LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp) 500 { 501 SWIsltsResult nRes = SWIsltsSuccess; 502 int let, i; 503 RT_LTREE * tree = NULL; 504 RT_LTREE ** trees = NULL; 505 506 #if PRINT_LOAD_TREE_SUMMARY 507 pfprintf(PSTDOUT,"loading letter mapping\n"); 508 #endif 509 *ptrees = NULL; 510 *pquestions = NULL; 511 *plm = NULL; 512 513 nRes = load_letter_mapping(fp, plm); 514 if (nRes != SWIsltsSuccess) { 515 goto CLEAN_UP; 516 } 517 518 #if PRINT_LOAD_TREE_SUMMARY 519 pfprintf(PSTDOUT,"loading questions\n"); 520 #endif 521 522 nRes = load_lquestions(pquestions, num_questions, fp); 523 if (nRes != SWIsltsSuccess) { 524 goto CLEAN_UP; 525 } 526 527 *num_letters = load_int(fp); 528 529 if (*num_letters != (*plm)->num_letters) { 530 #ifndef NO_STDERR 531 PLogError(L("Error loading data, num_letters %d doesn't match num from mapping %d\n"), 532 *num_letters, (*plm)->num_letters); 533 #endif 534 nRes = SWIsltsInternalErr; 535 goto CLEAN_UP; 536 } 537 538 *ptrees = trees = (RT_LTREE**) lts_alloc(*num_letters, sizeof(RT_LTREE*)); 539 if (trees == NULL) { 540 nRes = SWIsltsErrAllocResource; 541 goto CLEAN_UP; 542 } 543 544 for (let=0;let<*num_letters;let++) { 545 /* pfprintf(PSTDOUT,"loading for t %d\n", t);*/ 546 547 trees[let] = tree = (RT_LTREE*) lts_alloc(1, sizeof(RT_LTREE)); 548 if (tree == NULL) { 549 nRes = SWIsltsErrAllocResource; 550 goto CLEAN_UP; 551 } 552 553 tree->num_nodes = load_int(fp); 554 555 tree->values_or_question1 = (short*) lts_alloc(tree->num_nodes, sizeof(short)); 556 if (tree->values_or_question1 == NULL) { 557 nRes = SWIsltsErrAllocResource; 558 goto CLEAN_UP; 559 } 560 561 tree->question2 = (short*) lts_alloc(tree->num_nodes, sizeof(short)); 562 if (tree->question2 == NULL) { 563 nRes = SWIsltsErrAllocResource; 564 goto CLEAN_UP; 565 } 566 567 tree->left_nodes = (short *) lts_alloc(tree->num_nodes, sizeof(short)); 568 if (tree->left_nodes == NULL) { 569 nRes = SWIsltsErrAllocResource; 570 goto CLEAN_UP; 571 } 572 573 #if PRINT_LOAD_TREE 574 pfprintf(PSTDOUT,"LOAD_TREE: Tree for let %d num_nodes %d\n", let, tree->num_nodes); 575 #endif 576 577 for (i=0;i<tree->num_nodes;i++) { 578 PORT_FREAD_INT16(&(tree->left_nodes[i]), sizeof(short), 1, fp); 579 PORT_FREAD_INT16(&(tree->values_or_question1[i]), sizeof(short), 1, fp); 580 581 #if PRINT_LOAD_TREE 582 pfprintf(PSTDOUT,"LOAD_TREE: node[%d] %d %d", i, tree->left_nodes[i], tree->values_or_question1[i]); 583 #endif 584 585 PORT_FREAD_INT16(&(tree->question2[i]), sizeof(short), 1, fp); 586 if (tree->left_nodes[i] != NO_NODE) { 587 if (tree->question2[i] == -1) tree->question2[i] = 0; 588 #if PRINT_LOAD_TREE 589 pfprintf(PSTDOUT," %x", (unsigned short) tree->question2[i]); 590 #endif 591 } 592 593 #if PRINT_LOAD_TREE 594 pfprintf(PSTDOUT,"\n"); 595 #endif 596 } 597 } 598 599 return SWIsltsSuccess; 600 601 CLEAN_UP: 602 603 free_trees(trees, *num_letters, *pquestions, *num_questions, *plm); 604 *ptrees = NULL; 605 *pquestions = NULL; 606 *plm = NULL; 607 *num_letters = 0; 608 *num_questions = 0; 609 610 return nRes; 611 } 612 613 /* deallocate trees */ 614 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, 615 LQUESTION **questions, int num_questions, LM *lm) 616 { 617 SWIsltsResult nRes = SWIsltsSuccess; 618 int i; 619 RT_LTREE * tree; 620 621 if (lm) { 622 free_letter_mapping(lm); 623 } 624 if (questions) { 625 free_lquestions(questions, num_questions); 626 } 627 628 if (trees) { 629 for (i=0; i<num_letters; i++) { 630 if (trees[i]) { 631 tree = trees[i]; 632 if (tree->values_or_question1) { 633 FREE(tree->values_or_question1); 634 tree->values_or_question1 = NULL; 635 } 636 if (tree->question2) { 637 FREE(tree->question2); 638 tree->question2 = NULL; 639 } 640 if (tree->left_nodes) { 641 FREE(tree->left_nodes); 642 tree->left_nodes = NULL; 643 } 644 FREE(trees[i]); 645 trees[i] = NULL; 646 } 647 } 648 FREE(trees); 649 } 650 return nRes; 651 } 652 653 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp) 654 { 655 SWIsltsResult nRes = SWIsltsSuccess; 656 char line[50]; 657 char tempstr[50]; 658 char * tok; 659 int i, toklen; 660 int count; 661 char seps[] = " \n"; 662 663 lts->num_cons_comb = 0; 664 lts->allowable_cons_combH = NULL; 665 666 while (PORT_FGETS(line, 50, fp)) { 667 668 #ifndef TI_DSP 669 670 /*need to get rid of sme crud at the end of the line because it is being read in binary mode*/ 671 for (i=strlen(line)-1;i>=0;i--) { 672 if (!isalpha(line[i])) line[i] = ' '; 673 } 674 #endif 675 count = 0; 676 tok = safe_strtok(line, seps, &toklen); 677 tempstr[0] = '\0'; 678 679 /* get all available sequence of tokens */ 680 while(tok && toklen > 0){ 681 count += toklen; 682 strncat(tempstr, tok, toklen); 683 tempstr[count+1] = '\0'; 684 strcat(tempstr, " "); 685 count++; 686 687 tok = safe_strtok(tok+toklen, seps, &toklen); 688 } 689 if (count > 0) { 690 691 /* delete the final space */ 692 tempstr[count-1] = '\0'; 693 694 lts->allowable_cons_comb[lts->num_cons_comb] = (char*) lts_alloc(strlen(tempstr)+1, sizeof(char)); 695 if (lts->allowable_cons_comb[lts->num_cons_comb] == NULL) { 696 nRes = SWIsltsErrAllocResource; 697 goto CLEAN_UP; 698 } 699 700 strcpy(lts->allowable_cons_comb[lts->num_cons_comb], tempstr); 701 702 #if PRINT_CONS_COMB 703 pfprintf(PSTDOUT,"LOAD_TREE: allowable_cons_comb[%d]: %s\n", lts->num_cons_comb, tempstr); 704 #endif 705 706 lts->num_cons_comb++; 707 if (lts->num_cons_comb >= MAX_CONS_COMB) { 708 #ifndef NO_STDERR 709 PLogError(L("MAX_CONS_COMB %d exceeded\n"), MAX_CONS_COMB); 710 #endif 711 nRes = SWIsltsInternalErr; 712 goto CLEAN_UP; 713 } 714 } 715 } 716 if (lts->num_cons_comb == 0) { 717 #ifndef NO_STDERR 718 PLogError(L("Warning: the data file is missing consonant combinations - syllable boundaries will be incorrect\n")); 719 #endif 720 } 721 lts->allowable_cons_combH = my_PHashTableCreate_FromStrings( (const char**)lts->allowable_cons_comb, lts->num_cons_comb, L("lts.allowable_cons_combH")); 722 if(lts->allowable_cons_combH == NULL) { 723 nRes = SWIsltsErrAllocResource; 724 goto CLEAN_UP; 725 } 726 727 #if PRINT_LOAD_TREE_SUMMARY 728 pfprintf(PSTDOUT,"loaded %d cons combinations\n", lts->num_cons_comb); 729 #endif 730 731 return SWIsltsSuccess; 732 733 CLEAN_UP: 734 735 free_allowable_cons_comb(lts); 736 737 return nRes; 738 } 739 740 static SWIsltsResult free_allowable_cons_comb(LTS *lts) 741 { 742 SWIsltsResult nRes = SWIsltsSuccess; 743 int i; 744 745 for (i=0; i<lts->num_cons_comb; i++) { 746 if (lts->allowable_cons_comb[i]) { 747 FREE(lts->allowable_cons_comb[i]); 748 lts->allowable_cons_comb[i] = NULL; 749 } 750 } 751 if(lts->allowable_cons_combH) 752 PHashTableDestroy( (PHashTable*)lts->allowable_cons_combH); 753 lts->allowable_cons_combH = NULL; 754 return nRes; 755 } 756 757 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp) 758 { 759 SWIsltsResult nRes = SWIsltsSuccess; 760 int i; 761 int num; 762 unsigned char len; 763 char ** strings; 764 char * str; 765 766 num = load_int(fp); 767 768 lts->strings = strings = (char **) lts_alloc(num, sizeof(char*)); 769 lts->string_lens = (char*)lts_alloc(num, sizeof(char)); 770 771 if (strings == NULL || lts->string_lens == NULL ) { 772 nRes = SWIsltsErrAllocResource; 773 goto CLEAN_UP; 774 } 775 776 for (i=0;i<num;i++) { 777 PORT_FREAD_CHAR(&len, sizeof(char), 1, fp); 778 779 str = strings[i] = lts_alloc(len + 1, sizeof(char)); 780 if (str == NULL) { 781 nRes = SWIsltsErrAllocResource; 782 goto CLEAN_UP; 783 } 784 785 if (len > 0) { 786 PORT_FREAD_CHAR(str, sizeof(char), len, fp); 787 } 788 str[len] = '\0'; 789 790 bitarray_populate_from_list( lts->membership, lts->strings[i], len); 791 lts->string_lens[i] = strlen(lts->strings[i]); 792 } 793 794 // *pnum = num; 795 lts->num_strings = num; 796 797 return SWIsltsSuccess; 798 799 CLEAN_UP: 800 801 free_question_strings(lts); 802 803 return nRes; 804 } 805 806 /* deallocate question strings */ 807 static SWIsltsResult free_question_strings(LTS* lts) 808 { 809 SWIsltsResult nRes = SWIsltsSuccess; 810 int i; 811 812 if (lts->strings) { 813 for (i=0;i<lts->num_strings;i++) { 814 if (lts->strings[i]) { 815 FREE(lts->strings[i]); 816 lts->strings[i] = NULL; 817 } 818 } 819 FREE(lts->strings); 820 if(lts->string_lens) FREE(lts->string_lens); 821 lts->strings = NULL; 822 lts->string_lens = NULL; 823 } 824 return nRes; 825 } 826 827 828 SWIsltsResult create_lts(char *data_filename, LTS_HANDLE *phLts) 829 { 830 SWIsltsResult nRes = SWIsltsSuccess; 831 LTS * lts; 832 833 #ifdef USE_STATIC_SLTS 834 /* TODO: language-specific ID here? */ 835 lts = &g_lts; 836 837 #else /* !USE_STATIC_SLTS */ 838 839 PORT_FILE *fp; 840 841 lts = (LTS*) lts_alloc(1, sizeof(LTS)); 842 if (lts == NULL) { 843 nRes = SWIsltsErrAllocResource; 844 goto CLEAN_UP; 845 } 846 847 fp = PORT_FOPEN(data_filename, "rb"); 848 if (fp == NULL) { 849 #ifndef NO_STDERR 850 PLogError(L("Cannot open %s\n"), data_filename); 851 #endif 852 nRes = SWIsltsFileOpenErr; 853 goto CLEAN_UP; 854 } 855 nRes = load_phone_mapping(fp, <s->phone_mapping); 856 if (nRes != SWIsltsSuccess) { 857 PLogError(L("SWIsltsErr: load_phone_mapping() failed: Err_code = %d\n"), nRes); 858 goto CLEAN_UP; 859 } 860 861 nRes = load_question_strings(lts, fp); 862 if (nRes != SWIsltsSuccess) { 863 PLogError(L("SWIsltsErr: load_question_strings() failed: Err_code = %d\n"), nRes); 864 goto CLEAN_UP; 865 } 866 867 nRes = load_outputs(&(lts->outputs), &(lts->input_for_output), <s->num_outputs, fp); 868 if (nRes != SWIsltsSuccess) { 869 PLogError(L("SWIsltsErr: load_outputs() failed: Err_code = %d\n"), nRes); 870 goto CLEAN_UP; 871 } 872 873 #if PRINT_LOAD_TREE 874 pfprintf(PSTDOUT,"LOAD_TREE: got %d outputs, loading trees\n", lts->num_outputs); 875 #endif 876 877 nRes = load_trees(&(lts->trees), &(lts->num_letters), 878 &(lts->questions), &(lts->num_questions), 879 &(lts->letter_mapping), 880 fp); 881 if (nRes != SWIsltsSuccess) { 882 PLogError(L("SWIsltsErr: load_trees() failed: Err_code = %d\n"), nRes); 883 goto CLEAN_UP; 884 } 885 886 nRes = load_allowable_cons_comb(lts, fp); 887 if (nRes != SWIsltsSuccess) { 888 PLogError(L("SWIsltsErr: load_allowable_cons_comb() failed: Err_code = %d\n"), nRes); 889 goto CLEAN_UP; 890 } 891 892 PORT_FCLOSE(fp); 893 894 #endif /* !USE_STATIC_SLTS */ 895 896 *phLts = lts; 897 return SWIsltsSuccess; 898 899 CLEAN_UP: 900 901 free_lts(lts); 902 *phLts = NULL; 903 return nRes; 904 } 905 906 /* deallocates LTS */ 907 SWIsltsResult free_lts(LTS_HANDLE hlts) 908 { 909 SWIsltsResult nRes = SWIsltsSuccess; 910 LTS * lts = (LTS *)hlts; 911 912 if (lts) { 913 914 #ifndef USE_STATIC_SLTS 915 free_phone_mapping(lts->phone_mapping); 916 free_question_strings(lts); 917 lts->strings = NULL; 918 lts->phone_mapping = NULL; 919 920 free_outputs(lts->outputs, lts->input_for_output, lts->num_outputs); 921 lts->input_for_output = lts->outputs = NULL; 922 923 free_trees(lts->trees, lts->num_letters, 924 lts->questions, lts->num_questions, 925 lts->letter_mapping); 926 lts->trees = NULL; 927 lts->questions = NULL; 928 lts->letter_mapping = NULL; 929 930 free_allowable_cons_comb(lts); 931 FREE(lts); 932 #endif /* !USE_STATIC_LTS */ 933 } 934 935 return nRes; 936 } 937 938 939 int find_phone(const char *ph, PM *pm) 940 { 941 ESR_ReturnCode rc; 942 int iRet = -1; 943 rc = PHashTableGetValue((PHashTable*)pm->phoneH, ph, (void**)(void*)&iRet); 944 if (rc != ESR_SUCCESS) 945 PLogError("error while in find_phone(%s,%x)\n", ph, pm); 946 return iRet; 947 } 948 949 int find_best_string(const char *str, LTS* lts) 950 { 951 int i, maxlen, maxi, len; 952 int len_str; 953 954 if(str[0] == '\0') return -1; 955 len_str = strlen(str); 956 957 maxi = -1; 958 maxlen = 0; 959 960 for (i=0;i<lts->num_strings;i++) { 961 len = lts->string_lens[i]; 962 if( len > len_str) 963 continue; /* no point in comparison */ 964 if (strncmp(str, lts->strings[i], len) == 0) { 965 if (len > maxlen) { 966 maxlen = len; 967 maxi = i; 968 } 969 } 970 } 971 return maxi; 972 } 973 974 int find_best_prefix_string(const char *str, LTS* lts) 975 { 976 int i; 977 int maxlen; 978 int maxi; 979 int len; 980 int prelen; 981 982 maxi = -1; 983 maxlen = 0; 984 985 prelen = strlen(str); 986 987 for (i=0;i<lts->num_strings;i++) { 988 len = lts->string_lens[i]; 989 if (len <= prelen) { 990 if (strncmp(str + (prelen - len), lts->strings[i], len) == 0) { 991 if (len > maxlen) { 992 maxlen = len; 993 maxi = i; 994 } 995 } 996 } 997 } 998 return maxi; 999 } 1000 1001 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone) 1002 { 1003 int i,j; 1004 LDP *dp; 1005 unsigned char letter; 1006 int hit_wb; 1007 LM *lm; 1008 unsigned char word[MAX_WORD_LEN]; 1009 char tempstr[MAX_WORD_LEN]; 1010 int first_syl_end; 1011 int last_syl_start; 1012 1013 dp = &(lts->dp); 1014 lm = lts->letter_mapping; 1015 1016 /* the LTS decision tree does not seem to be well trained at all for 1017 the letter ' when followed by "s" ... It seems to result in the 1018 phoneme 'm', which is wrong. "'t" seems to be OK though. 1019 BAD: Kevin's : k6v6nmz ... pal's : palmz ... paul's : p{lz 1020 BAD: janice's : jan6s6mz ... tom's house : t)mmz&h?s ... tonya's : t)ny6mz 1021 BAD: jake's house : jAk6mz&h?s 1022 Ignoring ' as below we get ... 1023 BETTER: Kevin's : kev6nz ... pal's : palz ... paul's : p{lz 1024 BETTER: janice's : jan6s6s ... tom's house : t)mz&h?s ... tonya's : t)ny6s 1025 BETTER: jake's house : jAk6s&h?s 1026 The proper solution requires a legitimate text normalizer with special 1027 handling of cases like 's which would always put a "z" there, 1028 except if preceded by an unvoiced stop (ptk) which requires a "s" there. 1029 For now let's just skip the ' letter, which testing shows to be generally 1030 safe (janice's, jake's etc are better but still not quite right). */ 1031 1032 if(input_word[index] == '\'') 1033 return 1; // same as unknown character 1034 1035 letter = find_letter_index(input_word[index], lm); 1036 1037 if (letter == LTS_MAXCHAR) { 1038 /* lisa - we need to decide how to handle this case. Do we just silently skip unknown 1039 characters or warn the app or user somehow*/ 1040 #ifdef NO_STDERR 1041 PrintError("unknown character on input %c - skipping\n", input_word[index], NULL, NULL); 1042 #else 1043 PLogError(L("unknown character on input %c - skipping\n"), input_word[index]); 1044 #endif 1045 return 1; 1046 } 1047 1048 hit_wb = 0; 1049 1050 /*pfprintf(PSTDOUT,"left context\n");*/ 1051 1052 for (j=0;j<5;j++) { 1053 if (hit_wb) { 1054 dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm); 1055 } else { 1056 i = index - (j+1); 1057 if (i < 0) dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm); 1058 else { 1059 dp->properties[ Left1+j] = find_letter_index(input_word[i], lm); 1060 if (dp->properties[ Left1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/ 1061 dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm); 1062 hit_wb = 1; 1063 } 1064 } 1065 } 1066 } 1067 1068 /*pfprintf(PSTDOUT,"right context\n");*/ 1069 1070 hit_wb = 0; 1071 for (j=0;j<5;j++) { 1072 if (hit_wb) { 1073 dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm); 1074 } else { 1075 i = index + (j+1); 1076 if (i >= word_len) dp->properties[Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm); 1077 else { 1078 dp->properties[ Right1+j] = find_letter_index(input_word[i], lm); 1079 if (dp->properties[ Right1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/ 1080 dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm); 1081 hit_wb = 1; 1082 } 1083 } 1084 } 1085 } 1086 1087 dp->letter = letter; // properties[ Letter] = letter; 1088 1089 dp->properties[ LeftPhone1] = left_phone; 1090 1091 /*pfprintf(PSTDOUT,"word stuff\n"); */ 1092 1093 /*find word start and end - use unknown character as word boundaries*/ 1094 1095 dp->properties[ WordLen] = word_len; 1096 1097 if (index == 0) dp->properties[ LetInWord] = 0; 1098 else if (index == word_len-1) dp->properties[ LetInWord] = 2; 1099 else dp->properties[ LetInWord] = 1; 1100 1101 for (i=0;i<word_len;i++) { 1102 word[i] = find_letter_index(input_word[i], lm); 1103 } 1104 1105 /*figure out syllable in word - not really syllables - just looks to see if is or at first or last vowel*/ 1106 /* pfprintf(PSTDOUT,"syl stuff\n");*/ 1107 1108 first_syl_end = word_len; 1109 for (i=0;i<word_len;i++) { 1110 if (lm->type[word[i]] == 1) { 1111 for (j=i+1;j<word_len;j++) { 1112 if (lm->type[word[j]] != 1) break; 1113 } 1114 first_syl_end = j; 1115 break; 1116 } 1117 } 1118 last_syl_start = 0; 1119 for (i=word_len-1;i>=0;i--) { 1120 if (lm->type[word[i]] == 1) { 1121 for (j=i-1;j>=0;j--) { 1122 if (lm->type[word[j]] != 1) break; 1123 } 1124 last_syl_start = j; 1125 break; 1126 } 1127 } 1128 1129 #if PRINT_DP_LETTER 1130 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start); 1131 #endif 1132 1133 if (index > last_syl_start) dp->properties[ SylInWord] = 2; 1134 else if (index < first_syl_end) dp->properties[ SylInWord] = 0; 1135 else dp->properties[ SylInWord] = 1; 1136 1137 first_syl_end = word_len; 1138 for (i=0;i<word_len;i++) { 1139 if (lm->type[word[i]] == 1) { 1140 for (j=i+1;j<word_len;j++) { 1141 if (lm->type[word[j]] != 1) break; 1142 } 1143 for (;j<word_len;j++) { 1144 if (lm->type[word[j]] == 1) break; 1145 } 1146 first_syl_end = j; 1147 break; 1148 } 1149 } 1150 last_syl_start = 0; 1151 for (i=word_len-1;i>=0;i--) { 1152 if (lm->type[word[i]] == 1) { 1153 for (j=i-1;j>=0;j--) { 1154 if (lm->type[word[j]] != 1) break; 1155 } 1156 for (;j>=0;j--) { 1157 if (lm->type[word[j]] == 1) break; 1158 } 1159 last_syl_start = j; 1160 break; 1161 } 1162 } 1163 1164 #if PRINT_DP_LETTER 1165 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start); 1166 #endif 1167 1168 if (index > last_syl_start) dp->properties[ Syl2InWord] = 2; 1169 else if (index < first_syl_end) dp->properties[ Syl2InWord] = 0; 1170 else dp->properties[Syl2InWord] = 1; 1171 1172 1173 first_syl_end = word_len; 1174 for (i=root_start;i<root_end;i++) { 1175 if (lm->type[word[i]] == 1) { 1176 for (j=i+1;j<word_len;j++) { 1177 if (lm->type[word[j]] != 1) break; 1178 } 1179 first_syl_end = j; 1180 break; 1181 } 1182 } 1183 last_syl_start = 0; 1184 for (i=root_end-1;i>=root_start;i--) { 1185 if (lm->type[word[i]] == 1) { 1186 for (j=i-1;j>=0;j--) { 1187 if (lm->type[word[j]] != 1) break; 1188 } 1189 last_syl_start = j; 1190 break; 1191 } 1192 } 1193 1194 #if PRINT_DP_LETTER 1195 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start); 1196 #endif 1197 1198 if (index > last_syl_start) dp->properties[SylInRoot] = 2; 1199 else if (index < first_syl_end) dp->properties[ SylInRoot] = 0; 1200 else dp->properties[ SylInRoot] = 1; 1201 1202 first_syl_end = word_len; 1203 for (i=root_start;i<root_end;i++) { 1204 if (lm->type[word[i]] == 1) { 1205 for (j=i+1;j<word_len;j++) { 1206 if (lm->type[word[j]] != 1) break; 1207 } 1208 for (;j<word_len;j++) { 1209 if (lm->type[word[j]] == 1) break; 1210 } 1211 first_syl_end = j; 1212 break; 1213 } 1214 } 1215 last_syl_start = 0; 1216 for (i=root_end-1;i>=root_start;i--) { 1217 if (lm->type[word[i]] == 1) { 1218 for (j=i-1;j>=0;j--) { 1219 if (lm->type[word[j]] != 1) break; 1220 } 1221 for (;j>=0;j--) { 1222 if (lm->type[word[j]] == 1) break; 1223 } 1224 last_syl_start = j; 1225 break; 1226 } 1227 } 1228 1229 #if PRINT_DP_LETTER 1230 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start); 1231 #endif 1232 1233 if (index > last_syl_start) dp->properties[Syl2InRoot] = 2; 1234 else if (index < first_syl_end) dp->properties[Syl2InRoot] = 0; 1235 else dp->properties[Syl2InRoot] = 1; 1236 1237 1238 dp->properties[Left_DFRE] = index - root_start; 1239 dp->properties[Right_DFRE] = (root_end - index) - 1; 1240 1241 1242 /* pfprintf(PSTDOUT,"strings\n");*/ 1243 #if PRINT_DP_LETTER 1244 pfprintf(PSTDOUT,"input word %s num_strings %d\n", input_word, lts->num_strings); 1245 #endif 1246 1247 dp->properties[RightString] = find_best_string(input_word+index+1, lts); 1248 strcpy(tempstr, input_word); 1249 tempstr[index] = '\0'; 1250 1251 dp->properties[LeftString] = find_best_prefix_string(tempstr, lts); 1252 1253 #if PRINT_DP_LETTER 1254 pfprintf(PSTDOUT,"dp %c ", lm->letters[dp->letter]); 1255 1256 for (i=0;i<word_len;i++) { 1257 pfprintf(PSTDOUT,"%c", lm->letters[word[i]]); 1258 } 1259 pfprintf(PSTDOUT," %c%c%c {%c} %c%c%c liw %d siw %d s2iw %d nw %d sir %d s2ir %d left_DFRE %d right_DFRE %d\n", 1260 lm->letters[dp->left_context[2]], 1261 lm->letters[dp->left_context[1]], 1262 lm->letters[dp->left_context[0]], 1263 lm->letters[dp->letter], 1264 lm->letters[dp->right_context[0]], 1265 lm->letters[dp->right_context[1]], 1266 lm->letters[dp->right_context[2]], 1267 dp->let_in_word, 1268 dp->syl_in_word, 1269 dp->syl2_in_word, 1270 dp->word_len, 1271 dp->syl_in_root, 1272 dp->syl2_in_root, 1273 dp->left_DFRE, dp->right_DFRE); 1274 #endif 1275 1276 return 0; 1277 } 1278 1279 int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) 1280 { 1281 int m1, m2; 1282 switch(type) { 1283 case 0: 1284 return qmatches(q1, dp); 1285 case 1: 1286 m1 = qmatches(q1, dp); 1287 m2 = qmatches(q2, dp); 1288 return(m1 && m2); 1289 case 2: 1290 m1 = qmatches(q1, dp); 1291 m2 = qmatches(q2, dp); 1292 return(m1 && !m2); 1293 case 3: 1294 m1 = qmatches(q1, dp); 1295 m2 = qmatches(q2, dp); 1296 return(!m1 && m2); 1297 case 4: 1298 m1 = qmatches(q1, dp); 1299 m2 = qmatches(q2, dp); 1300 return(!m1 && !m2); 1301 default: 1302 return -1; 1303 } 1304 /* should not come here */ 1305 return -1; 1306 } 1307 1308 int find_output_for_dp(LTS *lts, int *pbackoff_output) 1309 { 1310 LDP *dp; 1311 int index; 1312 RT_LTREE *tree; 1313 LQUESTION *q1; 1314 LQUESTION *q2; 1315 int comb_type; 1316 int q2_index; 1317 int left_index; 1318 1319 dp = &(lts->dp); 1320 tree = lts->trees[dp->letter]; // properties[Letter]]; 1321 1322 index = 0; 1323 1324 while (1) { 1325 left_index = tree->left_nodes[index]; 1326 1327 if (left_index == NO_NODE) { /*means its a leaf node*/ 1328 *pbackoff_output = tree->question2[index]; 1329 return tree->values_or_question1[index]; 1330 } 1331 q1 = lts->questions[tree->values_or_question1[index]]; 1332 q2_index = tree->question2[index] & 0x1FFF; 1333 comb_type = (tree->question2[index] & 0xE000) >> 13; 1334 1335 q2 = lts->questions[q2_index]; 1336 1337 if (matches(q1, q2, comb_type, dp)) { 1338 index = left_index; 1339 } else { 1340 index = left_index+1; 1341 } 1342 } 1343 } 1344 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length) 1345 { 1346 char *tok; 1347 int toklen; 1348 char seps[] = " "; 1349 1350 if (strlen(output) == 0) return out_len; 1351 1352 tok = safe_strtok(output, seps, &toklen); 1353 while (tok && toklen) { 1354 if ((toklen > 0) && (strncmp(tok, "null", 4) != 0)) { 1355 1356 if (isdigit(tok[toklen-1])) { 1357 /*means it's a vowel. So, add a syllable boundary. It's position 1358 gets adjusted later by adjust_syllable_boundaries()*/ 1359 strcpy(output_phone_string[out_len++], LTS_MARKER_SYLL_START); 1360 if (out_len >= max_phone_length) return max_phone_length; 1361 } 1362 strncpy(output_phone_string[out_len], tok, toklen); 1363 output_phone_string[out_len++][toklen] = '\0'; 1364 if (out_len >= max_phone_length) return max_phone_length; 1365 } 1366 tok = safe_strtok(tok+toklen, seps, &toklen); 1367 } 1368 return out_len; 1369 } 1370 1371 int is_allowable_cons_comb(LTS *lts, const char *cons_string) 1372 { 1373 /* int i; 1374 for (i=0;i<lts->num_cons_comb;i++) { 1375 #if PRINT_CONS_COMB 1376 pfprintf(PSTDOUT,"checking {%s} vs c[%d] {%s}\n", cons_string, i, lts->allowable_cons_comb[i]); 1377 #endif 1378 if (strcmp(cons_string, lts->allowable_cons_comb[i]) == 0) return 1; 1379 } 1380 return 0; 1381 */ 1382 ESR_ReturnCode rc; 1383 void* iVal = NULL; 1384 rc = PHashTableGetValue( (PHashTable*)lts->allowable_cons_combH, cons_string, &iVal); 1385 if(rc == ESR_SUCCESS) 1386 return 1; 1387 else 1388 return 0; 1389 } 1390 1391 1392 1393 1394 1395 void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length) 1396 { 1397 char *out; 1398 int i,j; 1399 int syl_start; 1400 int stress = 0; 1401 int first_syl_bound; 1402 1403 char tempstr[20]; 1404 1405 /*there should already be a syllable boundary before each vowel (add_output put one there)*/ 1406 /*so just find these, then shift back by allowable consonant combinations and move the syllable mark*/ 1407 1408 for (i=0;i<num_out;i++) { 1409 out = output_phone_string[i]; 1410 if (strcmp(out, LTS_MARKER_SYLL_START) == 0) { /*means there is a syllable boundary 1411 find start of allowable sequence*/ 1412 1413 syl_start = 0; 1414 1415 for (j=i-1;j>0;j--) { 1416 out = output_phone_string[j]; 1417 if (isdigit(out[strlen(out)-1])) { 1418 syl_start = j+1; 1419 break; /*means it's a vowel*/ 1420 } 1421 if (strcmp(out, LTS_MARKER_WORD_START) == 0) { 1422 syl_start = j+1; 1423 break; /*don't push syl boundaries before word boundaries*/ 1424 } 1425 if (strcmp(out, LTS_MARKER_PRON_START) == 0) { 1426 syl_start = j+1; 1427 break; /*don't push syl boundaries before phrase boundaries*/ 1428 } 1429 1430 /* for sequences longer than 2, 1431 check 3-syllable onset first, then check 2-syllable onset */ 1432 if(j > 1){ 1433 sprintf(tempstr, "%s %s %s", output_phone_string[j-2], output_phone_string[j-1], 1434 output_phone_string[j]); 1435 if (!is_allowable_cons_comb(lts, tempstr)) { 1436 sprintf(tempstr, "%s %s", output_phone_string[j-1], output_phone_string[j]); 1437 if (!is_allowable_cons_comb(lts, tempstr)) { 1438 #if PRINT_CONS_COMB 1439 pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1], 1440 output_phone_string[j]); 1441 #endif 1442 syl_start = j; 1443 break; 1444 } 1445 } 1446 } 1447 /* for sequences shorter than 2 */ 1448 else 1449 { 1450 sprintf(tempstr, "%s %s", output_phone_string[j-1], output_phone_string[j]); 1451 if (!is_allowable_cons_comb(lts, tempstr)) { 1452 #if PRINT_CONS_COMB 1453 pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1], 1454 output_phone_string[j]); 1455 #endif 1456 syl_start = j; 1457 break; 1458 } 1459 } 1460 } /* end for j=i-1 */ 1461 1462 /*shift over stuff between syl_start a gap*/ 1463 for (j=i;j>syl_start;j--) { 1464 strcpy(output_phone_string[j], output_phone_string[j-1]); 1465 } 1466 /*now find stress level from phone (and remove it) and add it to syl bound*/ 1467 1468 if (i<num_out-1) { 1469 out = output_phone_string[i+1]; 1470 1471 if (isdigit(out[strlen(out)-1])) { 1472 stress = atoi(out + strlen(out)-1); 1473 } else { 1474 stress = 0; /*should not happen*/ 1475 } 1476 } else { 1477 stress = 0; /*should not happen*/ 1478 } 1479 1480 sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress); 1481 } /* end if (strcmp(out, LTS_MARKER_SYLL_START) == 0) */ 1482 } /* end for i=0 */ 1483 1484 /*remove all the stress marking from the vowels*/ 1485 for (i=0;i<num_out;i++) { 1486 out = output_phone_string[i]; 1487 if ((strncmp(out, LTS_MARKER_SYLL_START, 2) != 0) && isdigit(out[strlen(out)-1])) { 1488 out[strlen(out)-1] = '\0'; /*remove the stress from the vowel*/ 1489 } 1490 } 1491 1492 /* word boundary must be followed by syllable boundary 1493 if no syllable boundary exists after a word boundary, move the first 1494 syllable boundary to after the word boundary */ 1495 first_syl_bound = -1; 1496 syl_start = -1; 1497 for (i=1;i<num_out;i++) { 1498 if ((strcmp(output_phone_string[i-1], LTS_MARKER_WORD_START) == 0) && 1499 (strncmp(output_phone_string[i], LTS_MARKER_SYLL_START, 2) != 0)) { 1500 1501 syl_start = i; 1502 /* search for first occurance of syllable boundary */ 1503 for(j=syl_start+1;j<num_out; j++){ 1504 out = output_phone_string[j]; 1505 if(strncmp(out, LTS_MARKER_SYLL_START, 2) == 0 && isdigit(out[strlen(out)-1])){ 1506 stress = atoi(out + strlen(out)-1); 1507 first_syl_bound = j; 1508 break; 1509 } 1510 } 1511 1512 /* swap entries until syl bound reaches word bound */ 1513 if(first_syl_bound >= 0){ 1514 for(; j>syl_start; j--){ 1515 strcpy(output_phone_string[j], output_phone_string[j-1]); 1516 } 1517 /* put syllable boundary after word boundary */ 1518 sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress); 1519 1520 /* advance i, reset variables */ 1521 i = first_syl_bound; 1522 first_syl_bound = syl_start = -1; 1523 1524 } 1525 } 1526 } 1527 1528 } 1529 1530 1531 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *pnum_out) 1532 { 1533 SWIsltsResult nRes = SWIsltsSuccess; 1534 int i,j; 1535 int root_start; 1536 int root_end; 1537 int output_index; 1538 int left_phone; 1539 char * input_seq; 1540 int found_match; 1541 int start_num_out; 1542 int backoff_output; 1543 int num_out; 1544 1545 start_num_out = num_out = *pnum_out; 1546 1547 root_start = 0; 1548 root_end = word_len; 1549 1550 for (i=0;i<word_len;i++) { 1551 1552 if ((i == 0) || (num_out == 0)) { 1553 /* pfprintf(PSTDOUT,"about to call find_phone1\n");*/ 1554 left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping); 1555 1556 #if PRINT_LTS_WORD 1557 pfprintf(PSTDOUT,"got phone %d for initial | (LTS_MARKER_PIPESEP)\n", left_phone); 1558 #endif 1559 if (left_phone < 0) { 1560 1561 #ifdef NO_STDERR 1562 PrintError("Error, cannot find | in phone mappings\n", NULL, NULL, NULL); 1563 #else 1564 PLogError(L("Error, cannot find | in phone mappings\n")); 1565 #endif 1566 nRes = SWIsltsInternalErr; 1567 goto CLEAN_UP; 1568 } 1569 } else { 1570 1571 #if PRINT_LTS_WORD 1572 pfprintf(PSTDOUT,"about to call find_phone2 num_out %d\n", num_out); 1573 pfprintf(PSTDOUT,"out[%d] %s\n", num_out-1, output_phone_string[num_out-1]); 1574 #endif 1575 1576 if (strcmp(output_phone_string[num_out-1], LTS_MARKER_PRON_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping); 1577 else if (strcmp(output_phone_string[num_out-1], LTS_MARKER_WORD_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping); 1578 else left_phone = find_phone(output_phone_string[num_out-1], lts->phone_mapping); 1579 1580 #if PRINT_LTS_WORD 1581 pfprintf(PSTDOUT,"got phone %d for %s\n", left_phone, output_phone_string[num_out-1]); 1582 #endif 1583 1584 if (left_phone < 0) { 1585 1586 #ifdef NO_STDERR 1587 PrintError("Error, cannot find %s in phone mappings\n", (unsigned long)output_phone_string[num_out-1], NULL, NULL); 1588 #else 1589 PLogError(L("Error, cannot find %s in phone mappings\n"), output_phone_string[num_out-1]); 1590 #endif 1591 nRes = SWIsltsInternalErr; 1592 goto CLEAN_UP; 1593 } 1594 } 1595 1596 /* pfprintf(PSTDOUT,"calling fill up dp\n");*/ 1597 if (fill_up_dp_for_letter(lts, word, word_len, i, root_start, root_end, left_phone)) continue; 1598 1599 /* pfprintf(PSTDOUT,"calling find output\n");*/ 1600 output_index = find_output_for_dp(lts, &backoff_output); 1601 1602 #if PRINT_LTS_WORD 1603 pfprintf(PSTDOUT,"got output %d\n", output_index); 1604 #endif 1605 1606 found_match = 1; 1607 1608 if (strlen(lts->input_for_output[output_index]) > 0) { 1609 /*some extra input string to use up*/ 1610 #if PRINT_LTS_WORD 1611 pfprintf(PSTDOUT,"GOT INPUT %s for %s letter %c\n", lts->input_for_output[output_index], word, word[i]); 1612 #endif 1613 1614 input_seq = lts->input_for_output[output_index]; 1615 if (input_seq[0] == '=') { 1616 root_end = i; 1617 input_seq = input_seq+1; /*skip suffix indicator*/ 1618 } 1619 for (j=i+1;;j++) { 1620 if (input_seq[j-(i+1)] == '\0') break; 1621 if (input_seq[j-(i+1)] == '-') { 1622 root_start = j; 1623 break; 1624 } 1625 if (j >= word_len) { 1626 found_match = 0; 1627 break; 1628 } 1629 1630 if (input_seq[j-(i+1)] != word[j]) { 1631 found_match = 0; 1632 break; 1633 } 1634 } 1635 if (found_match) { 1636 i = j-1; 1637 } 1638 } 1639 1640 if (!found_match) { 1641 #if PRINT_LTS_WORD 1642 pfprintf(PSTDOUT,"using backoff output %s instead of regular %s\n", 1643 lts->outputs[backoff_output], 1644 ts->outputs[output_index]); 1645 #endif 1646 1647 num_out = add_output(lts->outputs[backoff_output], output_phone_string, num_out, max_phone_length); 1648 } 1649 else { 1650 num_out = add_output(lts->outputs[output_index], output_phone_string, num_out, max_phone_length); 1651 } 1652 if (num_out >= max_phone_length) { 1653 nRes = SWIsltsMaxInputExceeded; 1654 goto CLEAN_UP; 1655 } 1656 } 1657 1658 *pnum_out = num_out; 1659 return SWIsltsSuccess; 1660 1661 CLEAN_UP: 1662 1663 *pnum_out = 0; 1664 return nRes; 1665 } 1666 1667 1668 1669 SWIsltsResult run_lts(LTS_HANDLE h, FSM_DICT_HANDLE hdict, char *input_sentence, char **output_phone_string, int *phone_length) 1670 { 1671 SWIsltsResult nRes = SWIsltsSuccess; 1672 int i; 1673 int len; 1674 int num_out = 0; 1675 LTS * lts; 1676 int was_in_phrase; 1677 char word[MAX_WORD_LEN]; 1678 int num_in_word; 1679 int max_phone_length; 1680 int pron_len; 1681 1682 max_phone_length = *phone_length; 1683 1684 len = strlen(input_sentence); 1685 1686 lts = (LTS*) h; 1687 1688 was_in_phrase = 0; 1689 1690 /*add a phrase start then word start at beginning*/ 1691 1692 strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START); 1693 if (num_out >= max_phone_length) { 1694 nRes = SWIsltsMaxInputExceeded; 1695 goto CLEAN_UP; 1696 } 1697 1698 num_in_word = 0; 1699 pron_len = 1; // for the first time through 1700 1701 for (i=0;i<=len;i++) { 1702 1703 #if PRINT_LTS_WORD 1704 pfprintf(PSTDOUT,"WORKING on letter %d %c\n", i, input_sentence[i]); 1705 #endif 1706 1707 /* Treat hyphen as word delimiter. Not quite right for German 1708 hyphenated compounds, but still an improvement. */ 1709 if ((input_sentence[i] == ' ') || (input_sentence[i] == '-') || (input_sentence[i] == '\t') || (i == len)) { 1710 if (num_in_word>0 ) { 1711 strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START); 1712 if (num_out >= max_phone_length) { 1713 nRes = SWIsltsMaxInputExceeded; 1714 goto CLEAN_UP; 1715 } 1716 1717 word[num_in_word] = '\0'; 1718 1719 if (1) { 1720 1721 #if PRINT_DICT_LOOKUP 1722 pfprintf(PSTDOUT,"Did not find %s in dictionary\n", word); 1723 #endif 1724 pron_len = -num_out; 1725 nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out); 1726 pron_len += num_out; // now pron_len is the number of phonemes/markers added 1727 if(pron_len == 0) 1728 num_out--; // to backspace on the LTS_MARKER_WORD_START !! 1729 if (nRes != SWIsltsSuccess) { 1730 goto CLEAN_UP; 1731 } 1732 } 1733 num_in_word = 0; 1734 } 1735 } 1736 else if ( (input_sentence[i] == '.') 1737 || (input_sentence[i] == ',') 1738 || (input_sentence[i] == '!') 1739 || (input_sentence[i] == '?') 1740 || (input_sentence[i] == '\n')) { 1741 if (was_in_phrase) { 1742 /*add a phrase boundary after lts is called*/ 1743 if (num_in_word > 0) { 1744 strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START); 1745 if (num_out >= max_phone_length) { 1746 nRes = SWIsltsMaxInputExceeded; 1747 goto CLEAN_UP; 1748 } 1749 1750 word[num_in_word] = '\0'; 1751 1752 if (1) { 1753 nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out); 1754 if (nRes != SWIsltsSuccess) { 1755 goto CLEAN_UP; 1756 } 1757 } 1758 num_in_word = 0; 1759 } 1760 strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START); 1761 if (num_out >= max_phone_length) { 1762 nRes = SWIsltsMaxInputExceeded; 1763 goto CLEAN_UP; 1764 } 1765 was_in_phrase = 0; 1766 } 1767 } 1768 else { 1769 if (num_in_word < MAX_WORD_LEN-1) { 1770 word[num_in_word++] = toupper(input_sentence[i]); 1771 was_in_phrase = 1; 1772 } 1773 } 1774 } 1775 /*adjust syllable boundaries*/ 1776 adjust_syllable_boundaries(lts, output_phone_string, num_out, max_phone_length); 1777 1778 *phone_length = num_out; 1779 return SWIsltsSuccess; 1780 1781 CLEAN_UP: 1782 1783 *phone_length = 0; 1784 return nRes; 1785 } 1786 1787 #ifdef USE_STATIC_SLTS 1788 void *lts_alloc(int num, int size) 1789 { 1790 #ifdef NO_STDERR 1791 PrintError("USE_STATIC_SLTS: lts_alloc should not be called", NULL, NULL, NULL); 1792 #else 1793 PLogError(L("USE_STATIC_SLTS: lts_alloc should not be called")); 1794 #endif 1795 return NULL; 1796 } 1797 #else 1798 1799 void *lts_alloc(int num, int size) 1800 { 1801 void *p; 1802 p = CALLOC(num, size, MTAG); 1803 return p; 1804 } 1805 #endif /* USE_STATIC_SLTS */ 1806