Home | History | Annotate | Download | only in src
      1 /*---------------------------------------------------------------------------*
      2  *  run_seq_lts.c  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 
     22 #include <stdlib.h>
     23 #include <string.h>
     24 #include <math.h>
     25 #include <ctype.h>
     26 
     27 #ifndef NO_STDERR
     28 #include <stdio.h>
     29 #else
     30 extern void PrintError(char *msg, unsigned long p1, unsigned long p2, unsigned long p3);
     31 #endif
     32 
     33 #include "passert.h"
     34 #include "pmemory.h"
     35 #include "plog.h"
     36 #include "phashtable.h"
     37 #include "lts_error.h"
     38 #include "lts.h"
     39 #include "lts_seq_internal.h"
     40 #include "port_fileio.h"
     41 #include "platform_utils.h" /* strdup, safe_strtok, etc */
     42 
     43 #define ASSERT(x) passert(x)
     44 
     45 #ifdef TI_DSP
     46 #include "tidsp_defines.h"
     47 #endif
     48 
     49 #ifdef _DEBUG
     50 #define PRINT_LOAD_TREE_SUMMARY 0
     51 #define PRINT_LOAD_TREE 0
     52 #define PRINT_CONS_COMB 0
     53 #define PRINT_DP_LETTER 0
     54 #define PRINT_LTS_WORD 0
     55 #define PRINT_DICT_LOOKUP 0
     56 #endif
     57 
     58 #define LTS_MARKER_WORD_START "WS"
     59 #define LTS_MARKER_PRON_START "PS"
     60 #define LTS_MARKER_SYLL_START "SS"
     61 #define LTS_MARKER_SYLL_START_DD "SS%d"
     62 #define LTS_MARKER_PIPESEP "|"
     63 #define LTS_MARKER_PIPESEP_CHAR '|'
     64 
     65 static int load_int(PORT_FILE *fp);
     66 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp);
     67 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions);
     68 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap);
     69 static SWIsltsResult free_letter_mapping(LM *lm);
     70 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap);
     71 static SWIsltsResult free_phone_mapping(PM *pm);
     72 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp);
     73 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num);
     74 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
     75                               LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp);
     76 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, LQUESTION **questions, int num_questions, LM *lm);
     77 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp);
     78 static SWIsltsResult free_allowable_cons_comb(LTS *lts);
     79 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp);
     80 static SWIsltsResult free_question_strings(LTS* lts);
     81 #define find_letter_index( myLet, myLM) (myLM->letter_index_for_letter[ toupper(myLet)])
     82 int find_phone(const char *ph, PM *pm);
     83 int find_best_string(const char *str, LTS* lts);
     84 int find_best_prefix_string(const char *str, LTS* lts);
     85 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone);
     86 #define in_list(myV, myQ)   (bitarray_read_bit( myQ->membership, myV))
     87 #define qmatches(myQ, myU)  (in_list( myU->properties[ myQ->type], myQ))
     88 int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) ;
     89 int find_output_for_dp(LTS *lts, int *pbackoff_output);
     90 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length);
     91 int is_allowable_cons_comb(LTS *lts, const char *cons_string);
     92 void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length);
     93 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *num_out);
     94 
     95 /*------------
     96  *
     97  * bitarray
     98  *
     99  *-----------*/
    100 
    101 #define bitarray_read_bit( biTs, iBiT) ( biTs[iBiT/16] & (1<<((iBiT)%16)) )
    102 /* int bitarray_read_bit( unsigned short* bits, int iBit)
    103    {  // ASSERT( iBit<256);
    104    return bits[iBit/16] & (1<<((iBit)%16));
    105    } */
    106 
    107 void bitarray_write_bit( unsigned short* bits, int iBit, int iVal)
    108 {
    109   unsigned short sect;
    110   ASSERT( iBit<256);
    111   sect = bits[iBit/16];
    112   if(iVal) { sect |= (1<<(iBit%16)); }
    113   else { sect &= ~(1<<(iBit%16)); }
    114   bits[ iBit/16] = sect;
    115 }
    116 void bitarray_populate_from_list(unsigned short* bits, char* list, int listlen)
    117 {
    118   unsigned int i;
    119   for(i=0; i<UCHAR_MAX/sizeof(unsigned short)/8; i++)
    120     bits[i] = 0;
    121   for(i=0; i<(unsigned int)listlen; i++)
    122     bitarray_write_bit( bits, list[i], 1);
    123 }
    124 
    125 /*-----------
    126  *
    127  * PHashTable
    128  *
    129  *-----------*/
    130 
    131 static int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
    132 { return strcmp((const char*)key1,(const char*)key2); }
    133 static unsigned int HashGetCode(const void *key)
    134 {
    135   const char* k = (const char*)key;
    136   unsigned int i, len, h = 0;
    137   len = strlen(k);
    138   for (i=0; i<len; i++) h = 31*h + (unsigned int)k[i];
    139   return h;
    140 }
    141 void* my_PHashTableCreate_FromStrings( const char* strings[], int num_strings,
    142 				       const LCHAR* hashName)
    143 {
    144   PHashTable* table = NULL;
    145   ESR_ReturnCode       rc = ESR_SUCCESS;
    146   PHashTableArgs       hashArgs;
    147   int i;
    148   hashArgs.capacity = 63;
    149   hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
    150   hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
    151   hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
    152   rc = PHashTableCreate( &hashArgs, hashName, &table);
    153   for(i=0; i<num_strings; i++) {
    154     void* old;
    155     /* formerly the code used linear lookup, so let's avoid dups to match up */
    156     rc = PHashTableGetValue( table, strings[i], (void**)&old);
    157     if(rc != ESR_SUCCESS) {
    158       rc = PHashTablePutValue( table, strings[i], (const void *)i, NULL );
    159     }
    160   }
    161   return table;
    162 }
    163 
    164 /*---------
    165  *
    166  * i/o
    167  *
    168  *---------*/
    169 
    170 static int load_int(PORT_FILE *fp)
    171 {
    172   int v;
    173 
    174   PORT_FREAD_INT16((uint16 *)&v, sizeof(int), 1, fp);
    175 
    176   return v;
    177 }
    178 
    179 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp)
    180 {
    181   int                  i, num_questions;
    182   LQUESTION         ** questions;
    183   SWIsltsResult          nRes = SWIsltsSuccess;
    184 
    185   num_questions = load_int(fp);
    186 
    187 #if PRINT_LOAD_TREE_SUMMARY
    188   pfprintf(PSTDOUT,"loading %d questions\n", num_questions);
    189 #endif
    190 
    191   *pquestions = questions = (LQUESTION**) lts_alloc(num_questions, sizeof(LQUESTION*));
    192   if (questions == NULL) {
    193     nRes = SWIsltsErrAllocResource;
    194     goto CLEAN_UP;
    195   }
    196 
    197   for (i=0;i<num_questions;i++) {
    198     questions[i] = (LQUESTION*) lts_alloc(1, sizeof(LQUESTION));
    199     if (questions[i] == NULL) {
    200       nRes = SWIsltsErrAllocResource;
    201       goto CLEAN_UP;
    202     }
    203 
    204 #if PRINT_LOAD_TREE
    205     pfprintf(PSTDOUT,"LOAD_TREE: loading question %d\n", i);
    206 #endif
    207 
    208     PORT_FREAD_CHAR(&(questions[i]->type), sizeof(char), 1, fp);
    209     PORT_FREAD_CHAR(&(questions[i]->num_list), sizeof(char), 1, fp);
    210 
    211     questions[i]->list = (unsigned char*) lts_alloc(questions[i]->num_list, sizeof(unsigned char));
    212     if (questions[i]->list == NULL) {
    213       nRes = SWIsltsErrAllocResource;
    214       goto CLEAN_UP;
    215     }
    216 
    217     PORT_FREAD_CHAR(questions[i]->list, sizeof(char), (questions[i]->num_list), fp);
    218 
    219     bitarray_populate_from_list( questions[i]->membership, (char*) questions[i]->list, questions[i]->num_list);
    220   }
    221 
    222   *pnum_questions = num_questions;
    223   return SWIsltsSuccess;
    224 
    225  CLEAN_UP:
    226 
    227   free_lquestions(questions, num_questions);
    228   *pnum_questions = 0;
    229   *pquestions = NULL;
    230   return nRes;
    231 }
    232 
    233 /* deallocate questions */
    234 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions)
    235 {
    236   SWIsltsResult          nRes = SWIsltsSuccess;
    237   int                  i;
    238 
    239   if (questions) {
    240     for (i=0; i<num_questions; i++) {
    241       if (questions[i]->list) {
    242         FREE(questions[i]->list);
    243         questions[i]->list = NULL;
    244       }
    245       FREE(questions[i]);
    246       questions[i] = NULL;
    247     }
    248     FREE(questions);
    249   }
    250   return nRes;
    251 }
    252 
    253 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap)
    254 {
    255   SWIsltsResult          nRes = SWIsltsSuccess;
    256   unsigned char        len;
    257   LM                 * lm;
    258   int                  i;
    259 
    260   /*  pfprintf(PSTDOUT,"got len %d\n", len);*/
    261   lm = (LM*) lts_alloc(1, sizeof(LM));
    262   if (lm == NULL) {
    263     nRes = SWIsltsErrAllocResource;
    264     goto CLEAN_UP;
    265   }
    266 
    267   PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
    268   lm->num_letters = len;
    269 
    270   lm->letters = (char*) lts_alloc(len, sizeof(char));
    271   if (lm->letters == NULL) {
    272     nRes = SWIsltsErrAllocResource;
    273     goto CLEAN_UP;
    274   }
    275 
    276   lm->type = (char*) lts_alloc(len, sizeof(char));
    277   if (lm->type == NULL) {
    278     nRes = SWIsltsErrAllocResource;
    279     goto CLEAN_UP;
    280   }
    281 
    282   PORT_FREAD_CHAR(lm->letters, sizeof(char), len, fp);
    283   PORT_FREAD_CHAR(lm->type, sizeof(char), len, fp);
    284 
    285   {
    286     unsigned int letter;
    287     for (letter=0; letter <= UCHAR_MAX; letter++)
    288       lm->letter_index_for_letter[letter] = LTS_MAXCHAR;
    289   }
    290 
    291   for (i=0;i<len;i++) {
    292     char letter = toupper(lm->letters[i]);
    293     lm->letters[i] = letter;
    294     lm->letter_index_for_letter[(unsigned char)letter] = i;
    295   }
    296   *ppLetterMap = lm;
    297   return SWIsltsSuccess;
    298 
    299  CLEAN_UP:
    300   free_letter_mapping(lm);
    301   *ppLetterMap = NULL;
    302   return nRes;
    303 }
    304 
    305 /* deallocate letter mapping */
    306 static SWIsltsResult free_letter_mapping(LM *lm)
    307 {
    308   SWIsltsResult          nRes = SWIsltsSuccess;
    309 
    310   if (lm) {
    311     if (lm->letters) {
    312       FREE(lm->letters);
    313       lm->letters = NULL;
    314     }
    315     if (lm->type) {
    316       FREE(lm->type);
    317       lm->type = NULL;
    318     }
    319     lm->num_letters = 0;
    320     FREE(lm);
    321   }
    322   return nRes;
    323 }
    324 
    325 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap)
    326 {
    327   SWIsltsResult          nRes = SWIsltsSuccess;
    328   PM                 * pm;
    329   int                  i;
    330   unsigned char        len;
    331   char               * ph;
    332 
    333   pm = (PM*) lts_alloc(1, sizeof(PM));
    334   if (pm == NULL) {
    335     nRes = SWIsltsErrAllocResource;
    336     goto CLEAN_UP;
    337   }
    338 
    339   pm->num_phones = load_int(fp);
    340 
    341   pm->phones = (char**) lts_alloc(pm->num_phones, sizeof(char*));
    342   if (pm->phones == NULL) {
    343     nRes = SWIsltsErrAllocResource;
    344     goto CLEAN_UP;
    345   }
    346 
    347   for (i=0;i<pm->num_phones;i++) {
    348     PORT_FREAD_CHAR(&len, sizeof(unsigned char), 1, fp);
    349 
    350     pm->phoneH = NULL;
    351     pm->phones[i] = ph = (char*) lts_alloc(len+1, sizeof(char));
    352     if (ph == NULL) {
    353       nRes = SWIsltsErrAllocResource;
    354       goto CLEAN_UP;
    355     }
    356 
    357     PORT_FREAD_CHAR(ph, sizeof(char), len, fp);
    358     ph[len] = '\0';
    359   }
    360   pm->phoneH = my_PHashTableCreate_FromStrings( (const char**)pm->phones,
    361 						pm->num_phones,
    362 						L("lts.phoneH"));
    363   if(pm->phoneH == NULL) {
    364     nRes = SWIsltsErrAllocResource;
    365     goto CLEAN_UP;
    366   }
    367   *ppPhoneMap = pm;
    368   return SWIsltsSuccess;
    369 
    370  CLEAN_UP:
    371   free_phone_mapping(pm);
    372   *ppPhoneMap = NULL;
    373 
    374   return nRes;
    375 }
    376 
    377 /* deallocate phone mapping */
    378 static SWIsltsResult free_phone_mapping(PM *pm)
    379 {
    380   SWIsltsResult          nRes = SWIsltsSuccess;
    381   int                  i;
    382 
    383   if (pm) {
    384     if (pm->phones) {
    385       for (i=0; i<pm->num_phones; i++) {
    386         if (pm->phones[i]) {
    387           FREE(pm->phones[i]);
    388           pm->phones[i] = NULL;
    389         }
    390       }
    391       FREE(pm->phones);
    392       pm->phones = NULL;
    393     }
    394     if(pm->phoneH)
    395       PHashTableDestroy( (PHashTable*)pm->phoneH);
    396     pm->phoneH = NULL;
    397     FREE(pm);
    398   }
    399   return nRes;
    400 }
    401 
    402 
    403 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp)
    404 {
    405   SWIsltsResult        nRes = SWIsltsSuccess;
    406   int                  i;
    407   char              ** outputs = NULL;
    408   char              ** inputs = NULL;
    409   int                  num;
    410   unsigned char        olen;
    411   char               * out;
    412   unsigned char        ilen;
    413   char               * in;
    414 
    415   num = load_int(fp);
    416 
    417   *poutputs = outputs = (char **) lts_alloc(num, sizeof(char*));
    418   if (outputs == NULL) {
    419     nRes = SWIsltsErrAllocResource;
    420     goto CLEAN_UP;
    421   }
    422 
    423   *pinputs = inputs = (char **) lts_alloc(num, sizeof(char*));
    424   if (inputs == NULL) {
    425     nRes = SWIsltsErrAllocResource;
    426     goto CLEAN_UP;
    427   }
    428 
    429   for (i=0;i<num;i++) {
    430     PORT_FREAD_CHAR(&olen, sizeof(char), 1, fp);
    431     out = outputs[i] = lts_alloc(olen + 1, sizeof(char));
    432     if (out == NULL) {
    433       nRes = SWIsltsErrAllocResource;
    434       goto CLEAN_UP;
    435     }
    436 
    437     if (olen > 0) {
    438       PORT_FREAD_CHAR(out, sizeof(char), olen, fp);
    439     }
    440     out[olen] = '\0';
    441     PORT_FREAD_CHAR(&ilen, sizeof(char), 1, fp);
    442     in = inputs[i] = lts_alloc(ilen + 1, sizeof(char));
    443     if (in == NULL) {
    444       nRes = SWIsltsErrAllocResource;
    445       goto CLEAN_UP;
    446     }
    447 
    448     if (ilen > 0) {
    449       PORT_FREAD_CHAR(in, sizeof(char), ilen, fp);
    450     }
    451     in[ilen] = '\0';
    452 #if PRINT_LOAD_TREE
    453     if (ilen > 0) pfprintf(PSTDOUT,"LOAD_TREE: got input %s out %s\n", in, outputs[i]);
    454     pfprintf(PSTDOUT,"LOAD_TREE: outputs[%d] len %d out %x out %s\n", i, olen, outputs[i], outputs[i]);
    455 #endif
    456   }
    457 
    458   *pnum = num;
    459   return SWIsltsSuccess;
    460 
    461  CLEAN_UP:
    462 
    463   free_outputs(outputs, inputs, num);
    464   *poutputs = NULL;
    465   *pinputs = NULL;
    466   *pnum = 0;
    467 
    468   return nRes;
    469 }
    470 
    471 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num)
    472 {
    473   SWIsltsResult          nRes = SWIsltsSuccess;
    474   int                  i;
    475 
    476   if (outputs) {
    477     for (i=0; i<num; i++) {
    478       if (outputs[i]) {
    479         FREE(outputs[i]);
    480         outputs[i] = NULL;
    481       }
    482     }
    483     FREE(outputs);
    484   }
    485 
    486   if (inputs) {
    487     for (i=0; i<num; i++) {
    488       if (inputs[i]) {
    489         FREE(inputs[i]);
    490         inputs[i] = NULL;
    491       }
    492     }
    493     FREE(inputs);
    494   }
    495   return nRes;
    496 }
    497 
    498 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
    499                       LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp)
    500 {
    501   SWIsltsResult          nRes = SWIsltsSuccess;
    502   int                  let, i;
    503   RT_LTREE           * tree = NULL;
    504   RT_LTREE          ** trees = NULL;
    505 
    506 #if PRINT_LOAD_TREE_SUMMARY
    507   pfprintf(PSTDOUT,"loading letter mapping\n");
    508 #endif
    509   *ptrees = NULL;
    510   *pquestions = NULL;
    511   *plm = NULL;
    512 
    513   nRes = load_letter_mapping(fp, plm);
    514   if (nRes != SWIsltsSuccess) {
    515     goto CLEAN_UP;
    516   }
    517 
    518 #if PRINT_LOAD_TREE_SUMMARY
    519   pfprintf(PSTDOUT,"loading questions\n");
    520 #endif
    521 
    522   nRes = load_lquestions(pquestions, num_questions, fp);
    523   if (nRes != SWIsltsSuccess) {
    524     goto CLEAN_UP;
    525   }
    526 
    527   *num_letters = load_int(fp);
    528 
    529   if (*num_letters != (*plm)->num_letters) {
    530 #ifndef NO_STDERR
    531     PLogError(L("Error loading data, num_letters %d doesn't match num from mapping %d\n"),
    532             *num_letters, (*plm)->num_letters);
    533 #endif
    534     nRes = SWIsltsInternalErr;
    535     goto CLEAN_UP;
    536   }
    537 
    538   *ptrees = trees = (RT_LTREE**) lts_alloc(*num_letters, sizeof(RT_LTREE*));
    539   if (trees == NULL) {
    540     nRes = SWIsltsErrAllocResource;
    541     goto CLEAN_UP;
    542   }
    543 
    544   for (let=0;let<*num_letters;let++) {
    545     /*    pfprintf(PSTDOUT,"loading for t %d\n", t);*/
    546 
    547     trees[let] = tree = (RT_LTREE*) lts_alloc(1, sizeof(RT_LTREE));
    548     if (tree == NULL) {
    549       nRes = SWIsltsErrAllocResource;
    550       goto CLEAN_UP;
    551     }
    552 
    553     tree->num_nodes = load_int(fp);
    554 
    555     tree->values_or_question1 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
    556     if (tree->values_or_question1 == NULL) {
    557       nRes = SWIsltsErrAllocResource;
    558       goto CLEAN_UP;
    559     }
    560 
    561     tree->question2 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
    562     if (tree->question2 == NULL) {
    563       nRes = SWIsltsErrAllocResource;
    564       goto CLEAN_UP;
    565     }
    566 
    567     tree->left_nodes = (short *) lts_alloc(tree->num_nodes, sizeof(short));
    568     if (tree->left_nodes == NULL) {
    569       nRes = SWIsltsErrAllocResource;
    570       goto CLEAN_UP;
    571     }
    572 
    573 #if PRINT_LOAD_TREE
    574     pfprintf(PSTDOUT,"LOAD_TREE: Tree for let %d num_nodes %d\n", let, tree->num_nodes);
    575 #endif
    576 
    577     for (i=0;i<tree->num_nodes;i++) {
    578       PORT_FREAD_INT16(&(tree->left_nodes[i]), sizeof(short), 1, fp);
    579       PORT_FREAD_INT16(&(tree->values_or_question1[i]), sizeof(short), 1, fp);
    580 
    581 #if PRINT_LOAD_TREE
    582       pfprintf(PSTDOUT,"LOAD_TREE:  node[%d] %d %d", i, tree->left_nodes[i], tree->values_or_question1[i]);
    583 #endif
    584 
    585       PORT_FREAD_INT16(&(tree->question2[i]), sizeof(short), 1, fp);
    586       if (tree->left_nodes[i] != NO_NODE) {
    587         if (tree->question2[i] == -1) tree->question2[i] = 0;
    588 #if PRINT_LOAD_TREE
    589         pfprintf(PSTDOUT," %x", (unsigned short) tree->question2[i]);
    590 #endif
    591       }
    592 
    593 #if PRINT_LOAD_TREE
    594       pfprintf(PSTDOUT,"\n");
    595 #endif
    596     }
    597   }
    598 
    599   return SWIsltsSuccess;
    600 
    601  CLEAN_UP:
    602 
    603   free_trees(trees, *num_letters, *pquestions, *num_questions, *plm);
    604   *ptrees = NULL;
    605   *pquestions = NULL;
    606   *plm = NULL;
    607   *num_letters = 0;
    608   *num_questions = 0;
    609 
    610   return nRes;
    611 }
    612 
    613 /* deallocate trees */
    614 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters,
    615                        LQUESTION **questions, int num_questions, LM *lm)
    616 {
    617   SWIsltsResult          nRes = SWIsltsSuccess;
    618   int                  i;
    619   RT_LTREE           * tree;
    620 
    621   if (lm) {
    622     free_letter_mapping(lm);
    623   }
    624   if (questions) {
    625     free_lquestions(questions, num_questions);
    626   }
    627 
    628   if (trees) {
    629     for (i=0; i<num_letters; i++) {
    630       if (trees[i]) {
    631         tree = trees[i];
    632         if (tree->values_or_question1) {
    633           FREE(tree->values_or_question1);
    634           tree->values_or_question1 = NULL;
    635         }
    636         if (tree->question2) {
    637           FREE(tree->question2);
    638           tree->question2 = NULL;
    639         }
    640         if (tree->left_nodes) {
    641           FREE(tree->left_nodes);
    642           tree->left_nodes = NULL;
    643         }
    644         FREE(trees[i]);
    645         trees[i] = NULL;
    646       }
    647     }
    648     FREE(trees);
    649   }
    650   return nRes;
    651 }
    652 
    653 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp)
    654 {
    655   SWIsltsResult          nRes = SWIsltsSuccess;
    656   char                line[50];
    657   char                tempstr[50];
    658   char              * tok;
    659   int                 i, toklen;
    660   int                 count;
    661   char          seps[] = " 	\n";
    662 
    663   lts->num_cons_comb = 0;
    664   lts->allowable_cons_combH = NULL;
    665 
    666   while (PORT_FGETS(line, 50, fp)) {
    667 
    668 #ifndef TI_DSP
    669 
    670     /*need to get rid of sme crud at the end of the line because it is being read in binary mode*/
    671     for (i=strlen(line)-1;i>=0;i--) {
    672       if (!isalpha(line[i])) line[i] = ' ';
    673     }
    674 #endif
    675     count = 0;
    676     tok = safe_strtok(line, seps, &toklen);
    677     tempstr[0] = '\0';
    678 
    679     /* get all available sequence of tokens */
    680     while(tok && toklen > 0){
    681       count += toklen;
    682       strncat(tempstr, tok, toklen);
    683       tempstr[count+1] = '\0';
    684       strcat(tempstr, " ");
    685       count++;
    686 
    687       tok = safe_strtok(tok+toklen, seps, &toklen);
    688     }
    689     if (count > 0) {
    690 
    691         /* delete the final space */
    692         tempstr[count-1] = '\0';
    693 
    694         lts->allowable_cons_comb[lts->num_cons_comb] = (char*) lts_alloc(strlen(tempstr)+1, sizeof(char));
    695         if (lts->allowable_cons_comb[lts->num_cons_comb] == NULL) {
    696           nRes = SWIsltsErrAllocResource;
    697           goto CLEAN_UP;
    698         }
    699 
    700         strcpy(lts->allowable_cons_comb[lts->num_cons_comb], tempstr);
    701 
    702 #if PRINT_CONS_COMB
    703         pfprintf(PSTDOUT,"LOAD_TREE: allowable_cons_comb[%d]: %s\n", lts->num_cons_comb, tempstr);
    704 #endif
    705 
    706         lts->num_cons_comb++;
    707         if (lts->num_cons_comb >= MAX_CONS_COMB) {
    708 #ifndef NO_STDERR
    709             PLogError(L("MAX_CONS_COMB %d exceeded\n"), MAX_CONS_COMB);
    710 #endif
    711           nRes = SWIsltsInternalErr;
    712           goto CLEAN_UP;
    713         }
    714     }
    715   }
    716   if (lts->num_cons_comb == 0) {
    717 #ifndef NO_STDERR
    718     PLogError(L("Warning: the data file is missing consonant combinations - syllable boundaries will be incorrect\n"));
    719 #endif
    720   }
    721   lts->allowable_cons_combH = my_PHashTableCreate_FromStrings( (const char**)lts->allowable_cons_comb, lts->num_cons_comb, L("lts.allowable_cons_combH"));
    722   if(lts->allowable_cons_combH == NULL) {
    723     nRes = SWIsltsErrAllocResource;
    724     goto CLEAN_UP;
    725   }
    726 
    727 #if PRINT_LOAD_TREE_SUMMARY
    728   pfprintf(PSTDOUT,"loaded %d cons combinations\n", lts->num_cons_comb);
    729 #endif
    730 
    731   return SWIsltsSuccess;
    732 
    733  CLEAN_UP:
    734 
    735   free_allowable_cons_comb(lts);
    736 
    737   return nRes;
    738 }
    739 
    740 static SWIsltsResult free_allowable_cons_comb(LTS *lts)
    741 {
    742   SWIsltsResult          nRes = SWIsltsSuccess;
    743   int                  i;
    744 
    745   for (i=0; i<lts->num_cons_comb; i++) {
    746     if (lts->allowable_cons_comb[i]) {
    747       FREE(lts->allowable_cons_comb[i]);
    748       lts->allowable_cons_comb[i] = NULL;
    749     }
    750   }
    751   if(lts->allowable_cons_combH)
    752     PHashTableDestroy( (PHashTable*)lts->allowable_cons_combH);
    753   lts->allowable_cons_combH = NULL;
    754   return nRes;
    755 }
    756 
    757 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp)
    758 {
    759   SWIsltsResult          nRes = SWIsltsSuccess;
    760   int                  i;
    761   int                  num;
    762   unsigned char        len;
    763   char              ** strings;
    764   char               * str;
    765 
    766   num = load_int(fp);
    767 
    768   lts->strings = strings = (char **) lts_alloc(num, sizeof(char*));
    769   lts->string_lens = (char*)lts_alloc(num, sizeof(char));
    770 
    771   if (strings == NULL || lts->string_lens == NULL ) {
    772     nRes = SWIsltsErrAllocResource;
    773     goto CLEAN_UP;
    774   }
    775 
    776   for (i=0;i<num;i++) {
    777     PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
    778 
    779     str = strings[i] = lts_alloc(len + 1, sizeof(char));
    780     if (str == NULL) {
    781       nRes = SWIsltsErrAllocResource;
    782       goto CLEAN_UP;
    783     }
    784 
    785     if (len > 0) {
    786       PORT_FREAD_CHAR(str, sizeof(char), len, fp);
    787     }
    788     str[len] = '\0';
    789 
    790     bitarray_populate_from_list( lts->membership, lts->strings[i], len);
    791     lts->string_lens[i] = strlen(lts->strings[i]);
    792   }
    793 
    794   // *pnum = num;
    795   lts->num_strings = num;
    796 
    797   return SWIsltsSuccess;
    798 
    799  CLEAN_UP:
    800 
    801   free_question_strings(lts);
    802 
    803   return nRes;
    804 }
    805 
    806 /* deallocate question strings */
    807 static SWIsltsResult free_question_strings(LTS* lts)
    808 {
    809   SWIsltsResult          nRes = SWIsltsSuccess;
    810   int                  i;
    811 
    812   if (lts->strings) {
    813     for (i=0;i<lts->num_strings;i++) {
    814       if (lts->strings[i]) {
    815         FREE(lts->strings[i]);
    816         lts->strings[i] = NULL;
    817       }
    818     }
    819     FREE(lts->strings);
    820     if(lts->string_lens) FREE(lts->string_lens);
    821     lts->strings = NULL;
    822     lts->string_lens = NULL;
    823   }
    824   return nRes;
    825 }
    826 
    827 
    828 SWIsltsResult create_lts(char *data_filename, LTS_HANDLE *phLts)
    829 {
    830   SWIsltsResult          nRes = SWIsltsSuccess;
    831   LTS                * lts;
    832 
    833 #ifdef USE_STATIC_SLTS
    834   /* TODO: language-specific ID here? */
    835   lts = &g_lts;
    836 
    837 #else /* !USE_STATIC_SLTS */
    838 
    839   PORT_FILE *fp;
    840 
    841   lts = (LTS*) lts_alloc(1, sizeof(LTS));
    842   if (lts == NULL) {
    843     nRes = SWIsltsErrAllocResource;
    844     goto CLEAN_UP;
    845   }
    846 
    847   fp = PORT_FOPEN(data_filename, "rb");
    848   if (fp == NULL) {
    849 #ifndef NO_STDERR
    850     PLogError(L("Cannot open %s\n"), data_filename);
    851 #endif
    852     nRes = SWIsltsFileOpenErr;
    853     goto CLEAN_UP;
    854   }
    855    nRes = load_phone_mapping(fp, &lts->phone_mapping);
    856    if (nRes != SWIsltsSuccess) {
    857      PLogError(L("SWIsltsErr: load_phone_mapping() failed: Err_code = %d\n"), nRes);
    858      goto CLEAN_UP;
    859    }
    860 
    861    nRes = load_question_strings(lts, fp);
    862    if (nRes != SWIsltsSuccess) {
    863      PLogError(L("SWIsltsErr: load_question_strings() failed: Err_code = %d\n"), nRes);
    864      goto CLEAN_UP;
    865    }
    866 
    867    nRes  = load_outputs(&(lts->outputs), &(lts->input_for_output), &lts->num_outputs, fp);
    868    if (nRes != SWIsltsSuccess) {
    869      PLogError(L("SWIsltsErr: load_outputs() failed: Err_code = %d\n"), nRes);
    870      goto CLEAN_UP;
    871    }
    872 
    873 #if PRINT_LOAD_TREE
    874   pfprintf(PSTDOUT,"LOAD_TREE: got %d outputs, loading trees\n", lts->num_outputs);
    875 #endif
    876 
    877   nRes = load_trees(&(lts->trees), &(lts->num_letters),
    878                  &(lts->questions), &(lts->num_questions),
    879                  &(lts->letter_mapping),
    880                  fp);
    881   if (nRes != SWIsltsSuccess) {
    882     PLogError(L("SWIsltsErr: load_trees() failed: Err_code = %d\n"), nRes);
    883     goto CLEAN_UP;
    884   }
    885 
    886   nRes = load_allowable_cons_comb(lts, fp);
    887   if (nRes != SWIsltsSuccess) {
    888     PLogError(L("SWIsltsErr: load_allowable_cons_comb() failed: Err_code = %d\n"), nRes);
    889     goto CLEAN_UP;
    890   }
    891 
    892   PORT_FCLOSE(fp);
    893 
    894 #endif /* !USE_STATIC_SLTS */
    895 
    896   *phLts = lts;
    897   return SWIsltsSuccess;
    898 
    899  CLEAN_UP:
    900 
    901   free_lts(lts);
    902   *phLts = NULL;
    903   return nRes;
    904 }
    905 
    906 /* deallocates LTS */
    907 SWIsltsResult free_lts(LTS_HANDLE hlts)
    908 {
    909   SWIsltsResult          nRes = SWIsltsSuccess;
    910   LTS                * lts = (LTS *)hlts;
    911 
    912   if (lts) {
    913 
    914 #ifndef USE_STATIC_SLTS
    915     free_phone_mapping(lts->phone_mapping);
    916     free_question_strings(lts);
    917     lts->strings = NULL;
    918     lts->phone_mapping = NULL;
    919 
    920     free_outputs(lts->outputs, lts->input_for_output, lts->num_outputs);
    921     lts->input_for_output = lts->outputs = NULL;
    922 
    923     free_trees(lts->trees, lts->num_letters,
    924                lts->questions, lts->num_questions,
    925                lts->letter_mapping);
    926     lts->trees = NULL;
    927     lts->questions = NULL;
    928     lts->letter_mapping = NULL;
    929 
    930     free_allowable_cons_comb(lts);
    931     FREE(lts);
    932 #endif /* !USE_STATIC_LTS */
    933   }
    934 
    935   return nRes;
    936 }
    937 
    938 
    939 int find_phone(const char *ph, PM *pm)
    940 {
    941   ESR_ReturnCode rc;
    942   int iRet = -1;
    943   rc = PHashTableGetValue((PHashTable*)pm->phoneH, ph, (void**)(void*)&iRet);
    944   if (rc != ESR_SUCCESS)
    945     PLogError("error while in find_phone(%s,%x)\n", ph, pm);
    946   return iRet;
    947 }
    948 
    949 int find_best_string(const char *str, LTS* lts)
    950 {
    951   int i, maxlen, maxi, len;
    952   int len_str;
    953 
    954   if(str[0] == '\0')   return -1;
    955   len_str = strlen(str);
    956 
    957   maxi = -1;
    958   maxlen = 0;
    959 
    960   for (i=0;i<lts->num_strings;i++) {
    961     len = lts->string_lens[i];
    962     if( len > len_str)
    963       continue; /* no point in comparison */
    964     if (strncmp(str, lts->strings[i], len) == 0) {
    965       if (len > maxlen) {
    966 	maxlen = len;
    967         maxi = i;
    968       }
    969     }
    970   }
    971   return maxi;
    972 }
    973 
    974 int find_best_prefix_string(const char *str, LTS* lts)
    975 {
    976   int i;
    977   int maxlen;
    978   int maxi;
    979   int len;
    980   int prelen;
    981 
    982   maxi = -1;
    983   maxlen = 0;
    984 
    985   prelen = strlen(str);
    986 
    987   for (i=0;i<lts->num_strings;i++) {
    988     len = lts->string_lens[i];
    989     if (len <= prelen) {
    990       if (strncmp(str + (prelen - len), lts->strings[i], len) == 0) {
    991         if (len > maxlen) {
    992           maxlen = len;
    993           maxi = i;
    994         }
    995       }
    996     }
    997   }
    998   return maxi;
    999 }
   1000 
   1001 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone)
   1002 {
   1003   int i,j;
   1004   LDP *dp;
   1005   unsigned char letter;
   1006   int hit_wb;
   1007   LM *lm;
   1008   unsigned char word[MAX_WORD_LEN];
   1009   char tempstr[MAX_WORD_LEN];
   1010   int first_syl_end;
   1011   int last_syl_start;
   1012 
   1013   dp = &(lts->dp);
   1014   lm = lts->letter_mapping;
   1015 
   1016   /* the LTS decision tree does not seem to be well trained at all for
   1017      the letter ' when followed by "s"  ... It seems to result in the
   1018 	 phoneme 'm', which is wrong.   "'t" seems to be OK though.
   1019 	 BAD: Kevin's : k6v6nmz ...  pal's : palmz ... paul's : p{lz
   1020 	 BAD: janice's : jan6s6mz ... tom's house : t)mmz&h?s ... tonya's : t)ny6mz
   1021 	 BAD: jake's house : jAk6mz&h?s
   1022 	 Ignoring ' as below we get ...
   1023      BETTER: Kevin's : kev6nz  ... pal's : palz ... paul's : p{lz
   1024 	 BETTER: janice's : jan6s6s ... tom's house : t)mz&h?s ... tonya's : t)ny6s
   1025 	 BETTER: jake's house : jAk6s&h?s
   1026 	 The proper solution requires a legitimate text normalizer with special
   1027 	 handling of cases like 's which would always put a "z" there,
   1028 	 except if preceded by an unvoiced stop (ptk) which requires a "s" there.
   1029 	 For now let's just skip the ' letter, which testing shows to be generally
   1030 	 safe (janice's, jake's etc are better but still not quite right). */
   1031 
   1032   if(input_word[index] == '\'')
   1033     return 1; // same as unknown character
   1034 
   1035   letter = find_letter_index(input_word[index], lm);
   1036 
   1037   if (letter == LTS_MAXCHAR) {
   1038   /* lisa - we need to decide how to handle this case.  Do we just silently skip unknown
   1039     characters or warn the app or user somehow*/
   1040 #ifdef NO_STDERR
   1041     PrintError("unknown character on input %c - skipping\n", input_word[index], NULL, NULL);
   1042 #else
   1043     PLogError(L("unknown character on input %c - skipping\n"), input_word[index]);
   1044 #endif
   1045     return 1;
   1046   }
   1047 
   1048   hit_wb = 0;
   1049 
   1050   /*pfprintf(PSTDOUT,"left context\n");*/
   1051 
   1052   for (j=0;j<5;j++) {
   1053     if (hit_wb) {
   1054       dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
   1055     } else {
   1056       i = index - (j+1);
   1057       if (i < 0) dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
   1058       else {
   1059         dp->properties[ Left1+j] = find_letter_index(input_word[i], lm);
   1060         if (dp->properties[ Left1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
   1061           dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
   1062           hit_wb = 1;
   1063         }
   1064       }
   1065     }
   1066   }
   1067 
   1068   /*pfprintf(PSTDOUT,"right context\n");*/
   1069 
   1070   hit_wb = 0;
   1071   for (j=0;j<5;j++) {
   1072     if (hit_wb) {
   1073       dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
   1074     } else {
   1075       i = index + (j+1);
   1076       if (i >= word_len) dp->properties[Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
   1077       else {
   1078         dp->properties[ Right1+j] = find_letter_index(input_word[i], lm);
   1079         if (dp->properties[ Right1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
   1080           dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
   1081           hit_wb = 1;
   1082         }
   1083       }
   1084     }
   1085   }
   1086 
   1087   dp->letter = letter; // properties[ Letter] = letter;
   1088 
   1089   dp->properties[ LeftPhone1] = left_phone;
   1090 
   1091   /*pfprintf(PSTDOUT,"word stuff\n"); */
   1092 
   1093   /*find word start and end - use unknown character as word boundaries*/
   1094 
   1095   dp->properties[ WordLen] = word_len;
   1096 
   1097   if (index == 0) dp->properties[ LetInWord] = 0;
   1098   else if (index == word_len-1) dp->properties[ LetInWord] = 2;
   1099   else dp->properties[ LetInWord] = 1;
   1100 
   1101   for (i=0;i<word_len;i++) {
   1102     word[i] = find_letter_index(input_word[i], lm);
   1103   }
   1104 
   1105   /*figure out syllable in word - not really syllables - just looks to see if is or at first or last vowel*/
   1106   /*  pfprintf(PSTDOUT,"syl stuff\n");*/
   1107 
   1108   first_syl_end = word_len;
   1109   for (i=0;i<word_len;i++) {
   1110     if (lm->type[word[i]] == 1) {
   1111       for (j=i+1;j<word_len;j++) {
   1112         if (lm->type[word[j]] != 1) break;
   1113       }
   1114       first_syl_end = j;
   1115       break;
   1116     }
   1117   }
   1118   last_syl_start = 0;
   1119   for (i=word_len-1;i>=0;i--) {
   1120     if (lm->type[word[i]] == 1) {
   1121       for (j=i-1;j>=0;j--) {
   1122         if (lm->type[word[j]] != 1) break;
   1123       }
   1124       last_syl_start = j;
   1125       break;
   1126     }
   1127   }
   1128 
   1129 #if PRINT_DP_LETTER
   1130   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
   1131 #endif
   1132 
   1133   if (index > last_syl_start) dp->properties[ SylInWord] = 2;
   1134   else if (index < first_syl_end) dp->properties[ SylInWord] = 0;
   1135   else dp->properties[ SylInWord] = 1;
   1136 
   1137   first_syl_end = word_len;
   1138   for (i=0;i<word_len;i++) {
   1139     if (lm->type[word[i]] == 1) {
   1140       for (j=i+1;j<word_len;j++) {
   1141         if (lm->type[word[j]] != 1) break;
   1142       }
   1143       for (;j<word_len;j++) {
   1144         if (lm->type[word[j]] == 1) break;
   1145       }
   1146       first_syl_end = j;
   1147       break;
   1148     }
   1149   }
   1150   last_syl_start = 0;
   1151   for (i=word_len-1;i>=0;i--) {
   1152     if (lm->type[word[i]] == 1) {
   1153       for (j=i-1;j>=0;j--) {
   1154         if (lm->type[word[j]] != 1) break;
   1155       }
   1156       for (;j>=0;j--) {
   1157         if (lm->type[word[j]] == 1) break;
   1158       }
   1159       last_syl_start = j;
   1160       break;
   1161     }
   1162   }
   1163 
   1164 #if PRINT_DP_LETTER
   1165   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
   1166 #endif
   1167 
   1168   if (index > last_syl_start) dp->properties[ Syl2InWord] = 2;
   1169   else if (index  < first_syl_end) dp->properties[ Syl2InWord] = 0;
   1170   else dp->properties[Syl2InWord] = 1;
   1171 
   1172 
   1173   first_syl_end = word_len;
   1174   for (i=root_start;i<root_end;i++) {
   1175     if (lm->type[word[i]] == 1) {
   1176       for (j=i+1;j<word_len;j++) {
   1177         if (lm->type[word[j]] != 1) break;
   1178       }
   1179       first_syl_end = j;
   1180       break;
   1181     }
   1182   }
   1183   last_syl_start = 0;
   1184   for (i=root_end-1;i>=root_start;i--) {
   1185     if (lm->type[word[i]] == 1) {
   1186       for (j=i-1;j>=0;j--) {
   1187         if (lm->type[word[j]] != 1) break;
   1188       }
   1189       last_syl_start = j;
   1190       break;
   1191     }
   1192   }
   1193 
   1194 #if PRINT_DP_LETTER
   1195   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
   1196 #endif
   1197 
   1198   if (index > last_syl_start) dp->properties[SylInRoot] = 2;
   1199   else if (index < first_syl_end) dp->properties[ SylInRoot] = 0;
   1200   else dp->properties[ SylInRoot] = 1;
   1201 
   1202   first_syl_end = word_len;
   1203   for (i=root_start;i<root_end;i++) {
   1204     if (lm->type[word[i]] == 1) {
   1205       for (j=i+1;j<word_len;j++) {
   1206         if (lm->type[word[j]] != 1) break;
   1207       }
   1208       for (;j<word_len;j++) {
   1209         if (lm->type[word[j]] == 1) break;
   1210       }
   1211       first_syl_end = j;
   1212       break;
   1213     }
   1214   }
   1215   last_syl_start = 0;
   1216   for (i=root_end-1;i>=root_start;i--) {
   1217     if (lm->type[word[i]] == 1) {
   1218       for (j=i-1;j>=0;j--) {
   1219         if (lm->type[word[j]] != 1) break;
   1220       }
   1221       for (;j>=0;j--) {
   1222         if (lm->type[word[j]] == 1) break;
   1223       }
   1224       last_syl_start = j;
   1225       break;
   1226     }
   1227   }
   1228 
   1229 #if PRINT_DP_LETTER
   1230   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
   1231 #endif
   1232 
   1233   if (index > last_syl_start) dp->properties[Syl2InRoot] = 2;
   1234   else if (index  < first_syl_end) dp->properties[Syl2InRoot] = 0;
   1235   else dp->properties[Syl2InRoot] = 1;
   1236 
   1237 
   1238   dp->properties[Left_DFRE] = index - root_start;
   1239   dp->properties[Right_DFRE] = (root_end - index) - 1;
   1240 
   1241 
   1242   /*  pfprintf(PSTDOUT,"strings\n");*/
   1243 #if PRINT_DP_LETTER
   1244   pfprintf(PSTDOUT,"input word %s num_strings %d\n", input_word, lts->num_strings);
   1245 #endif
   1246 
   1247   dp->properties[RightString] = find_best_string(input_word+index+1, lts);
   1248   strcpy(tempstr, input_word);
   1249   tempstr[index] = '\0';
   1250 
   1251   dp->properties[LeftString] = find_best_prefix_string(tempstr, lts);
   1252 
   1253 #if PRINT_DP_LETTER
   1254   pfprintf(PSTDOUT,"dp %c ", lm->letters[dp->letter]);
   1255 
   1256   for (i=0;i<word_len;i++) {
   1257     pfprintf(PSTDOUT,"%c", lm->letters[word[i]]);
   1258   }
   1259   pfprintf(PSTDOUT," %c%c%c {%c} %c%c%c liw %d siw %d s2iw %d nw %d sir %d s2ir %d left_DFRE %d right_DFRE %d\n",
   1260          lm->letters[dp->left_context[2]],
   1261          lm->letters[dp->left_context[1]],
   1262          lm->letters[dp->left_context[0]],
   1263          lm->letters[dp->letter],
   1264          lm->letters[dp->right_context[0]],
   1265          lm->letters[dp->right_context[1]],
   1266          lm->letters[dp->right_context[2]],
   1267          dp->let_in_word,
   1268          dp->syl_in_word,
   1269          dp->syl2_in_word,
   1270          dp->word_len,
   1271          dp->syl_in_root,
   1272          dp->syl2_in_root,
   1273          dp->left_DFRE, dp->right_DFRE);
   1274 #endif
   1275 
   1276   return 0;
   1277 }
   1278 
   1279 int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp)
   1280 {
   1281   int m1, m2;
   1282   switch(type) {
   1283   case 0:
   1284     return qmatches(q1, dp);
   1285   case 1:
   1286     m1 = qmatches(q1, dp);
   1287     m2 = qmatches(q2, dp);
   1288     return(m1 && m2);
   1289   case 2:
   1290     m1 = qmatches(q1, dp);
   1291     m2 = qmatches(q2, dp);
   1292     return(m1 && !m2);
   1293   case 3:
   1294     m1 = qmatches(q1, dp);
   1295     m2 = qmatches(q2, dp);
   1296     return(!m1 && m2);
   1297   case 4:
   1298     m1 = qmatches(q1, dp);
   1299     m2 = qmatches(q2, dp);
   1300     return(!m1 && !m2);
   1301   default:
   1302     return -1;
   1303   }
   1304   /* should not come here */
   1305   return -1;
   1306 }
   1307 
   1308 int find_output_for_dp(LTS *lts, int *pbackoff_output)
   1309 {
   1310   LDP *dp;
   1311   int index;
   1312   RT_LTREE *tree;
   1313   LQUESTION *q1;
   1314   LQUESTION *q2;
   1315   int comb_type;
   1316   int q2_index;
   1317   int left_index;
   1318 
   1319   dp = &(lts->dp);
   1320   tree = lts->trees[dp->letter]; // properties[Letter]];
   1321 
   1322   index = 0;
   1323 
   1324   while (1) {
   1325     left_index = tree->left_nodes[index];
   1326 
   1327     if (left_index == NO_NODE) { /*means its a leaf node*/
   1328       *pbackoff_output = tree->question2[index];
   1329       return tree->values_or_question1[index];
   1330     }
   1331     q1 = lts->questions[tree->values_or_question1[index]];
   1332     q2_index = tree->question2[index] & 0x1FFF;
   1333     comb_type = (tree->question2[index] & 0xE000) >> 13;
   1334 
   1335     q2 = lts->questions[q2_index];
   1336 
   1337     if (matches(q1, q2, comb_type, dp)) {
   1338       index = left_index;
   1339     } else {
   1340       index = left_index+1;
   1341     }
   1342   }
   1343 }
   1344 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length)
   1345 {
   1346   char *tok;
   1347   int toklen;
   1348   char seps[] = " ";
   1349 
   1350   if (strlen(output) == 0) return out_len;
   1351 
   1352   tok = safe_strtok(output, seps, &toklen);
   1353   while (tok && toklen) {
   1354     if ((toklen > 0) && (strncmp(tok, "null", 4) != 0)) {
   1355 
   1356       if (isdigit(tok[toklen-1])) {
   1357         /*means it's a vowel.  So, add a syllable boundary.  It's position
   1358           gets adjusted later by adjust_syllable_boundaries()*/
   1359         strcpy(output_phone_string[out_len++], LTS_MARKER_SYLL_START);
   1360         if (out_len >= max_phone_length) return max_phone_length;
   1361       }
   1362       strncpy(output_phone_string[out_len], tok, toklen);
   1363       output_phone_string[out_len++][toklen] = '\0';
   1364       if (out_len >= max_phone_length) return max_phone_length;
   1365     }
   1366     tok = safe_strtok(tok+toklen, seps, &toklen);
   1367   }
   1368   return out_len;
   1369 }
   1370 
   1371 int is_allowable_cons_comb(LTS *lts, const char *cons_string)
   1372 {
   1373   /* int i;
   1374      for (i=0;i<lts->num_cons_comb;i++) {
   1375      #if PRINT_CONS_COMB
   1376      pfprintf(PSTDOUT,"checking {%s} vs c[%d] {%s}\n", cons_string, i, lts->allowable_cons_comb[i]);
   1377      #endif
   1378      if (strcmp(cons_string, lts->allowable_cons_comb[i]) == 0) return 1;
   1379      }
   1380      return 0;
   1381   */
   1382   ESR_ReturnCode rc;
   1383   void* iVal = NULL;
   1384   rc = PHashTableGetValue( (PHashTable*)lts->allowable_cons_combH, cons_string, &iVal);
   1385   if(rc == ESR_SUCCESS)
   1386     return 1;
   1387   else
   1388     return 0;
   1389 }
   1390 
   1391 
   1392 
   1393 
   1394 
   1395 void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length)
   1396 {
   1397   char *out;
   1398   int i,j;
   1399   int syl_start;
   1400   int stress = 0;
   1401   int first_syl_bound;
   1402 
   1403   char tempstr[20];
   1404 
   1405   /*there should already be a syllable boundary before each vowel (add_output put one there)*/
   1406   /*so just find these, then shift back by allowable consonant combinations and move the syllable mark*/
   1407 
   1408   for (i=0;i<num_out;i++) {
   1409     out = output_phone_string[i];
   1410     if (strcmp(out, LTS_MARKER_SYLL_START) == 0) { /*means there is a syllable boundary
   1411       														 find start of allowable sequence*/
   1412 
   1413       syl_start = 0;
   1414 
   1415       for (j=i-1;j>0;j--) {
   1416         out = output_phone_string[j];
   1417         if (isdigit(out[strlen(out)-1])) {
   1418           syl_start = j+1;
   1419           break; /*means it's a vowel*/
   1420         }
   1421         if (strcmp(out, LTS_MARKER_WORD_START) == 0) {
   1422           syl_start = j+1;
   1423           break; /*don't push syl boundaries before word boundaries*/
   1424         }
   1425         if (strcmp(out, LTS_MARKER_PRON_START) == 0) {
   1426           syl_start = j+1;
   1427           break; /*don't push syl boundaries before phrase boundaries*/
   1428         }
   1429 
   1430         /* for sequences longer than 2,
   1431            check 3-syllable onset first, then check 2-syllable onset */
   1432         if(j > 1){
   1433           sprintf(tempstr, "%s %s %s", output_phone_string[j-2], output_phone_string[j-1],
   1434             output_phone_string[j]);
   1435           if (!is_allowable_cons_comb(lts, tempstr)) {
   1436             sprintf(tempstr, "%s %s", output_phone_string[j-1], output_phone_string[j]);
   1437             if (!is_allowable_cons_comb(lts, tempstr)) {
   1438 #if PRINT_CONS_COMB
   1439               pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
   1440                 output_phone_string[j]);
   1441 #endif
   1442               syl_start = j;
   1443               break;
   1444             }
   1445           }
   1446         }
   1447         /* for sequences shorter than 2 */
   1448         else
   1449         {
   1450           sprintf(tempstr, "%s %s", output_phone_string[j-1], output_phone_string[j]);
   1451           if (!is_allowable_cons_comb(lts, tempstr)) {
   1452 #if PRINT_CONS_COMB
   1453             pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
   1454               output_phone_string[j]);
   1455 #endif
   1456             syl_start = j;
   1457             break;
   1458           }
   1459         }
   1460       } /* end for j=i-1 */
   1461 
   1462       /*shift over stuff between syl_start a gap*/
   1463       for (j=i;j>syl_start;j--) {
   1464         strcpy(output_phone_string[j], output_phone_string[j-1]);
   1465       }
   1466       /*now find stress level from phone (and remove it) and add it to syl bound*/
   1467 
   1468       if (i<num_out-1) {
   1469         out = output_phone_string[i+1];
   1470 
   1471         if (isdigit(out[strlen(out)-1])) {
   1472           stress = atoi(out + strlen(out)-1);
   1473         } else {
   1474           stress = 0; /*should not happen*/
   1475         }
   1476       } else {
   1477         stress = 0; /*should not happen*/
   1478       }
   1479 
   1480       sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
   1481     } /* end if (strcmp(out, LTS_MARKER_SYLL_START) == 0) */
   1482   } /* end for i=0 */
   1483 
   1484   /*remove all the stress marking from the vowels*/
   1485   for (i=0;i<num_out;i++) {
   1486     out = output_phone_string[i];
   1487     if ((strncmp(out, LTS_MARKER_SYLL_START, 2) != 0) && isdigit(out[strlen(out)-1])) {
   1488       out[strlen(out)-1] = '\0'; /*remove the stress from the vowel*/
   1489     }
   1490   }
   1491 
   1492   /* word boundary must be followed by syllable boundary
   1493     if no syllable boundary exists after a word boundary, move the first
   1494     syllable boundary to after the word boundary */
   1495   first_syl_bound = -1;
   1496   syl_start = -1;
   1497   for (i=1;i<num_out;i++) {
   1498     if ((strcmp(output_phone_string[i-1], LTS_MARKER_WORD_START) == 0) &&
   1499       (strncmp(output_phone_string[i], LTS_MARKER_SYLL_START, 2) != 0)) {
   1500 
   1501       syl_start = i;
   1502       /* search for first occurance of syllable boundary */
   1503       for(j=syl_start+1;j<num_out; j++){
   1504         out = output_phone_string[j];
   1505         if(strncmp(out, LTS_MARKER_SYLL_START, 2) == 0 && isdigit(out[strlen(out)-1])){
   1506             stress = atoi(out + strlen(out)-1);
   1507             first_syl_bound = j;
   1508             break;
   1509         }
   1510       }
   1511 
   1512       /* swap entries until syl bound reaches word bound */
   1513       if(first_syl_bound >= 0){
   1514         for(; j>syl_start; j--){
   1515           strcpy(output_phone_string[j], output_phone_string[j-1]);
   1516         }
   1517         /* put syllable boundary after word boundary */
   1518         sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
   1519 
   1520         /* advance i, reset variables */
   1521         i = first_syl_bound;
   1522         first_syl_bound = syl_start = -1;
   1523 
   1524       }
   1525     }
   1526   }
   1527 
   1528 }
   1529 
   1530 
   1531 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *pnum_out)
   1532 {
   1533   SWIsltsResult          nRes = SWIsltsSuccess;
   1534   int                  i,j;
   1535   int                  root_start;
   1536   int                  root_end;
   1537   int                  output_index;
   1538   int                  left_phone;
   1539   char               * input_seq;
   1540   int                  found_match;
   1541   int                  start_num_out;
   1542   int                  backoff_output;
   1543   int                  num_out;
   1544 
   1545   start_num_out = num_out = *pnum_out;
   1546 
   1547   root_start = 0;
   1548   root_end = word_len;
   1549 
   1550   for (i=0;i<word_len;i++) {
   1551 
   1552     if ((i == 0) || (num_out == 0)) {
   1553       /*      pfprintf(PSTDOUT,"about to call find_phone1\n");*/
   1554       left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
   1555 
   1556 #if PRINT_LTS_WORD
   1557       pfprintf(PSTDOUT,"got phone %d for initial | (LTS_MARKER_PIPESEP)\n", left_phone);
   1558 #endif
   1559       if (left_phone < 0) {
   1560 
   1561 #ifdef NO_STDERR
   1562         PrintError("Error, cannot find | in phone mappings\n", NULL, NULL, NULL);
   1563 #else
   1564         PLogError(L("Error, cannot find | in phone mappings\n"));
   1565 #endif
   1566         nRes = SWIsltsInternalErr;
   1567         goto CLEAN_UP;
   1568       }
   1569     } else {
   1570 
   1571 #if PRINT_LTS_WORD
   1572       pfprintf(PSTDOUT,"about to call find_phone2 num_out %d\n", num_out);
   1573       pfprintf(PSTDOUT,"out[%d] %s\n", num_out-1, output_phone_string[num_out-1]);
   1574 #endif
   1575 
   1576       if (strcmp(output_phone_string[num_out-1], LTS_MARKER_PRON_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
   1577       else if (strcmp(output_phone_string[num_out-1], LTS_MARKER_WORD_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
   1578       else left_phone = find_phone(output_phone_string[num_out-1], lts->phone_mapping);
   1579 
   1580 #if PRINT_LTS_WORD
   1581       pfprintf(PSTDOUT,"got phone %d for %s\n", left_phone, output_phone_string[num_out-1]);
   1582 #endif
   1583 
   1584       if (left_phone < 0) {
   1585 
   1586 #ifdef NO_STDERR
   1587         PrintError("Error, cannot find %s in phone mappings\n", (unsigned long)output_phone_string[num_out-1], NULL, NULL);
   1588 #else
   1589         PLogError(L("Error, cannot find %s in phone mappings\n"), output_phone_string[num_out-1]);
   1590 #endif
   1591         nRes = SWIsltsInternalErr;
   1592         goto CLEAN_UP;
   1593       }
   1594     }
   1595 
   1596     /*    pfprintf(PSTDOUT,"calling fill up dp\n");*/
   1597     if (fill_up_dp_for_letter(lts, word, word_len, i, root_start, root_end, left_phone)) continue;
   1598 
   1599     /*    pfprintf(PSTDOUT,"calling find output\n");*/
   1600     output_index = find_output_for_dp(lts, &backoff_output);
   1601 
   1602 #if PRINT_LTS_WORD
   1603     pfprintf(PSTDOUT,"got output %d\n", output_index);
   1604 #endif
   1605 
   1606     found_match = 1;
   1607 
   1608     if (strlen(lts->input_for_output[output_index]) > 0) {
   1609         /*some extra input string to use up*/
   1610 #if PRINT_LTS_WORD
   1611       pfprintf(PSTDOUT,"GOT INPUT %s for %s letter %c\n", lts->input_for_output[output_index], word, word[i]);
   1612 #endif
   1613 
   1614       input_seq = lts->input_for_output[output_index];
   1615       if (input_seq[0] == '=') {
   1616         root_end = i;
   1617         input_seq = input_seq+1; /*skip suffix indicator*/
   1618       }
   1619       for (j=i+1;;j++) {
   1620         if (input_seq[j-(i+1)] == '\0') break;
   1621         if (input_seq[j-(i+1)] == '-') {
   1622           root_start = j;
   1623           break;
   1624         }
   1625         if (j >= word_len) {
   1626           found_match = 0;
   1627           break;
   1628         }
   1629 
   1630         if (input_seq[j-(i+1)] != word[j]) {
   1631           found_match = 0;
   1632           break;
   1633         }
   1634       }
   1635       if (found_match) {
   1636         i = j-1;
   1637       }
   1638     }
   1639 
   1640     if (!found_match) {
   1641 #if PRINT_LTS_WORD
   1642       pfprintf(PSTDOUT,"using backoff output %s instead of regular %s\n",
   1643                lts->outputs[backoff_output],
   1644                ts->outputs[output_index]);
   1645 #endif
   1646 
   1647       num_out = add_output(lts->outputs[backoff_output], output_phone_string, num_out, max_phone_length);
   1648     }
   1649     else {
   1650       num_out = add_output(lts->outputs[output_index], output_phone_string, num_out, max_phone_length);
   1651     }
   1652     if (num_out >= max_phone_length) {
   1653       nRes = SWIsltsMaxInputExceeded;
   1654       goto CLEAN_UP;
   1655     }
   1656   }
   1657 
   1658   *pnum_out = num_out;
   1659   return SWIsltsSuccess;
   1660 
   1661  CLEAN_UP:
   1662 
   1663   *pnum_out = 0;
   1664   return nRes;
   1665 }
   1666 
   1667 
   1668 
   1669 SWIsltsResult run_lts(LTS_HANDLE h, FSM_DICT_HANDLE hdict, char *input_sentence, char **output_phone_string, int *phone_length)
   1670 {
   1671   SWIsltsResult            nRes = SWIsltsSuccess;
   1672   int                    i;
   1673   int                    len;
   1674   int                    num_out = 0;
   1675   LTS                  * lts;
   1676   int                    was_in_phrase;
   1677   char                   word[MAX_WORD_LEN];
   1678   int                    num_in_word;
   1679   int                    max_phone_length;
   1680   int                    pron_len;
   1681 
   1682   max_phone_length = *phone_length;
   1683 
   1684   len = strlen(input_sentence);
   1685 
   1686   lts = (LTS*) h;
   1687 
   1688   was_in_phrase = 0;
   1689 
   1690   /*add a phrase start then word start at beginning*/
   1691 
   1692   strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
   1693   if (num_out >= max_phone_length) {
   1694     nRes = SWIsltsMaxInputExceeded;
   1695     goto CLEAN_UP;
   1696   }
   1697 
   1698   num_in_word = 0;
   1699   pron_len = 1;    // for the first time through
   1700 
   1701   for (i=0;i<=len;i++) {
   1702 
   1703 #if PRINT_LTS_WORD
   1704     pfprintf(PSTDOUT,"WORKING on letter %d %c\n", i, input_sentence[i]);
   1705 #endif
   1706 
   1707     /* Treat hyphen as word delimiter.  Not quite right for German
   1708        hyphenated compounds, but still an improvement. */
   1709     if ((input_sentence[i] == ' ') || (input_sentence[i] == '-') || (input_sentence[i] == '\t') || (i == len)) {
   1710       if (num_in_word>0 ) {
   1711         strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
   1712         if (num_out >= max_phone_length) {
   1713           nRes = SWIsltsMaxInputExceeded;
   1714           goto CLEAN_UP;
   1715         }
   1716 
   1717         word[num_in_word] = '\0';
   1718 
   1719         if (1) {
   1720 
   1721 #if PRINT_DICT_LOOKUP
   1722           pfprintf(PSTDOUT,"Did not find %s in dictionary\n", word);
   1723 #endif
   1724 		  pron_len = -num_out;
   1725           nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
   1726 		  pron_len += num_out; // now pron_len is the number of phonemes/markers added
   1727 		  if(pron_len == 0)
   1728 			  num_out--; // to backspace on the LTS_MARKER_WORD_START !!
   1729           if (nRes != SWIsltsSuccess) {
   1730             goto CLEAN_UP;
   1731           }
   1732         }
   1733         num_in_word = 0;
   1734       }
   1735     }
   1736     else if ( (input_sentence[i] == '.')
   1737                 || (input_sentence[i] == ',')
   1738                 || (input_sentence[i] == '!')
   1739                 || (input_sentence[i] == '?')
   1740                 || (input_sentence[i] == '\n')) {
   1741       if (was_in_phrase) {
   1742         /*add a phrase boundary after lts is called*/
   1743         if (num_in_word > 0) {
   1744           strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
   1745           if (num_out >= max_phone_length) {
   1746             nRes = SWIsltsMaxInputExceeded;
   1747             goto CLEAN_UP;
   1748           }
   1749 
   1750           word[num_in_word] = '\0';
   1751 
   1752           if (1) {
   1753             nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
   1754             if (nRes != SWIsltsSuccess) {
   1755               goto CLEAN_UP;
   1756             }
   1757           }
   1758           num_in_word = 0;
   1759         }
   1760         strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
   1761         if (num_out >= max_phone_length) {
   1762           nRes = SWIsltsMaxInputExceeded;
   1763           goto CLEAN_UP;
   1764         }
   1765         was_in_phrase = 0;
   1766       }
   1767     }
   1768     else {
   1769       if (num_in_word < MAX_WORD_LEN-1) {
   1770         word[num_in_word++] = toupper(input_sentence[i]);
   1771         was_in_phrase = 1;
   1772       }
   1773     }
   1774   }
   1775   /*adjust syllable boundaries*/
   1776   adjust_syllable_boundaries(lts, output_phone_string, num_out, max_phone_length);
   1777 
   1778   *phone_length = num_out;
   1779   return SWIsltsSuccess;
   1780 
   1781  CLEAN_UP:
   1782 
   1783   *phone_length = 0;
   1784   return nRes;
   1785 }
   1786 
   1787 #ifdef USE_STATIC_SLTS
   1788 void *lts_alloc(int num, int size)
   1789 {
   1790 #ifdef NO_STDERR
   1791     PrintError("USE_STATIC_SLTS: lts_alloc should not be called", NULL, NULL, NULL);
   1792 #else
   1793     PLogError(L("USE_STATIC_SLTS: lts_alloc should not be called"));
   1794 #endif
   1795   return NULL;
   1796 }
   1797 #else
   1798 
   1799 void *lts_alloc(int num, int size)
   1800 {
   1801   void *p;
   1802   p = CALLOC(num, size, MTAG);
   1803   return p;
   1804 }
   1805 #endif /* USE_STATIC_SLTS */
   1806