1 /*---------------------------------------------------------------------------* 2 * Vocabulary.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #include "plog.h" 21 #include "SR_Vocabulary.h" 22 #include "SR_VocabularyImpl.h" 23 24 25 ESR_ReturnCode SR_VocabularyCreate(ESR_Locale locale, SR_Vocabulary** self) 26 { 27 SR_Vocabulary* Interface; 28 SR_VocabularyImpl* impl; 29 ESR_ReturnCode rc; 30 31 CHK(rc, SR_VocabularyCreateImpl(&Interface)); 32 impl = (SR_VocabularyImpl*) Interface; 33 impl->locale = locale; 34 impl->ttp_lang = TTP_LANG(locale); 35 36 #ifdef USE_TTP 37 /* impl->ttp_lang should be set to the current language before G2P is created */ 38 rc = SR_CreateG2P(Interface); 39 if (rc != ESR_SUCCESS) 40 { 41 SR_VocabularyDestroyImpl(Interface); 42 goto CLEANUP; 43 } 44 #endif 45 46 *self = Interface; 47 return ESR_SUCCESS; 48 CLEANUP: 49 return rc; 50 } 51 52 ESR_ReturnCode SR_VocabularyLoad(const LCHAR* filename, SR_Vocabulary** self) 53 { 54 SR_Vocabulary* Interface; 55 ESR_ReturnCode rc; 56 57 CHK(rc, SR_VocabularyLoadImpl(filename, &Interface)); 58 59 *self = Interface; 60 return ESR_SUCCESS; 61 CLEANUP: 62 return rc; 63 } 64 65 ESR_ReturnCode SR_VocabularySave(SR_Vocabulary* self, const LCHAR* filename) 66 { 67 if (self==NULL) 68 { 69 PLogError(L("ESR_INVALID_ARGUMENT")); 70 return ESR_INVALID_ARGUMENT; 71 } 72 return self->save(self, filename); 73 } 74 75 ESR_ReturnCode SR_VocabularyGetLanguage(SR_Vocabulary* self, ESR_Locale* locale) 76 { 77 if (self==NULL) 78 { 79 PLogError(L("ESR_INVALID_ARGUMENT")); 80 return ESR_INVALID_ARGUMENT; 81 } 82 return self->getLanguage(self, locale); 83 } 84 85 ESR_ReturnCode SR_VocabularyDestroy(SR_Vocabulary* self) 86 { 87 if (self==NULL) 88 { 89 PLogError(L("ESR_INVALID_ARGUMENT")); 90 return ESR_INVALID_ARGUMENT; 91 } 92 return self->destroy(self); 93 } 94 95 ESR_ReturnCode SR_VocabularyGetPronunciation(SR_Vocabulary* self, const LCHAR* word, LCHAR* phoneme, size_t* len) 96 { 97 if (self==NULL) 98 { 99 PLogError(L("ESR_INVALID_ARGUMENT")); 100 return ESR_INVALID_ARGUMENT; 101 } 102 return self->getPronunciation(self, word, phoneme, len); 103 } 104 105 /**************************** 106 * ETI to INFINITIVE Phoneme conversion stuff 107 */ 108 109 static const int CH_MAX = 128; 110 111 static ESR_ReturnCode getTable(ESR_Locale locale, const LCHAR* m[]) 112 { 113 int i; 114 for(i = 0; i< CH_MAX; i++) m[i] = ""; 115 116 switch (locale) 117 { 118 case ESR_LOCALE_EN_US: 119 case ESR_LOCALE_EN_GB: 120 /* enu_d2f_fray_g.pht */ 121 m['}']="um"; m['?']="OW"; m['~']="un"; m['@']="uh"; m['A']="EY"; 122 m['C']="ch"; m['D']="dh"; m['E']="EE"; m['I']="AY"; m['J']="jnk"; 123 m['L']="ul"; m['N']="ng"; m['O']="OH"; m['P']="ur"; m['S']="sh"; 124 m['T']="th"; m['U']="OOH"; m['V']="UR"; m['Z']="zh"; m[']']="oh"; 125 m['^']="ENV"; m['#']="sil"; m['a']="AA"; m['b']="b"; m['c']="eh"; 126 m['d']="d"; m['e']="EH"; m['f']="f"; m[')']="AH"; m['g']="g"; 127 m['h']="h"; m['i']="IH"; m['j']="j"; m[',']="AE"; m['k']="k"; 128 m['l']="l"; m['m']="m"; m['/']="ee"; m['n']="n"; m['o']="AW"; 129 m['p']="p"; m['q']="OO"; m['r']="r"; m['s']="s"; m['t']="t"; 130 m['6']="ih"; m['u']="UH"; m['v']="v"; m['w']="w"; m['y']="y"; 131 m['z']="z"; m['<']="OY"; m['{']="AWH"; 132 break; 133 case ESR_LOCALE_FR_FR: 134 /* fra_t22_m.pht */ 135 m['A']="ACI"; m[3]="OEE"; m[6]="OEN"; m['E']="EAC"; m['J']="jnk"; 136 m['M']="gn"; m[16]="QQ"; m['N']="ng"; m['O']="OCI"; m[19]="AE"; 137 m['S']="sh"; m['U']="UY"; m['W']="yw"; m['Y']="EN"; m['Z']="ge"; 138 m[31]="OE"; m['^']="ENV"; m['#']="sil"; m['a']="AGR"; m['b']="b"; 139 m['d']="d"; m['e']="ECI"; m['f']="f"; m[')']="AN"; m['g']="g"; 140 m['i']="II"; m['k']="k"; m['l']="l"; m['m']="m"; m['n']="n"; 141 m['o']="OO"; m['p']="p"; m['r']="r"; m['s']="s"; m['t']="t"; 142 m['u']="UGR"; m['v']="v"; m['w']="w"; m['y']="y"; m['z']="z"; 143 m['{']="ON"; 144 break; 145 146 case ESR_LOCALE_DE_DE: 147 m['@']="utt"; m['A']="AH"; m[4]="eu"; m['C']="ich"; m[6]="EU"; 148 m['E']="EH"; m['H']="ue"; m['I']="IH"; m['J']="jnk"; m['K']="ach"; 149 m['N']="ng"; m['O']="OH"; m['S']="sch"; m['T']="hr"; m['U']="UH"; 150 m['V']="UEH"; m['W']="wu"; m['Z']="zh"; m['[']="ott"; m['^']="ENV"; 151 m['!']="att"; m['#']="sil"; m['a']="ATT"; m['b']="b"; m['c']="ett"; 152 m['d']="d"; m['e']="ETT"; m['f']="f"; m['g']="g"; m['h']="h"; 153 m['i']="ITT"; m['j']="j"; m[',']="AEH"; m['k']="k"; m['l']="l"; 154 m['m']="m"; m['n']="n"; m['o']="OTT"; m['p']="p"; m['q']="UE"; 155 m['r']="r"; m['s']="s"; m['t']="t"; m['6']="itt"; m['u']="UTT"; 156 m['w']="w"; m['x']="@@"; m[':']="oe"; m['z']="z"; m['<']="OE"; 157 m['{']="OEH"; 158 break; 159 case ESR_LOCALE_ES_ES: 160 m['@']="uu"; m['C']="ch"; m['D']="rr"; m['E']="EY"; m['J']="jnk"; 161 m['M']="ks"; m['N']="nn"; m['T']="Z"; m['[']="oo"; m['^']="ENV"; 162 m['!']="aa"; m['#']="sil"; m['a']="AA"; m['b']="b"; m['c']="ee"; 163 m['d']="d"; m['e']="EE"; m['f']="f"; m[')']="AU"; m['g']="g"; 164 m['i']="II"; m['j']="j"; m['k']="k"; m['l']="l"; m['m']="m"; 165 m['n']="n"; m['o']="OO"; m['p']="p"; m['r']="r"; m['s']="s"; 166 m['6']="ii"; m['t']="t"; m['u']="UU"; m['w']="w"; m['y']="y"; 167 break; 168 case ESR_LOCALE_NL_NL: 169 m['S']="S"; m['a']="a"; m['N']="nK"; m['d']="d"; m['E']="E"; 170 m['2']="ep"; m['j']="j"; m['y']="y"; m['Z']="Z"; m['u']="u"; 171 m['1']="AA"; m['k']="k"; m['g']="g"; m['t']="t"; m['e']="e"; 172 m['J']="jnk"; m['v']="v"; m['s']="s"; m['^']="ENV"; m['b']="b"; 173 m['I']="I"; m['G']="G"; m['z']="z"; m['w']="w"; m['$']="$"; 174 m['r']="r"; m['x']="x"; m['h']="h"; m['f']="f"; m['i']="i"; 175 m['A']="A"; m['6']="A%t"; m['O']="O"; m['n']="n"; m['3']="Ei"; 176 m['#']="sil"; m['m']="m"; m['8']="O%t"; m['l']="l"; m['4']="yy"; 177 m['p']="p"; m['5']="Au"; m['o']="o"; 178 break; 179 case ESR_LOCALE_IT_IT: 180 m['@']="uu"; m['A']="AI"; m['C']="ci"; m['E']="EI"; m['J']="jnk"; 181 m['K']="rr"; m['M']="gi"; m['N']="gn"; m['O']="OI"; m[21]="gl"; 182 m['S']="sci"; m['Y']="ETT"; m['[']="oo"; m['^']="ENV"; m['!']="aa"; 183 m['#']="sil"; m['a']="AA"; m['b']="b"; m['c']="ee"; m['d']="d"; 184 m['e']="EE"; m['f']="f"; m[')']="AU"; m['g']="g"; m['i']="II"; 185 m['j']="j"; m['k']="k"; m['l']="l"; m['m']="m"; m['n']="n"; 186 m['o']="OO"; m['p']="p"; m['r']="r"; m['s']="s"; m['t']="t"; 187 m['6']="ii"; m['u']="UU"; m['v']="v"; m['w']="w"; m['z']="z"; 188 m['{']="OTT"; 189 break; 190 case ESR_LOCALE_PT_PT: 191 m['A']="ao"; m['B']="ojn"; m['E']="eh"; m['I']="ix"; m['J']="jnk"; 192 m['L']="lj"; m['N']="nj"; m['O']="on"; m['R']="rr"; m['S']="sh"; 193 m['U']="un"; m['Z']="zh"; m['^']="ENV"; m['#']="sil"; m['a']="a"; 194 m['b']="b"; m['c']="ew"; m['d']="d"; m['e']="e"; m['f']="f"; 195 m['g']="g"; m['h']="in"; m['i']="i"; m['j']="j"; m['k']="k"; 196 m['l']="l"; m['m']="m"; m['n']="n"; m['1']="aj"; m['o']="o"; 197 m['p']="p"; m['2']="ajn"; m['3']="an"; m['q']="iw"; m['r']="r"; 198 m['4']="aw"; m['s']="s"; m['5']="awn"; m['t']="t"; m['6']="ax"; 199 m['u']="u"; m['7']="axn"; m['v']="v"; m['8']="ej"; m['w']="w"; 200 m['9']="en"; m['x']="ls"; m['y']="oj"; m['z']="z"; 201 break; 202 case ESR_LOCALE_JA_JP: 203 return ESR_NOT_SUPPORTED; 204 break; 205 } 206 m['#']="iwt"; m['&']="&"; 207 208 return ESR_SUCCESS; 209 } 210 211 ESR_ReturnCode SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale, const LCHAR* single, LCHAR* multi, size_t max_len) 212 { 213 const LCHAR* m[CH_MAX]; 214 215 ESR_ReturnCode rc = getTable(locale, m); 216 if (rc != ESR_SUCCESS) return rc; 217 218 for (*multi='\0'; *single; ++single) 219 { 220 LSTRCAT(multi, m[(int)*single]); 221 if (*(single+1)) LSTRCAT(multi, " "); 222 } 223 return ESR_SUCCESS; 224 } 225 226 ESR_ReturnCode SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale, const LCHAR* multi, LCHAR* single) 227 { 228 const LCHAR* m[CH_MAX]; 229 int i; 230 231 ESR_ReturnCode rc = getTable(locale, m); 232 if (rc != ESR_SUCCESS) return rc; 233 234 for (i = 0; i < CH_MAX; i++) { 235 if (!LSTRCMP(m[i], multi)) { 236 *single = (LCHAR)i; 237 return ESR_SUCCESS; 238 } 239 } 240 return ESR_NO_MATCH_ERROR; 241 } 242