Home | History | Annotate | Download | only in src
      1 /*---------------------------------------------------------------------------*
      2  *  Vocabulary.c                                                             *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include "plog.h"
     21 #include "SR_Vocabulary.h"
     22 #include "SR_VocabularyImpl.h"
     23 
     24 
     25 ESR_ReturnCode SR_VocabularyCreate(ESR_Locale locale, SR_Vocabulary** self)
     26 {
     27      SR_Vocabulary* Interface;
     28      SR_VocabularyImpl* impl;
     29      ESR_ReturnCode rc;
     30 
     31      CHK(rc, SR_VocabularyCreateImpl(&Interface));
     32      impl = (SR_VocabularyImpl*) Interface;
     33      impl->locale = locale;
     34      impl->ttp_lang = TTP_LANG(locale);
     35 
     36 #ifdef USE_TTP
     37      /* impl->ttp_lang should be set to the current language before G2P is created */
     38      rc = SR_CreateG2P(Interface);
     39      if (rc != ESR_SUCCESS)
     40      {
     41           SR_VocabularyDestroyImpl(Interface);
     42           goto CLEANUP;
     43      }
     44 #endif
     45 
     46      *self = Interface;
     47      return ESR_SUCCESS;
     48  CLEANUP:
     49      return rc;
     50 }
     51 
     52 ESR_ReturnCode SR_VocabularyLoad(const LCHAR* filename, SR_Vocabulary** self)
     53 {
     54      SR_Vocabulary* Interface;
     55      ESR_ReturnCode rc;
     56 
     57      CHK(rc, SR_VocabularyLoadImpl(filename, &Interface));
     58 
     59      *self = Interface;
     60      return ESR_SUCCESS;
     61  CLEANUP:
     62      return rc;
     63 }
     64 
     65 ESR_ReturnCode SR_VocabularySave(SR_Vocabulary* self, const LCHAR* filename)
     66 {
     67   if (self==NULL)
     68   {
     69     PLogError(L("ESR_INVALID_ARGUMENT"));
     70     return ESR_INVALID_ARGUMENT;
     71   }
     72   return self->save(self, filename);
     73 }
     74 
     75 ESR_ReturnCode SR_VocabularyGetLanguage(SR_Vocabulary* self, ESR_Locale* locale)
     76 {
     77   if (self==NULL)
     78   {
     79     PLogError(L("ESR_INVALID_ARGUMENT"));
     80     return ESR_INVALID_ARGUMENT;
     81   }
     82   return self->getLanguage(self, locale);
     83 }
     84 
     85 ESR_ReturnCode SR_VocabularyDestroy(SR_Vocabulary* self)
     86 {
     87   if (self==NULL)
     88   {
     89     PLogError(L("ESR_INVALID_ARGUMENT"));
     90     return ESR_INVALID_ARGUMENT;
     91   }
     92   return self->destroy(self);
     93 }
     94 
     95 ESR_ReturnCode SR_VocabularyGetPronunciation(SR_Vocabulary* self, const LCHAR* word, LCHAR* phoneme, size_t* len)
     96 {
     97   if (self==NULL)
     98   {
     99     PLogError(L("ESR_INVALID_ARGUMENT"));
    100     return ESR_INVALID_ARGUMENT;
    101   }
    102   return self->getPronunciation(self, word, phoneme, len);
    103 }
    104 
    105 /****************************
    106  * ETI to INFINITIVE Phoneme conversion stuff
    107  */
    108 
    109 static const int CH_MAX = 128;
    110 
    111 static ESR_ReturnCode getTable(ESR_Locale locale, const LCHAR* m[])
    112 {
    113      int i;
    114      for(i = 0; i< CH_MAX; i++) m[i] = "";
    115 
    116      switch (locale)
    117      {
    118      case ESR_LOCALE_EN_US:
    119      case ESR_LOCALE_EN_GB:
    120           /* enu_d2f_fray_g.pht */
    121           m['}']="um";  m['?']="OW";  m['~']="un";  m['@']="uh";  m['A']="EY";
    122           m['C']="ch";  m['D']="dh";  m['E']="EE";  m['I']="AY";  m['J']="jnk";
    123           m['L']="ul";  m['N']="ng";  m['O']="OH";  m['P']="ur";  m['S']="sh";
    124           m['T']="th";  m['U']="OOH"; m['V']="UR";  m['Z']="zh";  m[']']="oh";
    125           m['^']="ENV"; m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="eh";
    126           m['d']="d";   m['e']="EH";  m['f']="f";   m[')']="AH";  m['g']="g";
    127           m['h']="h";   m['i']="IH";  m['j']="j";   m[',']="AE";  m['k']="k";
    128           m['l']="l";   m['m']="m";   m['/']="ee";  m['n']="n";   m['o']="AW";
    129           m['p']="p";   m['q']="OO";  m['r']="r";   m['s']="s";   m['t']="t";
    130           m['6']="ih";  m['u']="UH";  m['v']="v";   m['w']="w";   m['y']="y";
    131           m['z']="z";   m['<']="OY";  m['{']="AWH";
    132           break;
    133      case ESR_LOCALE_FR_FR:
    134           /* fra_t22_m.pht */
    135           m['A']="ACI"; m[3]="OEE";   m[6]="OEN";   m['E']="EAC"; m['J']="jnk";
    136           m['M']="gn";  m[16]="QQ";   m['N']="ng";  m['O']="OCI"; m[19]="AE";
    137           m['S']="sh";  m['U']="UY";  m['W']="yw";  m['Y']="EN";  m['Z']="ge";
    138           m[31]="OE";   m['^']="ENV"; m['#']="sil"; m['a']="AGR"; m['b']="b";
    139           m['d']="d";   m['e']="ECI"; m['f']="f";   m[')']="AN";  m['g']="g";
    140           m['i']="II";  m['k']="k";   m['l']="l";   m['m']="m";   m['n']="n";
    141           m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";   m['t']="t";
    142           m['u']="UGR"; m['v']="v";   m['w']="w";   m['y']="y";   m['z']="z";
    143           m['{']="ON";
    144           break;
    145 
    146      case ESR_LOCALE_DE_DE:
    147           m['@']="utt"; m['A']="AH";  m[4]="eu";    m['C']="ich"; m[6]="EU";
    148           m['E']="EH";  m['H']="ue";  m['I']="IH";  m['J']="jnk"; m['K']="ach";
    149           m['N']="ng";  m['O']="OH";  m['S']="sch"; m['T']="hr";  m['U']="UH";
    150           m['V']="UEH"; m['W']="wu";  m['Z']="zh";  m['[']="ott"; m['^']="ENV";
    151           m['!']="att"; m['#']="sil"; m['a']="ATT"; m['b']="b";   m['c']="ett";
    152           m['d']="d";   m['e']="ETT"; m['f']="f";   m['g']="g";   m['h']="h";
    153           m['i']="ITT"; m['j']="j";   m[',']="AEH"; m['k']="k";   m['l']="l";
    154           m['m']="m";   m['n']="n";   m['o']="OTT"; m['p']="p";   m['q']="UE";
    155           m['r']="r";   m['s']="s";   m['t']="t";   m['6']="itt"; m['u']="UTT";
    156           m['w']="w";   m['x']="@@";  m[':']="oe";  m['z']="z";   m['<']="OE";
    157           m['{']="OEH";
    158           break;
    159      case ESR_LOCALE_ES_ES:
    160           m['@']="uu";  m['C']="ch";  m['D']="rr";  m['E']="EY";  m['J']="jnk";
    161           m['M']="ks";  m['N']="nn";  m['T']="Z";   m['[']="oo";  m['^']="ENV";
    162           m['!']="aa";  m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="ee";
    163           m['d']="d";   m['e']="EE";  m['f']="f";   m[')']="AU";  m['g']="g";
    164           m['i']="II";  m['j']="j";   m['k']="k";   m['l']="l";   m['m']="m";
    165           m['n']="n";   m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";
    166           m['6']="ii";  m['t']="t";   m['u']="UU";  m['w']="w";   m['y']="y";
    167           break;
    168      case ESR_LOCALE_NL_NL:
    169           m['S']="S";   m['a']="a";   m['N']="nK";  m['d']="d";   m['E']="E";
    170           m['2']="ep";  m['j']="j";   m['y']="y";   m['Z']="Z";   m['u']="u";
    171           m['1']="AA";  m['k']="k";   m['g']="g";   m['t']="t";   m['e']="e";
    172           m['J']="jnk"; m['v']="v";   m['s']="s";   m['^']="ENV"; m['b']="b";
    173           m['I']="I";   m['G']="G";   m['z']="z";   m['w']="w";   m['$']="$";
    174           m['r']="r";   m['x']="x";   m['h']="h";   m['f']="f";   m['i']="i";
    175           m['A']="A";   m['6']="A%t"; m['O']="O";   m['n']="n";   m['3']="Ei";
    176           m['#']="sil"; m['m']="m";   m['8']="O%t"; m['l']="l";   m['4']="yy";
    177           m['p']="p";   m['5']="Au";  m['o']="o";
    178           break;
    179      case ESR_LOCALE_IT_IT:
    180           m['@']="uu";  m['A']="AI";  m['C']="ci";  m['E']="EI";  m['J']="jnk";
    181           m['K']="rr";  m['M']="gi";  m['N']="gn";  m['O']="OI";  m[21]="gl";
    182           m['S']="sci"; m['Y']="ETT"; m['[']="oo";  m['^']="ENV"; m['!']="aa";
    183           m['#']="sil"; m['a']="AA";  m['b']="b";   m['c']="ee";  m['d']="d";
    184           m['e']="EE";  m['f']="f";   m[')']="AU";  m['g']="g";   m['i']="II";
    185           m['j']="j";   m['k']="k";   m['l']="l";   m['m']="m";   m['n']="n";
    186           m['o']="OO";  m['p']="p";   m['r']="r";   m['s']="s";   m['t']="t";
    187           m['6']="ii";  m['u']="UU";  m['v']="v";   m['w']="w";   m['z']="z";
    188           m['{']="OTT";
    189           break;
    190      case ESR_LOCALE_PT_PT:
    191           m['A']="ao";  m['B']="ojn"; m['E']="eh";  m['I']="ix";  m['J']="jnk";
    192           m['L']="lj";  m['N']="nj";  m['O']="on";  m['R']="rr";  m['S']="sh";
    193           m['U']="un";  m['Z']="zh";  m['^']="ENV"; m['#']="sil"; m['a']="a";
    194           m['b']="b";   m['c']="ew";  m['d']="d";   m['e']="e";   m['f']="f";
    195           m['g']="g";   m['h']="in";  m['i']="i";   m['j']="j";   m['k']="k";
    196           m['l']="l";   m['m']="m";   m['n']="n";   m['1']="aj";  m['o']="o";
    197           m['p']="p";   m['2']="ajn"; m['3']="an";  m['q']="iw";  m['r']="r";
    198           m['4']="aw";  m['s']="s";   m['5']="awn"; m['t']="t";   m['6']="ax";
    199           m['u']="u";   m['7']="axn"; m['v']="v";   m['8']="ej";  m['w']="w";
    200           m['9']="en";  m['x']="ls";  m['y']="oj";  m['z']="z";
    201           break;
    202      case ESR_LOCALE_JA_JP:
    203           return ESR_NOT_SUPPORTED;
    204           break;
    205      }
    206      m['#']="iwt"; m['&']="&";
    207 
    208      return ESR_SUCCESS;
    209 }
    210 
    211 ESR_ReturnCode SR_Vocabulary_etiinf_conv_multichar(ESR_Locale locale, const LCHAR* single, LCHAR* multi, size_t max_len)
    212 {
    213     const LCHAR* m[CH_MAX];
    214 
    215     ESR_ReturnCode rc = getTable(locale, m);
    216     if (rc != ESR_SUCCESS) return rc;
    217 
    218     for (*multi='\0'; *single; ++single)
    219     {
    220         LSTRCAT(multi, m[(int)*single]);
    221         if (*(single+1)) LSTRCAT(multi, " ");
    222     }
    223     return ESR_SUCCESS;
    224 }
    225 
    226 ESR_ReturnCode SR_Vocabulary_etiinf_conv_from_multichar(ESR_Locale locale, const LCHAR* multi, LCHAR* single)
    227 {
    228     const LCHAR* m[CH_MAX];
    229     int i;
    230 
    231     ESR_ReturnCode rc = getTable(locale, m);
    232     if (rc != ESR_SUCCESS) return rc;
    233 
    234     for (i = 0; i < CH_MAX; i++) {
    235         if (!LSTRCMP(m[i], multi)) {
    236             *single = (LCHAR)i;
    237             return ESR_SUCCESS;
    238         }
    239     }
    240     return ESR_NO_MATCH_ERROR;
    241 }
    242