Home | History | Annotate | Download | only in grxmlcompile
      1 /*---------------------------------------------------------------------------*
      2  *  vocab.cpp                                                                *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include <string>
     21 #include <iostream>
     22 #include <stdexcept>
     23 #include "ESR_Locale.h"
     24 #include "LCHAR.h"
     25 #include "pstdio.h"
     26 #include "ESR_Session.h"
     27 #include "SR_Vocabulary.h"
     28 
     29 #include "vocab.h"
     30 
     31 #define MAX_LINE_LENGTH     256
     32 #define MAX_PRONS_LENGTH 1024
     33 
     34 #define DEBUG	0
     35 
     36 #define GENERIC CONTEXT "#"
     37 
     38 Vocabulary::Vocabulary( std::string const & vocFileName )
     39 {
     40     ESR_ReturnCode rc;
     41     rc = SR_VocabularyLoad(vocFileName.c_str(), &m_hVocab);
     42     if (rc != ESR_SUCCESS)
     43     {
     44         std::cout << "Error: " << ESR_rc2str(rc) <<std::endl;
     45         exit (-1);
     46     }
     47 }
     48 
     49 Vocabulary::~Vocabulary()
     50 {
     51     SR_VocabularyDestroy(m_hVocab);
     52 }
     53 
     54 Pronunciation::Pronunciation()
     55 {
     56 }
     57 
     58 Pronunciation::~Pronunciation()
     59 {
     60 }
     61 
     62 void Pronunciation::clear()
     63 {
     64     m_Prons.clear();
     65     for (unsigned int ii=0;ii<m_ModelIDs.size();ii++ )
     66     {
     67         m_ModelIDs[ii].clear();
     68     }
     69     m_ModelIDs.clear();
     70 }
     71 
     72 int Pronunciation::lookup(  Vocabulary & vocab, std::string  & phrase )
     73 {
     74     ESR_ReturnCode rc;
     75     LCHAR prons[MAX_PRONS_LENGTH];
     76     LCHAR* c_phrase;
     77     size_t len;
     78 
     79     LCHAR s[MAX_LINE_LENGTH];
     80     strcpy (s, phrase.c_str() ); // No conversion for std::string to wchar
     81     //clear();
     82 
     83     memset (prons, 0x00, sizeof(LCHAR));
     84 
     85     c_phrase = s;
     86     SR_Vocabulary *p_SRVocab = vocab.getSRVocabularyHandle();
     87 #if DEBUG
     88     std::cout << "DEBUG: " << phrase <<" to be looked up" << std::endl;
     89 #endif
     90     rc = SR_VocabularyGetPronunciation( p_SRVocab, c_phrase, prons, &len );
     91     if (rc != ESR_SUCCESS)
     92         //  std::cout <<"ERORORORORROOR!" <<std::endl;
     93         std::cout <<"ERROR: " << ESR_rc2str(rc) << std::endl;
     94     else {
     95 #if DEBUG
     96         std::cout <<"OUTPUT: " << prons << " num " << len << std::endl;
     97 #endif
     98         size_t len_used;
     99         LCHAR *pron = 0;
    100         for(len_used=0; len_used <len; ) {
    101             pron = &prons[0]+len_used;
    102             len_used += LSTRLEN(pron)+1;
    103 #if DEBUG
    104             std::cout << "DEBUG: used " << len_used << " now " << LSTRLEN(pron) << std::endl;
    105 #endif
    106             std::string pronString( pron ); // wstring conversion if needed
    107             addPron( pronString );
    108 #if DEBUG
    109             std::cout << "DEBUG: " << phrase << " " << pron << std::endl;
    110 #endif
    111         }
    112     }
    113     return getPronCount();
    114 }
    115 
    116 
    117 int Pronunciation::addPron( std::string & s )
    118 {
    119     m_Prons.push_back( s );
    120     return m_Prons.size();
    121 }
    122 
    123 int Pronunciation::getPronCount()
    124 {  // returns number of prons
    125     return m_Prons.size();
    126 }
    127 
    128 bool Pronunciation::getPron( int index, std::string &s )
    129 {
    130  // returns string length used
    131     try {
    132       s = m_Prons.at(index);
    133     }
    134     catch(std::out_of_range& err) {
    135       std::cerr << "out_of_range: " << err.what() << std::endl;
    136     }
    137     return true;
    138 }
    139 
    140 void Pronunciation::print()
    141 {
    142   std::string s;
    143   for (int ii=0; ii< getPronCount(); ii++) {
    144     getPron(ii, s);
    145 #if DEBUG
    146     std::cout << "Pron #" << ii << ": " << s << std::endl;
    147 #endif
    148   }
    149 }
    150 
    151 void Pronunciation::printModelIDs()
    152 {
    153   std::string s;
    154   for (int ii=0; ii< getPronCount(); ii++) {
    155     getPron(ii, s);
    156 #if DEBUG
    157     std::cout << "  Pron #" << ii << ": " << s << std::endl;
    158     std::cout << "    Model IDs: ";
    159 #endif
    160     for (int jj=0;jj<getModelCount(ii);jj++) {
    161       std::cout << " " << getModelID(ii,jj);
    162     }
    163 #if DEBUG
    164     std::cout <<  std::endl;
    165 #endif
    166   }
    167 }
    168 
    169 int Pronunciation::getPhonemeCount( int pronIndex )
    170 {
    171   std::string s;
    172   getPron(pronIndex, s);
    173   return s.size();
    174 }
    175 
    176 bool Pronunciation::getPhoneme( int pronIndex, int picIndex , std::string &phoneme )
    177 {
    178   std::string s;
    179   getPron(pronIndex, s);
    180   phoneme= s.at(picIndex);
    181   return true;
    182 }
    183 
    184 
    185 bool Pronunciation::getPIC( int pronIndex, int picIndex, std::string &pic )
    186 {
    187   std::string pron;
    188   char lphon;
    189   char cphon;
    190   char rphon;
    191 
    192   getPron( pronIndex, pron );
    193   int numPhonemes = pron.size();
    194   if ( 1==numPhonemes ) {
    195     lphon=GENERIC_CONTEXT;
    196     rphon=GENERIC_CONTEXT;
    197     cphon = pron.at(0);
    198   }
    199   else
    200     {
    201       if ( 0==picIndex ) {
    202 	lphon=GENERIC_CONTEXT;
    203 	rphon=GENERIC_CONTEXT;
    204       }
    205       else if( numPhonemes-1==picIndex ) {
    206 	lphon = pron.at(picIndex-1);
    207 	rphon=GENERIC_CONTEXT;
    208       }
    209       else {
    210 	lphon = pron.at(picIndex-1);
    211 	rphon = pron.at(picIndex+1);
    212       }
    213       cphon = pron.at(picIndex);
    214       pic = lphon + cphon + rphon;
    215     }
    216   return true;
    217 }
    218 
    219 int Pronunciation::lookupModelIDs( AcousticModel &acoustic )
    220 {
    221   // Looks up all hmms for all prons
    222   std::string pron;
    223   char lphon;
    224   char cphon;
    225   char rphon;
    226 
    227   int numProns = getPronCount();
    228   int totalCount=0;
    229   for (int ii=0;ii < numProns; ii++ )
    230     {
    231       getPron( ii, pron );
    232       std::vector<int> idList; // Create storage
    233       int numPhonemes = getPhonemeCount(ii);
    234       if (1==numPhonemes) {
    235 	lphon=GENERIC_CONTEXT;
    236 	rphon=GENERIC_CONTEXT;
    237 	cphon = pron.at(0);
    238       }
    239       else
    240       for ( int jj=0;jj<numPhonemes;jj++ )
    241 	{
    242 	  std::string pic;
    243 	  getPIC(ii, jj, pic);
    244 	  lphon = pron.at(0);
    245 	  cphon = pron.at(1);
    246 	  rphon = pron.at(2);
    247 	  int id = CA_ArbdataGetModelIdsForPIC( acoustic.getCAModelHandle(), lphon, cphon,  rphon );
    248 #if DEBUG
    249 	  std::cout <<"DEBUG model id: " << lphon <<cphon << rphon << "  "<< id << std::endl;
    250 #endif
    251 
    252 	  idList.push_back(id);
    253 	}
    254       m_ModelIDs.push_back(idList);
    255       totalCount+=numPhonemes;
    256     }
    257   return totalCount;
    258 }
    259 
    260 int Pronunciation::getModelCount( int pronIndex )
    261 {
    262   return m_ModelIDs[pronIndex].size();
    263 }
    264 
    265 int Pronunciation::getModelID( int pronIndex, int modelPos )
    266 {
    267   return m_ModelIDs[pronIndex][modelPos];
    268 }
    269 
    270 AcousticModel::AcousticModel( std::string & arbFileName )
    271 {
    272   m_CA_Arbdata = CA_LoadArbdata( arbFileName.c_str() );
    273   if (!m_CA_Arbdata)
    274     {
    275       std::cout << "Error: while trying to load " << arbFileName.c_str() << std::endl;
    276       exit (-1);
    277     }
    278 
    279 }
    280 
    281 AcousticModel::~AcousticModel()
    282 {
    283   CA_FreeArbdata( m_CA_Arbdata);
    284 }
    285 
    286 int AcousticModel::getStateIndices(int id, std::vector<int> & stateIDs)
    287 {
    288   srec_arbdata *allotree = (srec_arbdata*) m_CA_Arbdata;
    289   int numStates = allotree->hmm_infos[id].num_states;
    290 #if DEBUG
    291   std::cout << "getStateIndices: count = " << numStates <<std::endl;
    292 #endif
    293   for (int ii=0; ii <numStates; ii++ ) {
    294     stateIDs.push_back( allotree->hmm_infos[id].state_indices[ii] );
    295 #if DEBUG
    296     std::cout <<  allotree->hmm_infos[id].state_indices[ii] ;
    297 #endif
    298   }
    299 #if DEBUG
    300   std::cout << std::endl;
    301 #endif
    302     return stateIDs.size();
    303 }
    304 
    305