Home | History | Annotate | Download | only in make_ve_grammar
      1 /*---------------------------------------------------------------------------*
      2  *  make_ve_grammar.c                                                            *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 #include <stdio.h>
     22 #include <stdlib.h>
     23 #include <string.h>
     24 
     25 #include "plog.h"
     26 #include "passert.h"
     27 #include "duk_args.h"
     28 #include "duk_err.h"
     29 #include "ptrd.h"
     30 
     31 #include "srec_arb.h"
     32 #include "simapi.h"
     33 
     34 #include "PFileSystem.h"
     35 #include "PANSIFileSystem.h"
     36 
     37 #define MAX_FILE_NAME_LEN 64
     38 #define DEFAULT_WWTRIPHONE_SILMODE 3
     39 
     40 
     41 /* check if the central phoneme is a word-specific phoneme; if so, do not enroll it into the ve grammar.*/
     42 int ws_verify(char * allo_phoneme){
     43   switch(allo_phoneme[0]){
     44   case '(': return 1;
     45   case '.': return 1;
     46   case '0': return 1;
     47   case '1': return 1;
     48   case '2': return 1;
     49   case '3': return 1;
     50   case '4': return 1;
     51   case '5': return 1;
     52   case '7': return 1;
     53   case '8': return 1;
     54   case '9': return 1;
     55   case '=': return 1;
     56   case '>': return 1;
     57   case 'B': return 1;
     58   case 'F': return 1;
     59   case 'G': return 1;
     60   case 'H': return 1;
     61   case 'K': return 1;
     62   case 'M': return 1;
     63   case 'Q': return 1;
     64   case 'R': return 1;
     65   case 'W': return 1;
     66   case 'X': return 1;
     67   case 'Y': return 1;
     68   case '[': return 1;
     69   case '\\': return 1;
     70   case '|': return 1;
     71   case '+': return 1;
     72   default: return 0;
     73   }
     74 }
     75 
     76 int main (int argc, char **argv)
     77 {
     78 	int i;
     79 	char filen[MAX_FILE_NAME_LEN]="";
     80 	CA_Arbdata *ca_arbdata = NULL;     /* new, link btw acc/syn */
     81 	char *arbfile = NULL;
     82 	char *base = NULL;
     83 
     84 	FILE* pfile;
     85 	FILE* pFile_PCLG;
     86 	FILE* pFile_map;
     87 	FILE* pFile_P;
     88         FILE* pFile_Grev;
     89 	FILE* pFile_script;
     90 
     91 	int num_hmms;
     92 	int num_wd = 0;
     93 	int script_line = 0;
     94 	int cflag = 0, fnode = 0;
     95 	int sil_model = DEFAULT_WWTRIPHONE_SILMODE;
     96 	int rc;
     97 	srec_arbdata *allotree = NULL;
     98 
     99 	nodeID startNode       = 0;
    100 	nodeID pauEndNode      = 1;
    101 	nodeID modelStartNode  = 2;
    102 	nodeID modelEndNode    = 3;
    103 	nodeID pau2StartNode   = 4;
    104 	nodeID pau2EndNode     = 5;
    105 	nodeID endNode         = 6;
    106 
    107 	/* initial memory */
    108 	CHKLOG(rc, PMemInit());
    109 
    110 	if(argc<5){
    111 	  printf("USAGE: -swiarb <swiarb file> -base <output base name>\n");
    112 	  exit(1);
    113 	}
    114 
    115 
    116 	for(i=1; i<argc; i++) {
    117 	  if(!strcmp(argv[i],"-swiarb")) {
    118 	    arbfile = argv[++i];
    119 	    printf("using swiarb from file %s\n", arbfile);
    120 	  }
    121 	  else if(!strcmp(argv[i],"-base")){
    122 	    base = argv[++i];
    123 	  }
    124 	  else {
    125 	    printf("error_usage: argument [%s]\n", argv[i]);
    126 	    exit(1);
    127 	  }
    128 	}
    129 
    130 	/* check arb file exist*/
    131 	if ( (pfile = fopen(arbfile, "r")) != NULL ){
    132 	    fclose(pfile);
    133 	}
    134 	else{
    135 	  printf("ERROR: the specified swiarb file does not exist.\n");
    136 	  exit(1);
    137 	}
    138 
    139 
    140 	ca_arbdata = CA_LoadArbdata(arbfile);
    141 
    142 	allotree = (srec_arbdata*)ca_arbdata;
    143 	num_hmms = allotree->num_hmms;
    144 
    145 
    146 	/* Dump out VE .PCLG.txt, .Grev2.det.txt, .P.txt, .script and .map files; .P.txt, .script and .map are not necessary for voice enroll, so just dump out to create .g2g file. Xufang */
    147 
    148 	printf("Dumping out VE files\n");
    149 
    150 	strcat(filen,base);
    151 	strcat(filen,".PCLG.txt");
    152 	pFile_PCLG = fopen(filen,"w");
    153 
    154 	filen[0]='\0';
    155 	strcat(filen,base);
    156 	strcat(filen,".map");
    157         pFile_map = fopen(filen,"w");
    158 
    159         filen[0]='\0';
    160         strcat(filen,base);
    161         strcat(filen,".P.txt");
    162         pFile_P = fopen(filen,"w");
    163 
    164         filen[0]='\0';
    165         strcat(filen,base);
    166         strcat(filen,".Grev2.det.txt");
    167         pFile_Grev = fopen(filen,"w");
    168 
    169         filen[0]='\0';
    170         strcat(filen,base);
    171         strcat(filen,".script");
    172         pFile_script = fopen(filen,"w");
    173 
    174         fprintf(pFile_Grev,"0\t1\teps\t80\n");
    175         fprintf(pFile_Grev,"1\t2\t%s.grxml@VE_Words\n",base);
    176 
    177 	fprintf(pFile_map,"eps %d\n",num_wd++);
    178         fprintf(pFile_map,"%s.grxml@ROOT %d\n",base,num_wd++);
    179         fprintf(pFile_map,"%s.grxml@VE_Words %d\n",base,num_wd++);
    180         fprintf(pFile_map,"-pau- %d\n",num_wd++);
    181         fprintf(pFile_map,"-pau2- %d\n",num_wd++);
    182         fprintf(pFile_map,"@VE_Words %d\n",num_wd++);
    183 
    184         fprintf(pFile_P,"0\t1\teps\t{\t\n");
    185         fprintf(pFile_P,"1\t2\teps\t{\t\n");
    186         fprintf(pFile_P,"2\t3\teps\t{\t\n");
    187         fprintf(pFile_P,"2\t4\teps\t{\t\n");
    188         fprintf(pFile_P,"3\t5\t%s.grxml@VE_Words\t%s.grxml@VE_Words\t\n",base,base);
    189         fprintf(pFile_P,"4\t8\teps\t{\t\n");
    190         fprintf(pFile_P,"5\t6\teps\t_3\t\n");
    191         fprintf(pFile_P,"6\t7\teps\tVE_Words}\t\n");
    192         fprintf(pFile_P,"7\t9\teps\t_2\t\n");
    193 
    194         fprintf(pFile_script,"%d type=SENT.type;meaning=SENT.V;\n",script_line++);
    195         fprintf(pFile_script,"%d type='NEW';V=UTT.V;\n",script_line++);
    196         fprintf(pFile_script,"%d type='OLD';V=VE_Words.V;\n",script_line++);
    197 	fprintf(pFile_script,"%d V=UTT.V?UTT.V:'--';\n",script_line++);
    198         fprintf(pFile_script,"%d V=PHONEME.V\n",script_line++);
    199 
    200 	for(i=0;i<num_hmms;i++){
    201 	  if(ws_verify(allotree->hmm_infos[i].name))
    202 	    continue;
    203 	  if(!strcmp(allotree->hmm_infos[i].name,"#")){
    204 	    sil_model = i;
    205 	    fprintf(pFile_PCLG,"%d\t%d\thmm%d_#sil#\t-pau-\n", startNode, pauEndNode, i);
    206             fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", pauEndNode, modelStartNode);
    207           }
    208           else{
    209             if(strlen(allotree->hmm_infos[i].name)>0){
    210 	      if(cflag==0){
    211 		fnode = i;
    212 		cflag = 1;
    213 	      }
    214               fprintf(pFile_PCLG,"%d\t%d\thmm%d_%s\twd_hmm%d_%s\t40\n", modelStartNode, modelEndNode,
    215 		      i,allotree->hmm_infos[i].name,i,allotree->hmm_infos[i].name);
    216 	      fprintf(pFile_map,"wd_hmm%d_%s %d\n",i,allotree->hmm_infos[i].name,num_wd++);
    217 	      fprintf(pFile_Grev,"1\t3\twd_hmm%d_%s\n",i,allotree->hmm_infos[i].name);
    218 	      fprintf(pFile_P,"8\t10\twd_hmm%d_%s\t_%d\t\n",i,allotree->hmm_infos[i].name,script_line);
    219 	      fprintf(pFile_script,"%d V=V?V:'';V=V+'wd_hmm%d_%s';\n",script_line++,i,allotree->hmm_infos[i].name);
    220 	    }
    221           }
    222 	}
    223 
    224         fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", modelEndNode, modelStartNode);
    225         fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", modelEndNode, pau2StartNode);
    226         fprintf(pFile_PCLG,"%d\t%d\thmm%d_#sil#\t-pau2-\n",pau2StartNode, pau2EndNode, sil_model);
    227         fprintf(pFile_PCLG,"%d\t%d\t.wb\teps\n", pau2EndNode, endNode);
    228         fprintf(pFile_PCLG,"%d\n", endNode);
    229 
    230         fprintf(pFile_Grev,"2\n");
    231 	for(i=fnode;i<num_hmms;i++){
    232           if(ws_verify(allotree->hmm_infos[i].name))
    233             continue;
    234 	  fprintf(pFile_Grev,"3\t3\twd_hmm%d_%s\t40\n",i,allotree->hmm_infos[i].name);
    235 	}
    236         fprintf(pFile_Grev,"3\n");
    237 
    238         fprintf(pFile_P,"9\t11\teps\tSENT}\t\n");
    239         fprintf(pFile_P,"10\t12\teps\tPHONEME}\t\n");
    240         fprintf(pFile_P,"11\t13\teps\t_0\t\n");
    241         fprintf(pFile_P,"12\t14\teps\t_4\t\n");
    242         fprintf(pFile_P,"13\t15\teps\tROOT}\t\n");
    243         fprintf(pFile_P,"14\t16\teps\teps\t\n");
    244         fprintf(pFile_P,"15\t\n");
    245         fprintf(pFile_P,"16\t17\teps\tUTT}\t\n");
    246         fprintf(pFile_P,"16\t8\teps\t{\t\n");
    247         fprintf(pFile_P,"17\t9\teps\t_1\t\n");
    248 
    249 	fclose(pFile_PCLG);
    250 	printf("Creating %s.PCLG.txt...\n",base);
    251         fclose(pFile_Grev);
    252         printf("Creating %s.Grev2.det.txt...\n",base);
    253         fclose(pFile_map);
    254 	printf("Creating %s.map...\n",base);
    255         fclose(pFile_P);
    256 	printf("Creating %s.P.txt...\n",base);
    257 	fclose(pFile_script);
    258 	printf("Creating %s.script...\n",base);
    259 	printf("SUCCESS!\n");
    260 
    261 
    262   CA_FreeArbdata( ca_arbdata);
    263 
    264   PMemShutdown();
    265   return 0;
    266 CLEANUP:
    267   return 1;
    268 }
    269 
    270