/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */


//
// This converts the data found at http://www.speech.cs.cmu.edu/cgi-bin/cmudict
// into the *.ok format used by Nuance.
// We use the file c0.6, which corresponds to (v. 0.6).
//
// to run: make cmu2nuance && ./cmu2nuance <c0.6 >c0.6.ok
//
// TODO: look at generation of 'L', ')', and ','
//

#include <stdio.h>
#include <string.h>
#include <ctype.h>


// One CMU -> Nuance mapping.  'cmu' may cover several space-separated CMU
// phonemes (e.g. "AA1 R") that are emitted as a single Nuance sequence.
struct phoneme_map {
    const char* cmu;
    const char* nuance;
};

// Entries are tried in order and the first match wins, so a multi-phoneme
// pattern ("AH0 N") must appear before the single phoneme it starts with
// ("AH0").  The trailing comments give an example word and its CMU spelling.
static const struct phoneme_map phoneme_table[] = {
    { "AA1 R", ")r" },  // odd     AA D
    { "AA0",   "o"  },  // odd     AA D
    { "AA1",   "o"  },  // odd     AA D
    { "AA2",   "o"  },  // odd     AA D

    { "AE0",   "a"  },  // at      AE T
    { "AE1",   "a"  },  // at      AE T
    { "AE2",   "a"  },  // at      AE T

    // "AH0 L" -> "L" is intentionally disabled: drops accuracy by 1%
    { "AH0 N", "~"  },  // hut     HH AH T - from jean
    { "AH0 M", "}"  },  // hut     HH AH T - from jean
    { "AH0",   "@"  },  // hut     HH AH T - from jean
    { "AH1",   "u"  },  // hut     HH AH T
    { "AH2",   "u"  },  // hut     HH AH T

    { "AO0",   "{"  },  // ought   AO T
    { "AO1",   "{"  },  // ought   AO T
    { "AO2",   "{"  },  // ought   AO T

    { "AW0",   "?"  },  // cow     K AW
    { "AW1",   "?"  },  // cow     K AW
    { "AW2",   "?"  },  // cow     K AW

    { "AY0",   "I"  },  // hide    HH AY D
    { "AY1",   "I"  },  // hide    HH AY D
    { "AY2",   "I"  },  // hide    HH AY D

    { "B",     "b"  },  // be      B IY
    { "CH",    "C"  },  // cheese  CH IY Z
    { "D",     "d"  },  // dee     D IY
    { "DH",    "D"  },  // thee    DH IY

    { "EH1 R", ",r" },  // Ed      EH D
    { "EH0",   "c"  },  // Ed      EH D - from jean
    { "EH1",   "e"  },  // Ed      EH D
    { "EH2",   "e"  },  // Ed      EH D

    { "ER0",   "P"  },  // hurt    HH ER T
    { "ER1",   "V"  },  // hurt    HH ER T
    { "ER2",   "V"  },  // hurt    HH ER T

    { "EY0",   "A"  },  // ate     EY T
    { "EY1",   "A"  },  // ate     EY T
    { "EY2",   "A"  },  // ate     EY T

    { "F",     "f"  },  // fee     F IY
    { "G",     "g"  },  // green   G R IY N
    { "HH",    "h"  },  // he      HH IY

    { "IH0",   "6"  },  // it      IH T
    { "IH1",   "i"  },  // it      IH T
    { "IH2",   "i"  },  // it      IH T

    { "IY0",   "/"  },  // eat     IY T - from jean
    { "IY1",   "E"  },  // eat     IY T
    { "IY2",   "E"  },  // eat     IY T

    { "JH",    "j"  },  // gee     JH IY
    { "K",     "k"  },  // key     K IY
    { "L",     "l"  },  // lee     L IY
    { "M",     "m"  },  // me      M IY
    { "N",     "n"  },  // knee    N IY
    { "NG",    "N"  },  // ping    P IH NG

    { "OW0",   "]"  },  // oat     OW T
    { "OW1",   "O"  },  // oat     OW T
    { "OW2",   "O"  },  // oat     OW T

    { "OY0",   "<"  },  // toy     T OY
    { "OY1",   "<"  },  // toy     T OY
    { "OY2",   "<"  },  // toy     T OY

    { "P",     "p"  },  // pee     P IY
    { "R",     "r"  },  // read    R IY D
    { "S",     "s"  },  // sea     S IY
    { "SH",    "S"  },  // she     SH IY
    { "T",     "t"  },  // tea     T IY
    { "TH",    "T"  },  // theta   TH EY T AH

    { "UH0",   "q"  },  // hood    HH UH D
    { "UH1",   "q"  },  // hood    HH UH D
    { "UH2",   "q"  },  // hood    HH UH D

    { "UW0",   "U"  },  // two     T UW
    { "UW1",   "U"  },  // two     T UW
    { "UW2",   "U"  },  // two     T UW

    { "V",     "v"  },  // vee     V IY
    { "W",     "w"  },  // we      W IY
    { "Y",     "y"  },  // yield   Y IY L D
    { "Z",     "z"  },  // zee     Z IY
    { "ZH",    "Z"  },  // seizure S IY ZH ER
};


// If 'phone' begins with the CMU sequence 'cmu' and that sequence ends on a
// phoneme boundary, write 'nuance' to stdout and return the position just
// past the match; otherwise write nothing and return NULL.
static const char* xlate(const char* phone, const char* cmu, const char* nuance) {
    size_t ncmu = strlen(cmu);
    if (strncmp(phone, cmu, ncmu) != 0) return NULL;

    // A boundary is whitespace or end of string.  Accepting the terminator
    // lets the last phoneme of a final line without a trailing newline match.
    // The cast keeps isspace() defined for bytes >= 0x80 when char is signed.
    unsigned char after = (unsigned char)phone[ncmu];
    if (after != 0 && !isspace(after)) return NULL;

    fputs(nuance, stdout);
    return phone + ncmu;
}


// Echo the dictionary word starting at 'p' to stdout in lower case, dropping
// an alternate-pronunciation suffix such as "(2)".  Returns a pointer to the
// first character that was not consumed.  If the string ends before any
// whitespace, a diagnostic is written to stderr and the pointer to the
// terminator is returned.
static const char* echo_name(const char* p, int lineno) {
    while (!isspace((unsigned char)*p)) {
        if (*p == 0) {
            fprintf(stderr, "can't read name at line %d\n", lineno);
            break;
        }
        // "(d)" followed by whitespace marks an alternate pronunciation;
        // skip it so every variant is emitted under the same word.
        if (p[0] == '(' && isdigit((unsigned char)p[1]) && p[2] == ')' &&
            isspace((unsigned char)p[3])) {
            p += 3;
            break;
        }
        fputc(tolower((unsigned char)*p), stdout);
        p++;
    }
    return p;
}


// Translate the phoneme(s) at 'p' using phoneme_table.  On success the Nuance
// text has been written to stdout and the position after the consumed CMU
// text is returned; NULL means no table entry matched.
static const char* translate_phoneme(const char* p) {
    size_t count = sizeof(phoneme_table) / sizeof(phoneme_table[0]);
    for (size_t i = 0; i < count; i++) {
        const char* next = xlate(p, phoneme_table[i].cmu, phoneme_table[i].nuance);
        if (next != NULL) return next;
    }
    return NULL;
}


// Reads a CMU dictionary from stdin and writes the Nuance form to stdout,
// one "word pronunciation" entry per line; diagnostics go to stderr.
// Input is read in pieces of at most sizeof(line) - 1 characters, so a longer
// line is seen as several separate lines.
int main(int argc, const char* argv[]) {
    char line[200];

    (void)argc;
    (void)argv;

    fputs("#LANG=EN-US\n", stdout);

    for (int lineno = 1; NULL != fgets(line, sizeof(line), stdin); lineno++) {
        if (line[0] == '#') continue;
        if (line[0] == 0) continue;
        if (!isalnum((unsigned char)line[0])) {
            // 'line' normally still carries its newline from fgets
            fprintf(stderr, "warning: ignoring line %d - %s", lineno, line);
            continue;
        }

        // parse name, echoing in lower case and skipping (2) suffix
        const char* p = echo_name(line, lineno);
        fputc(' ', stdout);

        // loop over whitespace delimited phonemes
        while (1) {
            // skip leading whitespace
            while (isspace((unsigned char)*p)) p++;
            if (*p == 0) break;

            const char* next = translate_phoneme(p);
            if (next == NULL) {
                // give up on the rest of this entry; the text already
                // written for it stays in the output
                fprintf(stderr, "can't pronounce line %d: %s", lineno, p);
                break;
            }
            p = next;
        }

        fputc('\n', stdout);
    }

    return 0;
}