1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************** 6 ************************************************************************ 7 * Date Name Description 8 * 02/28/2001 aliu Creation 9 * 03/01/2001 George port to HP/UX 10 ************************************************************************/ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_TRANSLITERATION 15 16 #include "jamotest.h" 17 #include "unicode/utypes.h" 18 #include "unicode/translit.h" 19 #include "cpdtrans.h" 20 21 // SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin 22 #define SEP "-" 23 24 JamoTest::JamoTest() 25 { 26 UParseError parseError; 27 UErrorCode status = U_ZERO_ERROR; 28 NAME_JAMO = Transliterator::createFromRules("Name-Jamo", 29 UnicodeString(JAMO_NAMES_RULES, -1, US_INV), 30 UTRANS_FORWARD, parseError, status); 31 32 if (U_FAILURE(status)) { 33 delete NAME_JAMO; 34 NAME_JAMO = NULL; 35 } 36 status = U_ZERO_ERROR; 37 JAMO_NAME = Transliterator::createFromRules("Jamo-Name", 38 UnicodeString(JAMO_NAMES_RULES, -1, US_INV), 39 UTRANS_REVERSE, parseError, status); 40 if (U_FAILURE(status)) { 41 delete JAMO_NAME; 42 JAMO_NAME = NULL; 43 } 44 } 45 46 JamoTest::~JamoTest() 47 { 48 delete NAME_JAMO; 49 delete JAMO_NAME; 50 } 51 52 void 53 JamoTest::runIndexedTest(int32_t index, UBool exec, 54 const char* &name, char* /*par*/) { 55 switch (index) { 56 TESTCASE(0,TestJamo); 57 TESTCASE(1,TestRealText); 58 TESTCASE(2,TestPiecemeal); 59 default: name = ""; break; 60 } 61 } 62 63 void 64 JamoTest::TestJamo() { 65 UParseError parseError; 66 UErrorCode status = U_ZERO_ERROR; 67 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status); 68 69 if (latinJamo == 0 || U_FAILURE(status)) { 70 dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status)); 71 return; 72 } 73 74 Transliterator* jamoLatin = latinJamo->createInverse(status); 75 76 if (jamoLatin == 0) { 77 delete latinJamo; 78 errln("FAIL: createInverse() returned 0"); 79 return; 80 } 81 82 static const char* CASE[] = { 83 // Column 1 is the latin text L1 to be fed to Latin-Jamo 84 // to yield output J. 85 86 // Column 2 is expected value of J. J is fed to 87 // Jamo-Latin to yield output L2. 88 89 // Column 3 is expected value of L2. If the expected 90 // value of L2 is L1, then L2 is NULL. 91 92 // add tests for the update to fix problems where it didn't follow the standard 93 // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html 94 "gach", "(Gi)(A)(Cf)", NULL, 95 "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL, 96 "choe", "(Ci)(OE)", NULL, 97 "wo", "(IEUNG)(WEO)", NULL, 98 "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil", 99 "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum", 100 "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum", 101 "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae", 102 "gaga", "(Gi)(A)(Gi)(A)", NULL, 103 "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL, 104 "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL, 105 "gakka", "(Gi)(A)(GGi)(A)", NULL, 106 "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL, 107 "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL, 108 "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL, 109 110 "bab", "(Bi)(A)(Bf)", NULL, 111 "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu", 112 "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba", 113 "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu", 114 "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga", 115 //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga", 116 "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL, 117 "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL, 118 "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf) 119 "gga", "(Gi)(EU)(Gi)(A)", "geuga", 120 "bsa", "(Bi)(EU)(Si)(A)", "beusa", 121 "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu", 122 "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL, 123 "la", "(R)(A)", NULL, 124 "bs", "(Bi)(EU)(Sf)", "beus", 125 "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga", 126 127 // 'r' in a final position is treated like 'l' 128 "karka", "(Ki)(A)(L)(Ki)(A)", "kalka", 129 }; 130 131 enum { CASE_length = sizeof(CASE) / sizeof(CASE[0]) }; 132 133 int32_t i; 134 for (i=0; i<CASE_length; i+=3) { 135 UnicodeString jamo = nameToJamo(CASE[i+1]); 136 if (CASE[i+2] == NULL) { 137 expect(*latinJamo, CASE[i], jamo, *jamoLatin); 138 } else { 139 // Handle case where round-trip is expected to fail 140 expect(*latinJamo, CASE[i], jamo); 141 expect(*jamoLatin, jamo, CASE[i+2]); 142 } 143 } 144 145 delete latinJamo; 146 delete jamoLatin; 147 } 148 149 /** 150 * Test various step-at-a-time transformation of hangul to jamo to 151 * latin and back. 152 */ 153 void JamoTest::TestPiecemeal(void) { 154 UnicodeString hangul; hangul.append((UChar)0xBC0F); 155 UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)"); 156 UnicodeString latin("mic"); 157 UnicodeString latin2("mich"); 158 159 Transliterator *t = NULL; 160 UErrorCode status = U_ZERO_ERROR; 161 162 t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo 163 if (U_FAILURE(status) || t == 0) { 164 dataerrln("FAIL: createInstance failed"); 165 return; 166 } 167 expect(*t, hangul, jamo); 168 delete t; 169 170 t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul 171 if (U_FAILURE(status) || t == 0) { 172 errln("FAIL: createInstance failed"); 173 return; 174 } 175 expect(*t, jamo, hangul); 176 delete t; 177 178 t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status); 179 if (U_FAILURE(status) || t == 0) { 180 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 181 return; 182 } 183 expect(*t, latin, jamo); 184 delete t; 185 186 t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status); 187 if (U_FAILURE(status) || t == 0) { 188 errln("FAIL: createInstance failed"); 189 return; 190 } 191 expect(*t, jamo, latin2); 192 delete t; 193 194 t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status); 195 if (U_FAILURE(status) || t == 0) { 196 errln("FAIL: createInstance failed"); 197 return; 198 } 199 expect(*t, hangul, latin2); 200 delete t; 201 202 t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status); 203 if (U_FAILURE(status) || t == 0) { 204 errln("FAIL: createInstance failed"); 205 return; 206 } 207 expect(*t, latin, hangul); 208 delete t; 209 210 t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status); 211 if (U_FAILURE(status) || t == 0) { 212 errln("FAIL: createInstance failed"); 213 return; 214 } 215 expect(*t, hangul, jamo); 216 delete t; 217 218 t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status); 219 if (U_FAILURE(status) || t == 0) { 220 errln("FAIL: createInstance failed"); 221 return; 222 } 223 expect(*t, jamo, hangul); 224 delete t; 225 226 t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status); 227 if (U_FAILURE(status) || t == 0) { 228 errln("FAIL: createInstance failed"); 229 return; 230 } 231 expect(*t, hangul, hangul); 232 delete t; 233 } 234 235 void 236 JamoTest::TestRealText() { 237 // Test text taken from the Unicode web site 238 static const char* const WHAT_IS_UNICODE[] = { 239 "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?", 240 241 "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4", 242 "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4", 243 "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0", 244 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c", 245 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", 246 247 "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294", 248 "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098", 249 "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c", 250 "\\uc9c0\\uc815\\ud558\\uc5ec", 251 "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00", 252 "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c", 253 "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31", 254 "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", 255 "\\uc2dc\\uc2a4\\ud15c\\uc744", 256 "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654", 257 "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c", 258 "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c", 259 "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc", 260 "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4", 261 "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec", 262 "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", 263 "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740", 264 "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4", 265 "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0", 266 "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f", 267 "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c", 268 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0", 269 "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.", 270 271 "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740", 272 "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc", 273 "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0", 274 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978", 275 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c", 276 "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0", 277 "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218", 278 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0", 279 "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c", 280 "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654", 281 "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c", 282 "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c", 283 "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098", 284 "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4", 285 "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758", 286 "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.", 287 288 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744", 289 "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!", 290 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778", 291 "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0", 292 "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c", 293 "\\uc22b\\uc790\\ub97c", 294 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", 295 "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, " 296 // "Sun, Sybase, Unisys " 297 "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec", 298 "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4", 299 "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574", 300 "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", 301 // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " 302 "\\ub4f1\\uacfc", 303 "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294", 304 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC", 305 "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778", 306 "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601", 307 "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0", 308 "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740", 309 "\\uc81c\\ud488\\uc5d0\\uc11c", 310 "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", 311 "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c", 312 "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740", 313 "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294", 314 "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c", 315 "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.", 316 317 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c", 318 // Replaced a hyphen with a space to make the test case work with CLDR1.5 319 //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294", 320 "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294", 321 // Replaced a hyphen with a space. 322 //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc", 323 "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc", 324 "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74", 325 "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0", 326 "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10", 327 "\\ud6a8\\uacfc\\uac00", 328 "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c", 329 "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774", 330 "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00", 331 "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", 332 "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9", 333 "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218", 334 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74", 335 "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec", 336 "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218", 337 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", 338 339 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574", 340 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740", 341 "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300", 342 "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc", 343 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744", 344 "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758", 345 "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70", 346 "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574", 347 "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4", 348 "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4", 349 "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294", 350 "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758", 351 "\\ubc94\\uc704\\ub97c", 352 "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758", 353 "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0", 354 "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", 355 "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740", 356 "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098", 357 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0", 358 "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744", 359 "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc", 360 "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4", 361 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", 362 363 "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,", 364 "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5", 365 "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0", 366 "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c", 367 "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624." 368 }; 369 370 enum { WHAT_IS_UNICODE_length = sizeof(WHAT_IS_UNICODE) / sizeof(WHAT_IS_UNICODE[0]) }; 371 372 UParseError parseError; 373 UErrorCode status = U_ZERO_ERROR; 374 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status); 375 Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status); 376 if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) { 377 delete latinJamo; 378 delete jamoHangul; 379 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status)); 380 return; 381 } 382 Transliterator* jamoLatin = latinJamo->createInverse(status); 383 Transliterator* hangulJamo = jamoHangul->createInverse(status); 384 if (jamoLatin == 0 || hangulJamo == 0) { 385 errln("FAIL: createInverse returned NULL"); 386 delete latinJamo; 387 delete jamoLatin; 388 delete jamoHangul; 389 delete hangulJamo; 390 return; 391 } 392 393 Transliterator* tarray[4] = 394 { hangulJamo, jamoLatin, latinJamo, jamoHangul }; 395 CompoundTransliterator rt(tarray, 4); 396 397 UnicodeString buf; 398 int32_t total = 0; 399 int32_t errors = 0; 400 int32_t i; 401 for (i=0; i < WHAT_IS_UNICODE_length; ++i) { 402 ++total; 403 UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV); 404 hangul = hangul.unescape(); // Parse backslash-u escapes 405 UnicodeString hangulX = hangul; 406 rt.transliterate(hangulX); 407 if (hangul != hangulX) { 408 ++errors; 409 UnicodeString jamo = hangul; hangulJamo->transliterate(jamo); 410 UnicodeString latin = jamo; jamoLatin->transliterate(latin); 411 UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2); 412 UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2); 413 414 buf.remove(0); 415 buf.append("FAIL: "); 416 if (hangul2 != hangulX) { 417 buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")"); 418 } 419 // The Hangul-Jamo conversion is not usually the 420 // bug here, so we hide it from display. 421 // Uncomment lines to see the Hangul. 422 buf.append(//hangul + " => " + 423 jamoToName(jamo) + " => " + 424 latin + " => " + jamoToName(jamo2) 425 //+ " => " + hangul2 426 ); 427 errln(prettify(buf)); 428 } 429 } 430 if (errors != 0) { 431 errln((UnicodeString)"Test word failures: " + errors + " out of " + total); 432 } else { 433 logln((UnicodeString)"All " + total + " test words passed"); 434 } 435 436 delete latinJamo; 437 delete jamoLatin; 438 delete jamoHangul; 439 delete hangulJamo; 440 } 441 442 // Override TransliteratorTest 443 void 444 JamoTest::expectAux(const UnicodeString& tag, 445 const UnicodeString& summary, UBool pass, 446 const UnicodeString& expectedResult) { 447 UnicodeString jsum = jamoToName(summary); 448 UnicodeString jexp = jamoToName(expectedResult); 449 TransliteratorTest::expectAux(tag, jsum, pass, jexp); 450 } 451 452 const char* JamoTest::JAMO_NAMES_RULES = 453 "'(Gi)' <> \\u1100;" 454 "'(GGi)' <> \\u1101;" 455 "'(Ni)' <> \\u1102;" 456 "'(Di)' <> \\u1103;" 457 "'(DD)' <> \\u1104;" 458 "'(R)' <> \\u1105;" 459 "'(Mi)' <> \\u1106;" 460 "'(Bi)' <> \\u1107;" 461 "'(BB)' <> \\u1108;" 462 "'(Si)' <> \\u1109;" 463 "'(SSi)' <> \\u110A;" 464 "'(IEUNG)' <> \\u110B;" 465 "'(Ji)' <> \\u110C;" 466 "'(JJ)' <> \\u110D;" 467 "'(Ci)' <> \\u110E;" 468 "'(Ki)' <> \\u110F;" 469 "'(Ti)' <> \\u1110;" 470 "'(Pi)' <> \\u1111;" 471 "'(Hi)' <> \\u1112;" 472 473 "'(A)' <> \\u1161;" 474 "'(AE)' <> \\u1162;" 475 "'(YA)' <> \\u1163;" 476 "'(YAE)' <> \\u1164;" 477 "'(EO)' <> \\u1165;" 478 "'(E)' <> \\u1166;" 479 "'(YEO)' <> \\u1167;" 480 "'(YE)' <> \\u1168;" 481 "'(O)' <> \\u1169;" 482 "'(WA)' <> \\u116A;" 483 "'(WAE)' <> \\u116B;" 484 "'(OE)' <> \\u116C;" 485 "'(YO)' <> \\u116D;" 486 "'(U)' <> \\u116E;" 487 "'(WEO)' <> \\u116F;" 488 "'(WE)' <> \\u1170;" 489 "'(WI)' <> \\u1171;" 490 "'(YU)' <> \\u1172;" 491 "'(EU)' <> \\u1173;" 492 "'(YI)' <> \\u1174;" 493 "'(I)' <> \\u1175;" 494 495 "'(Gf)' <> \\u11A8;" 496 "'(GGf)' <> \\u11A9;" 497 "'(GS)' <> \\u11AA;" 498 "'(Nf)' <> \\u11AB;" 499 "'(NJ)' <> \\u11AC;" 500 "'(NH)' <> \\u11AD;" 501 "'(Df)' <> \\u11AE;" 502 "'(L)' <> \\u11AF;" 503 "'(LG)' <> \\u11B0;" 504 "'(LM)' <> \\u11B1;" 505 "'(LB)' <> \\u11B2;" 506 "'(LS)' <> \\u11B3;" 507 "'(LT)' <> \\u11B4;" 508 "'(LP)' <> \\u11B5;" 509 "'(LH)' <> \\u11B6;" 510 "'(Mf)' <> \\u11B7;" 511 "'(Bf)' <> \\u11B8;" 512 "'(BS)' <> \\u11B9;" 513 "'(Sf)' <> \\u11BA;" 514 "'(SSf)' <> \\u11BB;" 515 "'(NG)' <> \\u11BC;" 516 "'(Jf)' <> \\u11BD;" 517 "'(Cf)' <> \\u11BE;" 518 "'(Kf)' <> \\u11BF;" 519 "'(Tf)' <> \\u11C0;" 520 "'(Pf)' <> \\u11C1;" 521 "'(Hf)' <> \\u11C2;"; 522 523 /** 524 * Convert short names to actual jamo. E.g., "x(LG)y" returns 525 * "x\u11B0y". See JAMO_NAMES for table of names. 526 */ 527 UnicodeString 528 JamoTest::nameToJamo(const UnicodeString& input) { 529 if (NAME_JAMO == 0) { 530 errln("Failed to create NAME_JAMO"); 531 return input; /* failure! */ 532 } 533 UnicodeString result(input); 534 NAME_JAMO->transliterate(result); 535 return result; 536 } 537 538 /** 539 * Convert jamo to short names. E.g., "x\u11B0y" returns 540 * "x(LG)y". See JAMO_NAMES for table of names. 541 */ 542 UnicodeString 543 JamoTest::jamoToName(const UnicodeString& input) { 544 if (NAME_JAMO == 0) { 545 errln("Failed to create NAME_JAMO"); 546 return input; /* failure! */ 547 } 548 UnicodeString result(input); 549 JAMO_NAME->transliterate(result); 550 return result; 551 } 552 553 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 554