1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************** 8 ************************************************************************ 9 * Date Name Description 10 * 02/28/2001 aliu Creation 11 * 03/01/2001 George port to HP/UX 12 ************************************************************************/ 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_TRANSLITERATION 17 18 #include "jamotest.h" 19 #include "unicode/utypes.h" 20 #include "unicode/translit.h" 21 #include "cmemory.h" 22 #include "cpdtrans.h" 23 24 // SEP is the disambiguation separator used by Latin-Jamo and Jamo-Latin 25 #define SEP "-" 26 27 JamoTest::JamoTest() 28 { 29 UParseError parseError; 30 UErrorCode status = U_ZERO_ERROR; 31 NAME_JAMO = Transliterator::createFromRules("Name-Jamo", 32 UnicodeString(JAMO_NAMES_RULES, -1, US_INV), 33 UTRANS_FORWARD, parseError, status); 34 35 if (U_FAILURE(status)) { 36 delete NAME_JAMO; 37 NAME_JAMO = NULL; 38 } 39 status = U_ZERO_ERROR; 40 JAMO_NAME = Transliterator::createFromRules("Jamo-Name", 41 UnicodeString(JAMO_NAMES_RULES, -1, US_INV), 42 UTRANS_REVERSE, parseError, status); 43 if (U_FAILURE(status)) { 44 delete JAMO_NAME; 45 JAMO_NAME = NULL; 46 } 47 } 48 49 JamoTest::~JamoTest() 50 { 51 delete NAME_JAMO; 52 delete JAMO_NAME; 53 } 54 55 void 56 JamoTest::runIndexedTest(int32_t index, UBool exec, 57 const char* &name, char* /*par*/) { 58 switch (index) { 59 TESTCASE(0,TestJamo); 60 TESTCASE(1,TestRealText); 61 TESTCASE(2,TestPiecemeal); 62 default: name = ""; break; 63 } 64 } 65 66 void 67 JamoTest::TestJamo() { 68 UParseError parseError; 69 UErrorCode status = U_ZERO_ERROR; 70 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status); 71 72 if (latinJamo == 0 || U_FAILURE(status)) { 73 dataerrln("FAIL: createInstance() returned 0 - %s", u_errorName(status)); 74 return; 75 } 76 77 Transliterator* jamoLatin = latinJamo->createInverse(status); 78 79 if (jamoLatin == 0) { 80 delete latinJamo; 81 errln("FAIL: createInverse() returned 0"); 82 return; 83 } 84 85 static const char* CASE[] = { 86 // Column 1 is the latin text L1 to be fed to Latin-Jamo 87 // to yield output J. 88 89 // Column 2 is expected value of J. J is fed to 90 // Jamo-Latin to yield output L2. 91 92 // Column 3 is expected value of L2. If the expected 93 // value of L2 is L1, then L2 is NULL. 94 95 // add tests for the update to fix problems where it didn't follow the standard 96 // see also http://www.unicode.org/cldr/data/charts/transforms/Latin-Hangul.html 97 "gach", "(Gi)(A)(Cf)", NULL, 98 "geumhui", "(Gi)(EU)(Mf)(Hi)(YI)", NULL, 99 "choe", "(Ci)(OE)", NULL, 100 "wo", "(IEUNG)(WEO)", NULL, 101 "Wonpil", "(IEUNG)(WEO)(Nf)(Pi)(I)(L)", "wonpil", 102 "GIPPEUM", "(Gi)(I)(BB)(EU)(Mf)", "gippeum", 103 "EUTTEUM", "(IEUNG)(EU)(DD)(EU)(Mf)", "eutteum", 104 "KKOTNAE", "(GGi)(O)(Tf)(Ni)(AE)", "kkotnae", 105 "gaga", "(Gi)(A)(Gi)(A)", NULL, 106 "gag-a", "(Gi)(A)(Gf)(IEUNG)(A)", NULL, 107 "gak-ka", "(Gi)(A)(Kf)(Ki)(A)", NULL, 108 "gakka", "(Gi)(A)(GGi)(A)", NULL, 109 "gakk-a", "(Gi)(A)(GGf)(IEUNG)(A)", NULL, 110 "gakkka", "(Gi)(A)(GGf)(Ki)(A)", NULL, 111 "gak-kka", "(Gi)(A)(Kf)(GGi)(A)", NULL, 112 113 "bab", "(Bi)(A)(Bf)", NULL, 114 "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu", 115 "babbba", "(Bi)(A)(Bf)(Bi)(EU)(Bi)(A)", "babbeuba", 116 "bagg", "(Bi)(A)(Gf)(Gi)(EU)", "baggeu", 117 "baggga", "(Bi)(A)(Gf)(Gi)(EU)(Gi)(A)", "baggeuga", 118 //"bag" SEP "gga", "(Bi)(A)(Gf)" SEP "(Gi)(EU)(Gi)(A)", "bag" SEP "geuga", 119 "kabsa", "(Ki)(A)(Bf)(Si)(A)", NULL, 120 "kabska", "(Ki)(A)(BS)(Ki)(A)", NULL, 121 "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf) 122 "gga", "(Gi)(EU)(Gi)(A)", "geuga", 123 "bsa", "(Bi)(EU)(Si)(A)", "beusa", 124 "agg", "(IEUNG)(A)(Gf)(Gi)(EU)", "aggeu", 125 "agga", "(IEUNG)(A)(Gf)(Gi)(A)", NULL, 126 "la", "(R)(A)", NULL, 127 "bs", "(Bi)(EU)(Sf)", "beus", 128 "kalgga", "(Ki)(A)(L)(Gi)(EU)(Gi)(A)", "kalgeuga", 129 130 // 'r' in a final position is treated like 'l' 131 "karka", "(Ki)(A)(L)(Ki)(A)", "kalka", 132 }; 133 134 enum { CASE_length = UPRV_LENGTHOF(CASE) }; 135 136 int32_t i; 137 for (i=0; i<CASE_length; i+=3) { 138 UnicodeString jamo = nameToJamo(CASE[i+1]); 139 if (CASE[i+2] == NULL) { 140 expect(*latinJamo, CASE[i], jamo, *jamoLatin); 141 } else { 142 // Handle case where round-trip is expected to fail 143 expect(*latinJamo, CASE[i], jamo); 144 expect(*jamoLatin, jamo, CASE[i+2]); 145 } 146 } 147 148 delete latinJamo; 149 delete jamoLatin; 150 } 151 152 /** 153 * Test various step-at-a-time transformation of hangul to jamo to 154 * latin and back. 155 */ 156 void JamoTest::TestPiecemeal(void) { 157 UnicodeString hangul; hangul.append((UChar)0xBC0F); 158 UnicodeString jamo = nameToJamo("(Mi)(I)(Cf)"); 159 UnicodeString latin("mic"); 160 UnicodeString latin2("mich"); 161 162 Transliterator *t = NULL; 163 UErrorCode status = U_ZERO_ERROR; 164 165 t = Transliterator::createInstance("NFD", UTRANS_FORWARD, status); // was Hangul-Jamo 166 if (U_FAILURE(status) || t == 0) { 167 dataerrln("FAIL: createInstance failed"); 168 return; 169 } 170 expect(*t, hangul, jamo); 171 delete t; 172 173 t = Transliterator::createInstance("NFC", UTRANS_FORWARD, status); // was Jamo-Hangul 174 if (U_FAILURE(status) || t == 0) { 175 errln("FAIL: createInstance failed"); 176 return; 177 } 178 expect(*t, jamo, hangul); 179 delete t; 180 181 t = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, status); 182 if (U_FAILURE(status) || t == 0) { 183 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 184 return; 185 } 186 expect(*t, latin, jamo); 187 delete t; 188 189 t = Transliterator::createInstance("Jamo-Latin", UTRANS_FORWARD, status); 190 if (U_FAILURE(status) || t == 0) { 191 errln("FAIL: createInstance failed"); 192 return; 193 } 194 expect(*t, jamo, latin2); 195 delete t; 196 197 t = Transliterator::createInstance("Hangul-Latin", UTRANS_FORWARD, status); 198 if (U_FAILURE(status) || t == 0) { 199 errln("FAIL: createInstance failed"); 200 return; 201 } 202 expect(*t, hangul, latin2); 203 delete t; 204 205 t = Transliterator::createInstance("Latin-Hangul", UTRANS_FORWARD, status); 206 if (U_FAILURE(status) || t == 0) { 207 errln("FAIL: createInstance failed"); 208 return; 209 } 210 expect(*t, latin, hangul); 211 delete t; 212 213 t = Transliterator::createInstance("Hangul-Latin; Latin-Jamo", UTRANS_FORWARD, status); 214 if (U_FAILURE(status) || t == 0) { 215 errln("FAIL: createInstance failed"); 216 return; 217 } 218 expect(*t, hangul, jamo); 219 delete t; 220 221 t = Transliterator::createInstance("Jamo-Latin; Latin-Hangul", UTRANS_FORWARD, status); 222 if (U_FAILURE(status) || t == 0) { 223 errln("FAIL: createInstance failed"); 224 return; 225 } 226 expect(*t, jamo, hangul); 227 delete t; 228 229 t = Transliterator::createInstance("Hangul-Latin; Latin-Hangul", UTRANS_FORWARD, status); 230 if (U_FAILURE(status) || t == 0) { 231 errln("FAIL: createInstance failed"); 232 return; 233 } 234 expect(*t, hangul, hangul); 235 delete t; 236 } 237 238 void 239 JamoTest::TestRealText() { 240 // Test text taken from the Unicode web site 241 static const char* const WHAT_IS_UNICODE[] = { 242 "\\uc720\\ub2c8\\ucf54\\ub4dc\\uc5d0", "\\ub300\\ud574", "?", 243 244 "\\uc5b4\\ub5a4", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b4\\ub5a4", 245 "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b4\\ub5a4", "\\uc5b8\\uc5b4\\uc5d0\\ub3c4", 246 "\\uc0c1\\uad00\\uc5c6\\uc774", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\ubaa8\\ub4e0", 247 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uace0\\uc720", "\\ubc88\\ud638\\ub97c", 248 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", 249 250 "\\uae30\\ubcf8\\uc801\\uc73c\\ub85c", "\\ucef4\\ud4e8\\ud130\\ub294", 251 "\\uc22b\\uc790\\ub9cc", "\\ucc98\\ub9ac\\ud569\\ub2c8\\ub2e4.", "\\uae00\\uc790\\ub098", 252 "\\ub2e4\\ub978", "\\ubb38\\uc790\\uc5d0\\ub3c4", "\\uc22b\\uc790\\ub97c", 253 "\\uc9c0\\uc815\\ud558\\uc5ec", 254 "\\uc800\\uc7a5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\uac00", 255 "\\uac1c\\ubc1c\\ub418\\uae30", "\\uc804\\uc5d0\\ub294", "\\uc774\\ub7ec\\ud55c", 256 "\\uc22b\\uc790\\ub97c", "\\uc9c0\\uc815\\ud558\\uae30", "\\uc704\\ud574", "\\uc218\\ubc31", 257 "\\uac00\\uc9c0\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", 258 "\\uc2dc\\uc2a4\\ud15c\\uc744", 259 "\\uc0ac\\uc6a9\\ud588\\uc2b5\\ub2c8\\ub2e4.", "\\ub2e8\\uc77c", "\\uae30\\ud638\\ud654", 260 "\\ubc29\\ubc95\\uc73c\\ub85c\\ub294", "\\ubaa8\\ub4e0", "\\ubb38\\uc790\\ub97c", 261 "\\ud3ec\\ud568\\ud560", "\\uc218", "\\uc5c6\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc608\\ub97c", 262 "\\ub4e4\\uc5b4", "\\uc720\\ub7fd", "\\uc5f0\\ud569\\uc5d0\\uc11c\\ub9cc", 263 "\\ubcf4\\ub354\\ub77c\\ub3c4", "\\ubaa8\\ub4e0", "\\uac01", "\\ub098\\ub77c\\ubcc4", 264 "\\uc5b8\\uc5b4\\ub97c", "\\ucc98\\ub9ac\\ud558\\ub824\\uba74", "\\uc5ec\\ub7ec", 265 "\\uac1c\\uc758", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", 266 "\\ud544\\uc694\\ud569\\ub2c8\\ub2e4.", "\\uc601\\uc5b4\\uc640", "\\uac19\\uc740", 267 "\\ub2e8\\uc77c", "\\uc5b8\\uc5b4\\uc758", "\\uacbd\\uc6b0\\ub3c4", 268 "\\uacf5\\ud1b5\\uc801\\uc73c\\ub85c", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0", 269 "\\uae00\\uc790,", "\\ubb38\\uc7a5", "\\ubd80\\ud638", "\\ubc0f", 270 "\\ud14c\\ud06c\\ub2c8\\uceec", "\\uae30\\ud638\\uc5d0", "\\ub9de\\ub294", "\\ub2e8\\uc77c", 271 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc744", "\\uac16\\uace0", "\\uc788\\uc9c0", 272 "\\ubabb\\ud558\\uc600\\uc2b5\\ub2c8\\ub2e4.", 273 274 "\\uc774\\ub7ec\\ud55c", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uc740", 275 "\\ub610\\ud55c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\uc2dc\\uc2a4\\ud15c\\uacfc", 276 "\\ucda9\\ub3cc\\ud569\\ub2c8\\ub2e4.", "\\uc989", "\\ub450", "\\uac00\\uc9c0", 277 "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774", "\\ub450", "\\uac1c\\uc758", "\\ub2e4\\ub978", 278 "\\ubb38\\uc790\\uc5d0", "\\ub300\\ud574", "\\uac19\\uc740", "\\ubc88\\ud638\\ub97c", 279 "\\uc0ac\\uc6a9\\ud558\\uac70\\ub098", "\\uac19\\uc740", "\\ubb38\\uc790\\uc5d0", 280 "\\ub300\\ud574", "\\ub2e4\\ub978", "\\ubc88\\ud638\\ub97c", "\\uc0ac\\uc6a9\\ud560", "\\uc218", 281 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc8fc\\uc5b4\\uc9c4", "\\ubaa8\\ub4e0", 282 "\\ucef4\\ud4e8\\ud130(\\ud2b9\\ud788", "\\uc11c\\ubc84)\\ub294", "\\uc11c\\ub85c", 283 "\\ub2e4\\ub978", "\\uc5ec\\ub7ec", "\\uac00\\uc9c0", "\\uae30\\ud638\\ud654", 284 "\\ubc29\\ubc95\\uc744", "\\uc9c0\\uc6d0\\ud574\\uc57c", 285 "\\ud569\\ub2c8\\ub2e4.", "\\uadf8\\ub7ec\\ub098,", "\\ub370\\uc774\\ud130\\ub97c", 286 "\\uc11c\\ub85c", "\\ub2e4\\ub978", "\\uae30\\ud638\\ud654", "\\ubc29\\ubc95\\uc774\\ub098", 287 "\\ud50c\\ub7ab\\ud3fc", "\\uac04\\uc5d0", "\\uc804\\ub2ec\\ud560", "\\ub54c\\ub9c8\\ub2e4", 288 "\\uadf8", "\\ub370\\uc774\\ud130\\ub294", "\\ud56d\\uc0c1", "\\uc190\\uc0c1\\uc758", 289 "\\uc704\\ud5d8\\uc744", "\\uacaa\\uac8c", "\\ub429\\ub2c8\\ub2e4.", 290 291 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub85c", "\\ubaa8\\ub4e0", "\\uac83\\uc744", 292 "\\ud574\\uacb0\\ud560", "\\uc218", "\\uc788\\uc2b5\\ub2c8\\ub2e4!", 293 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", "\\uc0ac\\uc6a9", "\\uc911\\uc778", 294 "\\ud50c\\ub7ab\\ud3fc,", "\\ud504\\ub85c\\uadf8\\ub7a8,", "\\uc5b8\\uc5b4\\uc5d0", 295 "\\uad00\\uacc4\\uc5c6\\uc774", "\\ubb38\\uc790\\ub9c8\\ub2e4", "\\uace0\\uc720\\ud55c", 296 "\\uc22b\\uc790\\ub97c", 297 "\\uc81c\\uacf5\\ud569\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", 298 "\\ud45c\\uc900\\uc740", // "Apple, HP, IBM, JustSystem, Microsoft, Oracle, SAP, " 299 // "Sun, Sybase, Unisys " 300 "\\ubc0f", "\\uae30\\ud0c0", "\\uc5ec\\ub7ec", 301 "\\ud68c\\uc0ac\\uc640", "\\uac19\\uc740", "\\uc5c5\\uacc4", 302 "\\uc120\\ub450\\uc8fc\\uc790\\uc5d0", "\\uc758\\ud574", 303 "\\ucc44\\ud0dd\\ub418\\uc5c8\\uc2b5\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub294", 304 // "XML, Java, ECMAScript(JavaScript), LDAP, CORBA 3.0, WML " 305 "\\ub4f1\\uacfc", 306 "\\uac19\\uc774", "\\ud604\\uc7ac", "\\ub110\\ub9ac", "\\uc0ac\\uc6a9\\ub418\\ub294", 307 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud544\\uc694\\ud558\\uba70", "\\uc774\\ub294", // "ISO/IEC", 308 "10646\\uc744", "\\uad6c\\ud604\\ud558\\ub294", "\\uacf5\\uc2dd\\uc801\\uc778", 309 "\\ubc29\\ubc95\\uc785\\ub2c8\\ub2e4.", "\\uc774\\ub294", "\\ub9ce\\uc740", "\\uc6b4\\uc601", 310 "\\uccb4\\uc81c,", "\\uc694\\uc998", "\\uc0ac\\uc6a9\\ub418\\ub294", "\\ubaa8\\ub4e0", 311 "\\ube0c\\ub77c\\uc6b0\\uc800", "\\ubc0f", "\\uae30\\ud0c0", "\\ub9ce\\uc740", 312 "\\uc81c\\ud488\\uc5d0\\uc11c", 313 "\\uc9c0\\uc6d0\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", 314 "\\ud45c\\uc900\\uc758", "\\ubd80\\uc0c1\\uacfc", "\\uc774\\ub97c", 315 "\\uc9c0\\uc6d0\\ud558\\ub294", "\\ub3c4\\uad6c\\uc758", "\\uac00\\uc6a9\\uc131\\uc740", 316 "\\ucd5c\\uadfc", "\\uc804", "\\uc138\\uacc4\\uc5d0", "\\ubd88\\uace0", "\\uc788\\ub294", 317 "\\uae30\\uc220", "\\uacbd\\ud5a5\\uc5d0\\uc11c", "\\uac00\\uc7a5", "\\uc911\\uc694\\ud55c", 318 "\\ubd80\\ubd84\\uc744", "\\ucc28\\uc9c0\\ud558\\uace0", "\\uc788\\uc2b5\\ub2c8\\ub2e4.", 319 320 "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c", 321 // Replaced a hyphen with a space to make the test case work with CLDR1.5 322 //"\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8-\\uc11c\\ubc84", "\\ub610\\ub294", 323 "\\ud074\\ub77c\\uc774\\uc5b8\\ud2b8 \\uc11c\\ubc84", "\\ub610\\ub294", 324 // Replaced a hyphen with a space. 325 //"\\ub2e4\\uc911-\\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc", 326 "\\ub2e4\\uc911 \\uc5f0\\uacb0", "\\uc751\\uc6a9", "\\ud504\\ub85c\\uadf8\\ub7a8\\uacfc", 327 "\\uc6f9", "\\uc0ac\\uc774\\ud2b8\\uc5d0", "\\ud1b5\\ud569\\ud558\\uba74", 328 "\\ub808\\uac70\\uc2dc", "\\ubb38\\uc790", "\\uc138\\ud2b8", "\\uc0ac\\uc6a9\\uc5d0", 329 "\\uc788\\uc5b4\\uc11c", "\\uc0c1\\ub2f9\\ud55c", "\\ube44\\uc6a9", "\\uc808\\uac10", 330 "\\ud6a8\\uacfc\\uac00", 331 "\\ub098\\ud0c0\\ub0a9\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc\\ub97c", 332 "\\ud1b5\\ud574", "\\ub9ac\\uc5d4\\uc9c0\\ub2c8\\uc5b4\\ub9c1", "\\uc5c6\\uc774", 333 "\\ub2e4\\uc911", "\\ud50c\\ub7ab\\ud3fc,", "\\uc5b8\\uc5b4", "\\ubc0f", "\\uad6d\\uac00", 334 "\\uac04\\uc5d0", "\\ub2e8\\uc77c", "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", 335 "\\ud50c\\ub7ab\\ud3fc", "\\ub610\\ub294", "\\ub2e8\\uc77c", "\\uc6f9", 336 "\\uc0ac\\uc774\\ud2b8\\ub97c", "\\ubaa9\\ud45c\\ub85c", "\\uc0bc\\uc744", "\\uc218", 337 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", "\\uc774\\ub97c", "\\uc0ac\\uc6a9\\ud558\\uba74", 338 "\\ub370\\uc774\\ud130\\ub97c", "\\uc190\\uc0c1", "\\uc5c6\\uc774", "\\uc5ec\\ub7ec", 339 "\\uc2dc\\uc2a4\\ud15c\\uc744", "\\ud1b5\\ud574", "\\uc804\\uc1a1\\ud560", "\\uc218", 340 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", 341 342 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc5d0", "\\ub300\\ud574", 343 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc740", 344 "\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300", 345 "\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc", 346 "\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744", 347 "\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758", 348 "\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70", 349 "\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574", 350 "\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4", 351 "\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4", 352 "\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294", 353 "\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758", 354 "\\ubc94\\uc704\\ub97c", 355 "\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758", 356 "\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0", 357 "\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc", 358 "\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740", 359 "\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098", 360 "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0", 361 "\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744", 362 "\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc", 363 "\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4", 364 "\\uc788\\uc2b5\\ub2c8\\ub2e4.", 365 366 "\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,", 367 "\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5", 368 "\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0", 369 "\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c", 370 "\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624." 371 }; 372 373 enum { WHAT_IS_UNICODE_length = UPRV_LENGTHOF(WHAT_IS_UNICODE) }; 374 375 UParseError parseError; 376 UErrorCode status = U_ZERO_ERROR; 377 Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status); 378 Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status); 379 if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) { 380 delete latinJamo; 381 delete jamoHangul; 382 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status)); 383 return; 384 } 385 Transliterator* jamoLatin = latinJamo->createInverse(status); 386 Transliterator* hangulJamo = jamoHangul->createInverse(status); 387 if (jamoLatin == 0 || hangulJamo == 0) { 388 errln("FAIL: createInverse returned NULL"); 389 delete latinJamo; 390 delete jamoLatin; 391 delete jamoHangul; 392 delete hangulJamo; 393 return; 394 } 395 396 Transliterator* tarray[4] = 397 { hangulJamo, jamoLatin, latinJamo, jamoHangul }; 398 CompoundTransliterator rt(tarray, 4); 399 400 UnicodeString buf; 401 int32_t total = 0; 402 int32_t errors = 0; 403 int32_t i; 404 for (i=0; i < WHAT_IS_UNICODE_length; ++i) { 405 ++total; 406 UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV); 407 hangul = hangul.unescape(); // Parse backslash-u escapes 408 UnicodeString hangulX = hangul; 409 rt.transliterate(hangulX); 410 if (hangul != hangulX) { 411 ++errors; 412 UnicodeString jamo = hangul; hangulJamo->transliterate(jamo); 413 UnicodeString latin = jamo; jamoLatin->transliterate(latin); 414 UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2); 415 UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2); 416 417 buf.remove(0); 418 buf.append("FAIL: "); 419 if (hangul2 != hangulX) { 420 buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")"); 421 } 422 // The Hangul-Jamo conversion is not usually the 423 // bug here, so we hide it from display. 424 // Uncomment lines to see the Hangul. 425 buf.append(//hangul + " => " + 426 jamoToName(jamo) + " => " + 427 latin + " => " + jamoToName(jamo2) 428 //+ " => " + hangul2 429 ); 430 errln(prettify(buf)); 431 } 432 } 433 if (errors != 0) { 434 errln((UnicodeString)"Test word failures: " + errors + " out of " + total); 435 } else { 436 logln((UnicodeString)"All " + total + " test words passed"); 437 } 438 439 delete latinJamo; 440 delete jamoLatin; 441 delete jamoHangul; 442 delete hangulJamo; 443 } 444 445 // Override TransliteratorTest 446 void 447 JamoTest::expectAux(const UnicodeString& tag, 448 const UnicodeString& summary, UBool pass, 449 const UnicodeString& expectedResult) { 450 UnicodeString jsum = jamoToName(summary); 451 UnicodeString jexp = jamoToName(expectedResult); 452 TransliteratorTest::expectAux(tag, jsum, pass, jexp); 453 } 454 455 const char* JamoTest::JAMO_NAMES_RULES = 456 "'(Gi)' <> \\u1100;" 457 "'(GGi)' <> \\u1101;" 458 "'(Ni)' <> \\u1102;" 459 "'(Di)' <> \\u1103;" 460 "'(DD)' <> \\u1104;" 461 "'(R)' <> \\u1105;" 462 "'(Mi)' <> \\u1106;" 463 "'(Bi)' <> \\u1107;" 464 "'(BB)' <> \\u1108;" 465 "'(Si)' <> \\u1109;" 466 "'(SSi)' <> \\u110A;" 467 "'(IEUNG)' <> \\u110B;" 468 "'(Ji)' <> \\u110C;" 469 "'(JJ)' <> \\u110D;" 470 "'(Ci)' <> \\u110E;" 471 "'(Ki)' <> \\u110F;" 472 "'(Ti)' <> \\u1110;" 473 "'(Pi)' <> \\u1111;" 474 "'(Hi)' <> \\u1112;" 475 476 "'(A)' <> \\u1161;" 477 "'(AE)' <> \\u1162;" 478 "'(YA)' <> \\u1163;" 479 "'(YAE)' <> \\u1164;" 480 "'(EO)' <> \\u1165;" 481 "'(E)' <> \\u1166;" 482 "'(YEO)' <> \\u1167;" 483 "'(YE)' <> \\u1168;" 484 "'(O)' <> \\u1169;" 485 "'(WA)' <> \\u116A;" 486 "'(WAE)' <> \\u116B;" 487 "'(OE)' <> \\u116C;" 488 "'(YO)' <> \\u116D;" 489 "'(U)' <> \\u116E;" 490 "'(WEO)' <> \\u116F;" 491 "'(WE)' <> \\u1170;" 492 "'(WI)' <> \\u1171;" 493 "'(YU)' <> \\u1172;" 494 "'(EU)' <> \\u1173;" 495 "'(YI)' <> \\u1174;" 496 "'(I)' <> \\u1175;" 497 498 "'(Gf)' <> \\u11A8;" 499 "'(GGf)' <> \\u11A9;" 500 "'(GS)' <> \\u11AA;" 501 "'(Nf)' <> \\u11AB;" 502 "'(NJ)' <> \\u11AC;" 503 "'(NH)' <> \\u11AD;" 504 "'(Df)' <> \\u11AE;" 505 "'(L)' <> \\u11AF;" 506 "'(LG)' <> \\u11B0;" 507 "'(LM)' <> \\u11B1;" 508 "'(LB)' <> \\u11B2;" 509 "'(LS)' <> \\u11B3;" 510 "'(LT)' <> \\u11B4;" 511 "'(LP)' <> \\u11B5;" 512 "'(LH)' <> \\u11B6;" 513 "'(Mf)' <> \\u11B7;" 514 "'(Bf)' <> \\u11B8;" 515 "'(BS)' <> \\u11B9;" 516 "'(Sf)' <> \\u11BA;" 517 "'(SSf)' <> \\u11BB;" 518 "'(NG)' <> \\u11BC;" 519 "'(Jf)' <> \\u11BD;" 520 "'(Cf)' <> \\u11BE;" 521 "'(Kf)' <> \\u11BF;" 522 "'(Tf)' <> \\u11C0;" 523 "'(Pf)' <> \\u11C1;" 524 "'(Hf)' <> \\u11C2;"; 525 526 /** 527 * Convert short names to actual jamo. E.g., "x(LG)y" returns 528 * "x\u11B0y". See JAMO_NAMES for table of names. 529 */ 530 UnicodeString 531 JamoTest::nameToJamo(const UnicodeString& input) { 532 if (NAME_JAMO == 0) { 533 errln("Failed to create NAME_JAMO"); 534 return input; /* failure! */ 535 } 536 UnicodeString result(input); 537 NAME_JAMO->transliterate(result); 538 return result; 539 } 540 541 /** 542 * Convert jamo to short names. E.g., "x\u11B0y" returns 543 * "x(LG)y". See JAMO_NAMES for table of names. 544 */ 545 UnicodeString 546 JamoTest::jamoToName(const UnicodeString& input) { 547 if (NAME_JAMO == 0) { 548 errln("Failed to create NAME_JAMO"); 549 return input; /* failure! */ 550 } 551 UnicodeString result(input); 552 JAMO_NAME->transliterate(result); 553 return result; 554 } 555 556 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 557