1 /* 2 ********************************************************************** 3 * Copyright (C) 1999-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Date Name Description 7 * 11/10/99 aliu Creation. 8 ********************************************************************** 9 */ 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_TRANSLITERATION 14 15 #include "transtst.h" 16 #include "unicode/locid.h" 17 #include "unicode/dtfmtsym.h" 18 #include "unicode/normlzr.h" 19 #include "unicode/translit.h" 20 #include "unicode/uchar.h" 21 #include "unicode/unifilt.h" 22 #include "unicode/uniset.h" 23 #include "unicode/ustring.h" 24 #include "unicode/usetiter.h" 25 #include "unicode/uscript.h" 26 #include "unicode/utf16.h" 27 #include "cpdtrans.h" 28 #include "nultrans.h" 29 #include "rbt.h" 30 #include "rbt_pars.h" 31 #include "anytrans.h" 32 #include "esctrn.h" 33 #include "name2uni.h" 34 #include "nortrans.h" 35 #include "remtrans.h" 36 #include "titletrn.h" 37 #include "tolowtrn.h" 38 #include "toupptrn.h" 39 #include "unesctrn.h" 40 #include "uni2name.h" 41 #include "cstring.h" 42 #include "cmemory.h" 43 #include <stdio.h> 44 45 /*********************************************************************** 46 47 HOW TO USE THIS TEST FILE 48 -or- 49 How I developed on two platforms 50 without losing (too much of) my mind 51 52 53 1. Add new tests by copying/pasting/changing existing tests. On Java, 54 any public void method named Test...() taking no parameters becomes 55 a test. On C++, you need to modify the header and add a line to 56 the runIndexedTest() dispatch method. 57 58 2. Make liberal use of the expect() method; it is your friend. 59 60 3. The tests in this file exactly match those in a sister file on the 61 other side. The two files are: 62 63 icu4j: src/com/ibm/test/translit/TransliteratorTest.java 64 icu4c: source/test/intltest/transtst.cpp 65 66 ==> THIS IS THE IMPORTANT PART <== 67 68 When you add a test in this file, add it in TransliteratorTest.java 69 too. Give it the same name and put it in the same relative place. 70 This makes maintenance a lot simpler for any poor soul who ends up 71 trying to synchronize the tests between icu4j and icu4c. 72 73 4. If you MUST enter a test that is NOT paralleled in the sister file, 74 then add it in the special non-mirrored section. These are 75 labeled 76 77 "icu4j ONLY" 78 79 or 80 81 "icu4c ONLY" 82 83 Make sure you document the reason the test is here and not there. 84 85 86 Thank you. 87 The Management 88 ***********************************************************************/ 89 90 // Define character constants thusly to be EBCDIC-friendly 91 enum { 92 LEFT_BRACE=((UChar)0x007B), /*{*/ 93 PIPE =((UChar)0x007C), /*|*/ 94 ZERO =((UChar)0x0030), /*0*/ 95 UPPER_A =((UChar)0x0041) /*A*/ 96 }; 97 98 TransliteratorTest::TransliteratorTest() 99 : DESERET_DEE((UChar32)0x10414), 100 DESERET_dee((UChar32)0x1043C) 101 { 102 } 103 104 TransliteratorTest::~TransliteratorTest() {} 105 106 void 107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec, 108 const char* &name, char* /*par*/) { 109 switch (index) { 110 TESTCASE(0,TestInstantiation); 111 TESTCASE(1,TestSimpleRules); 112 TESTCASE(2,TestRuleBasedInverse); 113 TESTCASE(3,TestKeyboard); 114 TESTCASE(4,TestKeyboard2); 115 TESTCASE(5,TestKeyboard3); 116 TESTCASE(6,TestArabic); 117 TESTCASE(7,TestCompoundKana); 118 TESTCASE(8,TestCompoundHex); 119 TESTCASE(9,TestFiltering); 120 TESTCASE(10,TestInlineSet); 121 TESTCASE(11,TestPatternQuoting); 122 TESTCASE(12,TestJ277); 123 TESTCASE(13,TestJ243); 124 TESTCASE(14,TestJ329); 125 TESTCASE(15,TestSegments); 126 TESTCASE(16,TestCursorOffset); 127 TESTCASE(17,TestArbitraryVariableValues); 128 TESTCASE(18,TestPositionHandling); 129 TESTCASE(19,TestHiraganaKatakana); 130 TESTCASE(20,TestCopyJ476); 131 TESTCASE(21,TestAnchors); 132 TESTCASE(22,TestInterIndic); 133 TESTCASE(23,TestFilterIDs); 134 TESTCASE(24,TestCaseMap); 135 TESTCASE(25,TestNameMap); 136 TESTCASE(26,TestLiberalizedID); 137 TESTCASE(27,TestCreateInstance); 138 TESTCASE(28,TestNormalizationTransliterator); 139 TESTCASE(29,TestCompoundRBT); 140 TESTCASE(30,TestCompoundFilter); 141 TESTCASE(31,TestRemove); 142 TESTCASE(32,TestToRules); 143 TESTCASE(33,TestContext); 144 TESTCASE(34,TestSupplemental); 145 TESTCASE(35,TestQuantifier); 146 TESTCASE(36,TestSTV); 147 TESTCASE(37,TestCompoundInverse); 148 TESTCASE(38,TestNFDChainRBT); 149 TESTCASE(39,TestNullInverse); 150 TESTCASE(40,TestAliasInverseID); 151 TESTCASE(41,TestCompoundInverseID); 152 TESTCASE(42,TestUndefinedVariable); 153 TESTCASE(43,TestEmptyContext); 154 TESTCASE(44,TestCompoundFilterID); 155 TESTCASE(45,TestPropertySet); 156 TESTCASE(46,TestNewEngine); 157 TESTCASE(47,TestQuantifiedSegment); 158 TESTCASE(48,TestDevanagariLatinRT); 159 TESTCASE(49,TestTeluguLatinRT); 160 TESTCASE(50,TestCompoundLatinRT); 161 TESTCASE(51,TestSanskritLatinRT); 162 TESTCASE(52,TestLocaleInstantiation); 163 TESTCASE(53,TestTitleAccents); 164 TESTCASE(54,TestLocaleResource); 165 TESTCASE(55,TestParseError); 166 TESTCASE(56,TestOutputSet); 167 TESTCASE(57,TestVariableRange); 168 TESTCASE(58,TestInvalidPostContext); 169 TESTCASE(59,TestIDForms); 170 TESTCASE(60,TestToRulesMark); 171 TESTCASE(61,TestEscape); 172 TESTCASE(62,TestAnchorMasking); 173 TESTCASE(63,TestDisplayName); 174 TESTCASE(64,TestSpecialCases); 175 #if !UCONFIG_NO_FILE_IO 176 TESTCASE(65,TestIncrementalProgress); 177 #endif 178 TESTCASE(66,TestSurrogateCasing); 179 TESTCASE(67,TestFunction); 180 TESTCASE(68,TestInvalidBackRef); 181 TESTCASE(69,TestMulticharStringSet); 182 TESTCASE(70,TestUserFunction); 183 TESTCASE(71,TestAnyX); 184 TESTCASE(72,TestSourceTargetSet); 185 TESTCASE(73,TestGurmukhiDevanagari); 186 TESTCASE(74,TestPatternWhiteSpace); 187 TESTCASE(75,TestAllCodepoints); 188 TESTCASE(76,TestBoilerplate); 189 TESTCASE(77,TestAlternateSyntax); 190 TESTCASE(78,TestBeginEnd); 191 TESTCASE(79,TestBeginEndToRules); 192 TESTCASE(80,TestRegisterAlias); 193 TESTCASE(81,TestRuleStripping); 194 TESTCASE(82,TestHalfwidthFullwidth); 195 TESTCASE(83,TestThai); 196 TESTCASE(84,TestAny); 197 default: name = ""; break; 198 } 199 } 200 201 static const UVersionInfo ICU_39 = {3,9,4,0}; 202 /** 203 * Make sure every system transliterator can be instantiated. 204 * 205 * ALSO test that the result of toRules() for each rule is a valid 206 * rule. Do this here so we don't have to have another test that 207 * instantiates everything as well. 208 */ 209 void TransliteratorTest::TestInstantiation() { 210 UErrorCode ec = U_ZERO_ERROR; 211 StringEnumeration* avail = Transliterator::getAvailableIDs(ec); 212 assertSuccess("getAvailableIDs()", ec); 213 assertTrue("getAvailableIDs()!=NULL", avail!=NULL); 214 int32_t n = Transliterator::countAvailableIDs(); 215 assertTrue("getAvailableIDs().count()==countAvailableIDs()", 216 avail->count(ec) == n); 217 assertSuccess("count()", ec); 218 UnicodeString name; 219 for (int32_t i=0; i<n; ++i) { 220 const UnicodeString& id = *avail->snext(ec); 221 if (!assertSuccess("snext()", ec) || 222 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) { 223 break; 224 } 225 UnicodeString id2 = Transliterator::getAvailableID(i); 226 if (id.length() < 1) { 227 errln(UnicodeString("FAIL: getAvailableID(") + 228 i + ") returned empty string"); 229 continue; 230 } 231 if (id != id2) { 232 errln(UnicodeString("FAIL: getAvailableID(") + 233 i + ") != getAvailableIDs().snext()"); 234 continue; 235 } 236 UParseError parseError; 237 UErrorCode status = U_ZERO_ERROR; 238 Transliterator* t = Transliterator::createInstance(id, 239 UTRANS_FORWARD, parseError,status); 240 name.truncate(0); 241 Transliterator::getDisplayName(id, name); 242 if (t == 0) { 243 #if UCONFIG_NO_BREAK_ITERATION 244 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. 245 if (id.compare((UnicodeString)"Thai-Latin") != 0) 246 #endif 247 dataerrln(UnicodeString("FAIL: Couldn't create ") + id + 248 /*", parse error " + parseError.code +*/ 249 ", line " + parseError.line + 250 ", offset " + parseError.offset + 251 ", pre-context " + prettify(parseError.preContext, TRUE) + 252 ", post-context " +prettify(parseError.postContext,TRUE) + 253 ", Error: " + u_errorName(status)); 254 // When createInstance fails, it deletes the failing 255 // entry from the available ID list. We detect this 256 // here by looking for a change in countAvailableIDs. 257 int32_t nn = Transliterator::countAvailableIDs(); 258 if (nn == (n - 1)) { 259 n = nn; 260 --i; // Compensate for deleted entry 261 } 262 } else { 263 logln(UnicodeString("OK: ") + name + " (" + id + ")"); 264 265 // Now test toRules 266 UnicodeString rules; 267 t->toRules(rules, TRUE); 268 Transliterator *u = Transliterator::createFromRules("x", 269 rules, UTRANS_FORWARD, parseError,status); 270 if (u == 0) { 271 errln(UnicodeString("FAIL: ") + id + 272 ".createFromRules() => bad rules" + 273 /*", parse error " + parseError.code +*/ 274 ", line " + parseError.line + 275 ", offset " + parseError.offset + 276 ", context " + prettify(parseError.preContext, TRUE) + 277 ", rules: " + prettify(rules, TRUE)); 278 } else { 279 delete u; 280 } 281 delete t; 282 } 283 } 284 assertTrue("snext()==NULL", avail->snext(ec)==NULL); 285 assertSuccess("snext()", ec); 286 delete avail; 287 288 // Now test the failure path 289 UParseError parseError; 290 UErrorCode status = U_ZERO_ERROR; 291 UnicodeString id("<Not a valid Transliterator ID>"); 292 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status); 293 if (t != 0) { 294 errln("FAIL: " + id + " returned a transliterator"); 295 delete t; 296 } else { 297 logln("OK: Bogus ID handled properly"); 298 } 299 } 300 301 void TransliteratorTest::TestSimpleRules(void) { 302 /* Example: rules 1. ab>x|y 303 * 2. yc>z 304 * 305 * []|eabcd start - no match, copy e to tranlated buffer 306 * [e]|abcd match rule 1 - copy output & adjust cursor 307 * [ex|y]cd match rule 2 - copy output & adjust cursor 308 * [exz]|d no match, copy d to transliterated buffer 309 * [exzd]| done 310 */ 311 expect(UnicodeString("ab>x|y;", "") + 312 "yc>z", 313 "eabcd", "exzd"); 314 315 /* Another set of rules: 316 * 1. ab>x|yzacw 317 * 2. za>q 318 * 3. qc>r 319 * 4. cw>n 320 * 321 * []|ab Rule 1 322 * [x|yzacw] No match 323 * [xy|zacw] Rule 2 324 * [xyq|cw] Rule 4 325 * [xyqn]| Done 326 */ 327 expect(UnicodeString("ab>x|yzacw;") + 328 "za>q;" + 329 "qc>r;" + 330 "cw>n", 331 "ab", "xyqn"); 332 333 /* Test categories 334 */ 335 UErrorCode status = U_ZERO_ERROR; 336 UParseError parseError; 337 Transliterator *t = Transliterator::createFromRules( 338 "<ID>", 339 UnicodeString("$dummy=").append((UChar)0xE100) + 340 UnicodeString(";" 341 "$vowel=[aeiouAEIOU];" 342 "$lu=[:Lu:];" 343 "$vowel } $lu > '!';" 344 "$vowel > '&';" 345 "'!' { $lu > '^';" 346 "$lu > '*';" 347 "a > ERROR", ""), 348 UTRANS_FORWARD, parseError, 349 status); 350 if (U_FAILURE(status)) { 351 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status)); 352 return; 353 } 354 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 355 delete t; 356 } 357 358 /** 359 * Test inline set syntax and set variable syntax. 360 */ 361 void TransliteratorTest::TestInlineSet(void) { 362 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 363 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 364 365 expect(UnicodeString( 366 "$digit = [0-9];" 367 "$alpha = [a-zA-Z];" 368 "$alphanumeric = [$digit $alpha];" // *** 369 "$special = [^$alphanumeric];" // *** 370 "$alphanumeric > '-';" 371 "$special > '*';", ""), 372 373 "thx-1138", "---*----"); 374 } 375 376 /** 377 * Create some inverses and confirm that they work. We have to be 378 * careful how we do this, since the inverses will not be true 379 * inverses -- we can't throw any random string at the composition 380 * of the transliterators and expect the identity function. F x 381 * F' != I. However, if we are careful about the input, we will 382 * get the expected results. 383 */ 384 void TransliteratorTest::TestRuleBasedInverse(void) { 385 UnicodeString RULES = 386 UnicodeString("abc>zyx;") + 387 "ab>yz;" + 388 "bc>zx;" + 389 "ca>xy;" + 390 "a>x;" + 391 "b>y;" + 392 "c>z;" + 393 394 "abc<zyx;" + 395 "ab<yz;" + 396 "bc<zx;" + 397 "ca<xy;" + 398 "a<x;" + 399 "b<y;" + 400 "c<z;" + 401 402 ""; 403 404 const char* DATA[] = { 405 // Careful here -- random strings will not work. If we keep 406 // the left side to the domain and the right side to the range 407 // we will be okay though (left, abc; right xyz). 408 "a", "x", 409 "abcacab", "zyxxxyy", 410 "caccb", "xyzzy", 411 }; 412 413 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 414 415 UErrorCode status = U_ZERO_ERROR; 416 UParseError parseError; 417 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES, 418 UTRANS_FORWARD, parseError, status); 419 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES, 420 UTRANS_REVERSE, parseError, status); 421 if (U_FAILURE(status)) { 422 errln("FAIL: RBT constructor failed"); 423 return; 424 } 425 for (int32_t i=0; i<DATA_length; i+=2) { 426 expect(*fwd, DATA[i], DATA[i+1]); 427 expect(*rev, DATA[i+1], DATA[i]); 428 } 429 delete fwd; 430 delete rev; 431 } 432 433 /** 434 * Basic test of keyboard. 435 */ 436 void TransliteratorTest::TestKeyboard(void) { 437 UParseError parseError; 438 UErrorCode status = U_ZERO_ERROR; 439 Transliterator *t = Transliterator::createFromRules("<ID>", 440 UnicodeString("psch>Y;") 441 +"ps>y;" 442 +"ch>x;" 443 +"a>A;", 444 UTRANS_FORWARD, parseError, 445 status); 446 if (U_FAILURE(status)) { 447 errln("FAIL: RBT constructor failed"); 448 return; 449 } 450 const char* DATA[] = { 451 // insertion, buffer 452 "a", "A", 453 "p", "Ap", 454 "s", "Aps", 455 "c", "Apsc", 456 "a", "AycA", 457 "psch", "AycAY", 458 0, "AycAY", // null means finishKeyboardTransliteration 459 }; 460 461 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); 462 delete t; 463 } 464 465 /** 466 * Basic test of keyboard with cursor. 467 */ 468 void TransliteratorTest::TestKeyboard2(void) { 469 UParseError parseError; 470 UErrorCode status = U_ZERO_ERROR; 471 Transliterator *t = Transliterator::createFromRules("<ID>", 472 UnicodeString("ych>Y;") 473 +"ps>|y;" 474 +"ch>x;" 475 +"a>A;", 476 UTRANS_FORWARD, parseError, 477 status); 478 if (U_FAILURE(status)) { 479 errln("FAIL: RBT constructor failed"); 480 return; 481 } 482 const char* DATA[] = { 483 // insertion, buffer 484 "a", "A", 485 "p", "Ap", 486 "s", "Aps", // modified for rollback - "Ay", 487 "c", "Apsc", // modified for rollback - "Ayc", 488 "a", "AycA", 489 "p", "AycAp", 490 "s", "AycAps", // modified for rollback - "AycAy", 491 "c", "AycApsc", // modified for rollback - "AycAyc", 492 "h", "AycAY", 493 0, "AycAY", // null means finishKeyboardTransliteration 494 }; 495 496 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); 497 delete t; 498 } 499 500 /** 501 * Test keyboard transliteration with back-replacement. 502 */ 503 void TransliteratorTest::TestKeyboard3(void) { 504 // We want th>z but t>y. Furthermore, during keyboard 505 // transliteration we want t>y then yh>z if t, then h are 506 // typed. 507 UnicodeString RULES("t>|y;" 508 "yh>z;"); 509 510 const char* DATA[] = { 511 // Column 1: characters to add to buffer (as if typed) 512 // Column 2: expected appearance of buffer after 513 // keyboard xliteration. 514 "a", "a", 515 "b", "ab", 516 "t", "abt", // modified for rollback - "aby", 517 "c", "abyc", 518 "t", "abyct", // modified for rollback - "abycy", 519 "h", "abycz", 520 0, "abycz", // null means finishKeyboardTransliteration 521 }; 522 523 UParseError parseError; 524 UErrorCode status = U_ZERO_ERROR; 525 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status); 526 if (U_FAILURE(status)) { 527 errln("FAIL: RBT constructor failed"); 528 return; 529 } 530 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); 531 delete t; 532 } 533 534 void TransliteratorTest::keyboardAux(const Transliterator& t, 535 const char* DATA[], int32_t DATA_length) { 536 UErrorCode status = U_ZERO_ERROR; 537 UTransPosition index={0, 0, 0, 0}; 538 UnicodeString s; 539 for (int32_t i=0; i<DATA_length; i+=2) { 540 UnicodeString log; 541 if (DATA[i] != 0) { 542 log = s + " + " 543 + DATA[i] 544 + " -> "; 545 t.transliterate(s, index, DATA[i], status); 546 } else { 547 log = s + " => "; 548 t.finishTransliteration(s, index); 549 } 550 // Show the start index '{' and the cursor '|' 551 UnicodeString a, b, c; 552 s.extractBetween(0, index.contextStart, a); 553 s.extractBetween(index.contextStart, index.start, b); 554 s.extractBetween(index.start, s.length(), c); 555 log.append(a). 556 append((UChar)LEFT_BRACE). 557 append(b). 558 append((UChar)PIPE). 559 append(c); 560 if (s == DATA[i+1] && U_SUCCESS(status)) { 561 logln(log); 562 } else { 563 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]); 564 } 565 } 566 } 567 568 void TransliteratorTest::TestArabic(void) { 569 // Test disabled for 2.0 until new Arabic transliterator can be written. 570 // /* 571 // const char* DATA[] = { 572 // "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+ 573 // "\u0627\u0644\u0644\u063a\u0629\u0020"+ 574 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+ 575 // "\u0628\u0628\u0646\u0638\u0645\u0020"+ 576 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+ 577 // "\u062c\u0645\u064a\u0644\u0629", 578 // }; 579 // */ 580 // 581 // UChar ar_raw[] = { 582 // 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627, 583 // 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644, 584 // 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, 585 // 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643, 586 // 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, 587 // 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0 588 // }; 589 // UnicodeString ar(ar_raw); 590 // UErrorCode status=U_ZERO_ERROR; 591 // UParseError parseError; 592 // Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status); 593 // if (t == 0) { 594 // errln("FAIL: createInstance failed"); 595 // return; 596 // } 597 // expect(*t, "Arabic", ar); 598 // delete t; 599 } 600 601 /** 602 * Compose the Kana transliterator forward and reverse and try 603 * some strings that should come out unchanged. 604 */ 605 void TransliteratorTest::TestCompoundKana(void) { 606 UParseError parseError; 607 UErrorCode status = U_ZERO_ERROR; 608 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status); 609 if (t == 0) { 610 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status)); 611 } else { 612 expect(*t, "aaaaa", "aaaaa"); 613 delete t; 614 } 615 } 616 617 /** 618 * Compose the hex transliterators forward and reverse. 619 */ 620 void TransliteratorTest::TestCompoundHex(void) { 621 UParseError parseError; 622 UErrorCode status = U_ZERO_ERROR; 623 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status); 624 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status); 625 Transliterator* transab[] = { a, b }; 626 Transliterator* transba[] = { b, a }; 627 if (a == 0 || b == 0) { 628 errln("FAIL: construction failed"); 629 delete a; 630 delete b; 631 return; 632 } 633 // Do some basic tests of a 634 expect(*a, "01", UnicodeString("\\u0030\\u0031", "")); 635 // Do some basic tests of b 636 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01"); 637 638 Transliterator* ab = new CompoundTransliterator(transab, 2); 639 UnicodeString s("abcde", ""); 640 expect(*ab, s, s); 641 642 UnicodeString str(s); 643 a->transliterate(str); 644 Transliterator* ba = new CompoundTransliterator(transba, 2); 645 expect(*ba, str, str); 646 647 delete ab; 648 delete ba; 649 delete a; 650 delete b; 651 } 652 653 int gTestFilterClassID = 0; 654 /** 655 * Used by TestFiltering(). 656 */ 657 class TestFilter : public UnicodeFilter { 658 virtual UnicodeFunctor* clone() const { 659 return new TestFilter(*this); 660 } 661 virtual UBool contains(UChar32 c) const { 662 return c != (UChar)0x0063 /*c*/; 663 } 664 // Stubs 665 virtual UnicodeString& toPattern(UnicodeString& result, 666 UBool /*escapeUnprintable*/) const { 667 return result; 668 } 669 virtual UBool matchesIndexValue(uint8_t /*v*/) const { 670 return FALSE; 671 } 672 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {} 673 public: 674 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; } 675 }; 676 677 /** 678 * Do some basic tests of filtering. 679 */ 680 void TransliteratorTest::TestFiltering(void) { 681 UParseError parseError; 682 UErrorCode status = U_ZERO_ERROR; 683 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status); 684 if (hex == 0) { 685 errln("FAIL: createInstance(Any-Hex) failed"); 686 return; 687 } 688 hex->adoptFilter(new TestFilter()); 689 UnicodeString s("abcde"); 690 hex->transliterate(s); 691 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", ""); 692 if (s == exp) { 693 logln(UnicodeString("Ok: \"") + exp + "\""); 694 } else { 695 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\""); 696 } 697 698 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J. 699 UnicodeFilter *f = hex->orphanFilter(); 700 if (f == NULL){ 701 errln("FAIL: orphanFilter() should get a UnicodeFilter"); 702 } else { 703 delete f; 704 } 705 delete hex; 706 } 707 708 /** 709 * Test anchors 710 */ 711 void TransliteratorTest::TestAnchors(void) { 712 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""), 713 "aaa", 714 "012"); 715 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""), 716 "aaa", 717 "012"); 718 expect(UnicodeString("^ab > 01 ;" 719 " ab > |8 ;" 720 " b > k ;" 721 " 8x$ > 45 ;" 722 " 8x > 77 ;", ""), 723 724 "ababbabxabx", 725 "018k7745"); 726 expect(UnicodeString("$s = [z$] ;" 727 "$s{ab > 01 ;" 728 " ab > |8 ;" 729 " b > k ;" 730 " 8x}$s > 45 ;" 731 " 8x > 77 ;", ""), 732 733 "abzababbabxzabxabx", 734 "01z018k45z01x45"); 735 } 736 737 /** 738 * Test pattern quoting and escape mechanisms. 739 */ 740 void TransliteratorTest::TestPatternQuoting(void) { 741 // Array of 3n items 742 // Each item is <rules>, <input>, <expected output> 743 const UnicodeString DATA[] = { 744 UnicodeString(UChar(0x4E01)) + ">'[male adult]'", 745 UnicodeString(UChar(0x4E01)), 746 "[male adult]" 747 }; 748 749 for (int32_t i=0; i<3; i+=3) { 750 logln(UnicodeString("Pattern: ") + prettify(DATA[i])); 751 UParseError parseError; 752 UErrorCode status = U_ZERO_ERROR; 753 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); 754 if (U_FAILURE(status)) { 755 errln("RBT constructor failed"); 756 } else { 757 expect(*t, DATA[i+1], DATA[i+2]); 758 } 759 delete t; 760 } 761 } 762 763 /** 764 * Regression test for bugs found in Greek transliteration. 765 */ 766 void TransliteratorTest::TestJ277(void) { 767 UErrorCode status = U_ZERO_ERROR; 768 UParseError parseError; 769 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status); 770 if (gl == NULL) { 771 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status)); 772 return; 773 } 774 775 UChar sigma = 0x3C3; 776 UChar upsilon = 0x3C5; 777 UChar nu = 0x3BD; 778 // UChar PHI = 0x3A6; 779 UChar alpha = 0x3B1; 780 // UChar omega = 0x3C9; 781 // UChar omicron = 0x3BF; 782 // UChar epsilon = 0x3B5; 783 784 // sigma upsilon nu -> syn 785 UnicodeString syn; 786 syn.append(sigma).append(upsilon).append(nu); 787 expect(*gl, syn, "syn"); 788 789 // sigma alpha upsilon nu -> saun 790 UnicodeString sayn; 791 sayn.append(sigma).append(alpha).append(upsilon).append(nu); 792 expect(*gl, sayn, "saun"); 793 794 // Again, using a smaller rule set 795 UnicodeString rules( 796 "$alpha = \\u03B1;" 797 "$nu = \\u03BD;" 798 "$sigma = \\u03C3;" 799 "$ypsilon = \\u03C5;" 800 "$vowel = [aeiouAEIOU$alpha$ypsilon];" 801 "s <> $sigma;" 802 "a <> $alpha;" 803 "u <> $vowel { $ypsilon;" 804 "y <> $ypsilon;" 805 "n <> $nu;", 806 ""); 807 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status); 808 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 809 expect(*mini, syn, "syn"); 810 expect(*mini, sayn, "saun"); 811 delete mini; 812 mini = NULL; 813 814 #if !UCONFIG_NO_FORMATTING 815 // Transliterate the Greek locale data 816 Locale el("el"); 817 DateFormatSymbols syms(el, status); 818 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 819 int32_t i, count; 820 const UnicodeString* data = syms.getMonths(count); 821 for (i=0; i<count; ++i) { 822 if (data[i].length() == 0) { 823 continue; 824 } 825 UnicodeString out(data[i]); 826 gl->transliterate(out); 827 UBool ok = TRUE; 828 if (data[i].length() >= 2 && out.length() >= 2 && 829 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 830 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 831 ok = FALSE; 832 } 833 } 834 if (ok) { 835 logln(prettify(data[i] + " -> " + out)); 836 } else { 837 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 838 } 839 } 840 #endif 841 842 delete gl; 843 } 844 845 /** 846 * Prefix, suffix support in hex transliterators 847 */ 848 void TransliteratorTest::TestJ243(void) { 849 UErrorCode ec = U_ZERO_ERROR; 850 851 // Test default Hex-Any, which should handle 852 // \u, \U, u+, and U+ 853 Transliterator *hex = 854 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec); 855 if (assertSuccess("getInstance", ec)) { 856 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz"); 857 } 858 delete hex; 859 860 // // Try a custom Hex-Unicode 861 // // \uXXXX and &#xXXXX; 862 // ec = U_ZERO_ERROR; 863 // HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec); 864 // expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""), 865 // "abcd5fx0123"); 866 // // Try custom Any-Hex (default is tested elsewhere) 867 // ec = U_ZERO_ERROR; 868 // UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec); 869 // expect(hex3, "012", "012"); 870 } 871 872 /** 873 * Parsers need better syntax error messages. 874 */ 875 void TransliteratorTest::TestJ329(void) { 876 877 struct { UBool containsErrors; const char* rule; } DATA[] = { 878 { FALSE, "a > b; c > d" }, 879 { TRUE, "a > b; no operator; c > d" }, 880 }; 881 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 882 883 for (int32_t i=0; i<DATA_length; ++i) { 884 UErrorCode status = U_ZERO_ERROR; 885 UParseError parseError; 886 Transliterator *rbt = Transliterator::createFromRules("<ID>", 887 DATA[i].rule, 888 UTRANS_FORWARD, 889 parseError, 890 status); 891 UBool gotError = U_FAILURE(status); 892 UnicodeString desc(DATA[i].rule); 893 desc.append(gotError ? " -> error" : " -> no error"); 894 if (gotError) { 895 desc = desc + ", ParseError code=" + u_errorName(status) + 896 " line=" + parseError.line + 897 " offset=" + parseError.offset + 898 " context=" + parseError.preContext; 899 } 900 if (gotError == DATA[i].containsErrors) { 901 logln(UnicodeString("Ok: ") + desc); 902 } else { 903 errln(UnicodeString("FAIL: ") + desc); 904 } 905 delete rbt; 906 } 907 } 908 909 /** 910 * Test segments and segment references. 911 */ 912 void TransliteratorTest::TestSegments(void) { 913 // Array of 3n items 914 // Each item is <rules>, <input>, <expected output> 915 UnicodeString DATA[] = { 916 "([a-z]) '.' ([0-9]) > $2 '-' $1", 917 "abc.123.xyz.456", 918 "ab1-c23.xy4-z56", 919 920 // nested 921 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;", 922 "a1 b2", 923 "a1.a.1 b2.b.2", 924 }; 925 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); 926 927 for (int32_t i=0; i<DATA_length; i+=3) { 928 logln("Pattern: " + prettify(DATA[i])); 929 UParseError parseError; 930 UErrorCode status = U_ZERO_ERROR; 931 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status); 932 if (U_FAILURE(status)) { 933 errln("FAIL: RBT constructor"); 934 } else { 935 expect(*t, DATA[i+1], DATA[i+2]); 936 } 937 delete t; 938 } 939 } 940 941 /** 942 * Test cursor positioning outside of the key 943 */ 944 void TransliteratorTest::TestCursorOffset(void) { 945 // Array of 3n items 946 // Each item is <rules>, <input>, <expected output> 947 UnicodeString DATA[] = { 948 "pre {alpha} post > | @ ALPHA ;" 949 "eALPHA > beta ;" 950 "pre {beta} post > BETA @@ | ;" 951 "post > xyz", 952 953 "prealphapost prebetapost", 954 955 "prbetaxyz preBETApost", 956 }; 957 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); 958 959 for (int32_t i=0; i<DATA_length; i+=3) { 960 logln("Pattern: " + prettify(DATA[i])); 961 UParseError parseError; 962 UErrorCode status = U_ZERO_ERROR; 963 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); 964 if (U_FAILURE(status)) { 965 errln("FAIL: RBT constructor"); 966 } else { 967 expect(*t, DATA[i+1], DATA[i+2]); 968 } 969 delete t; 970 } 971 } 972 973 /** 974 * Test zero length and > 1 char length variable values. Test 975 * use of variable refs in UnicodeSets. 976 */ 977 void TransliteratorTest::TestArbitraryVariableValues(void) { 978 // Array of 3n items 979 // Each item is <rules>, <input>, <expected output> 980 UnicodeString DATA[] = { 981 "$abe = ab;" 982 "$pat = x[yY]z;" 983 "$ll = 'a-z';" 984 "$llZ = [$ll];" 985 "$llY = [$ll$pat];" 986 "$emp = ;" 987 988 "$abe > ABE;" 989 "$pat > END;" 990 "$llZ > 1;" 991 "$llY > 2;" 992 "7$emp 8 > 9;" 993 "", 994 995 "ab xYzxyz stY78", 996 "ABE ENDEND 1129", 997 }; 998 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); 999 1000 for (int32_t i=0; i<DATA_length; i+=3) { 1001 logln("Pattern: " + prettify(DATA[i])); 1002 UParseError parseError; 1003 UErrorCode status = U_ZERO_ERROR; 1004 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); 1005 if (U_FAILURE(status)) { 1006 errln("FAIL: RBT constructor"); 1007 } else { 1008 expect(*t, DATA[i+1], DATA[i+2]); 1009 } 1010 delete t; 1011 } 1012 } 1013 1014 /** 1015 * Confirm that the contextStart, contextLimit, start, and limit 1016 * behave correctly. J474. 1017 */ 1018 void TransliteratorTest::TestPositionHandling(void) { 1019 // Array of 3n items 1020 // Each item is <rules>, <input>, <expected output> 1021 const char* DATA[] = { 1022 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 1023 "xtat txtb", // pos 0,9,0,9 1024 "xTTaSS TTxUUb", 1025 1026 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", 1027 "xtat txtb", // pos 2,9,3,8 1028 "xtaSS TTxUUb", 1029 1030 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", 1031 "xtat txtb", // pos 3,8,3,8 1032 "xtaTT TTxTTb", 1033 }; 1034 1035 // Array of 4n positions -- these go with the DATA array 1036 // They are: contextStart, contextLimit, start, limit 1037 int32_t POS[] = { 1038 0, 9, 0, 9, 1039 2, 9, 3, 8, 1040 3, 8, 3, 8, 1041 }; 1042 1043 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3; 1044 for (int32_t i=0; i<n; i++) { 1045 UErrorCode status = U_ZERO_ERROR; 1046 UParseError parseError; 1047 Transliterator *t = Transliterator::createFromRules("<ID>", 1048 DATA[3*i], UTRANS_FORWARD, parseError, status); 1049 if (U_FAILURE(status)) { 1050 delete t; 1051 errln("FAIL: RBT constructor"); 1052 return; 1053 } 1054 UTransPosition pos; 1055 pos.contextStart= POS[4*i]; 1056 pos.contextLimit = POS[4*i+1]; 1057 pos.start = POS[4*i+2]; 1058 pos.limit = POS[4*i+3]; 1059 UnicodeString rsource(DATA[3*i+1]); 1060 t->transliterate(rsource, pos, status); 1061 if (U_FAILURE(status)) { 1062 delete t; 1063 errln("FAIL: transliterate"); 1064 return; 1065 } 1066 t->finishTransliteration(rsource, pos); 1067 expectAux(DATA[3*i], 1068 DATA[3*i+1], 1069 rsource, 1070 DATA[3*i+2]); 1071 delete t; 1072 } 1073 } 1074 1075 /** 1076 * Test the Hiragana-Katakana transliterator. 1077 */ 1078 void TransliteratorTest::TestHiraganaKatakana(void) { 1079 UParseError parseError; 1080 UErrorCode status = U_ZERO_ERROR; 1081 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status); 1082 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status); 1083 if (hk == 0 || kh == 0) { 1084 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 1085 delete hk; 1086 delete kh; 1087 return; 1088 } 1089 1090 // Array of 3n items 1091 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 1092 const char* DATA[] = { 1093 "both", 1094 "\\u3042\\u3090\\u3099\\u3092\\u3050", 1095 "\\u30A2\\u30F8\\u30F2\\u30B0", 1096 1097 "kh", 1098 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC", 1099 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC", 1100 }; 1101 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 1102 1103 for (int32_t i=0; i<DATA_length; i+=3) { 1104 UnicodeString h = CharsToUnicodeString(DATA[i+1]); 1105 UnicodeString k = CharsToUnicodeString(DATA[i+2]); 1106 switch (*DATA[i]) { 1107 case 0x68: //'h': // Hiragana-Katakana 1108 expect(*hk, h, k); 1109 break; 1110 case 0x6B: //'k': // Katakana-Hiragana 1111 expect(*kh, k, h); 1112 break; 1113 case 0x62: //'b': // both 1114 expect(*hk, h, k); 1115 expect(*kh, k, h); 1116 break; 1117 } 1118 } 1119 delete hk; 1120 delete kh; 1121 } 1122 1123 /** 1124 * Test cloning / copy constructor of RBT. 1125 */ 1126 void TransliteratorTest::TestCopyJ476(void) { 1127 // The real test here is what happens when the destructors are 1128 // called. So we let one object get destructed, and check to 1129 // see that its copy still works. 1130 Transliterator *t2 = 0; 1131 { 1132 UParseError parseError; 1133 UErrorCode status = U_ZERO_ERROR; 1134 Transliterator *t1 = Transliterator::createFromRules("t1", 1135 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status); 1136 if (U_FAILURE(status)) { 1137 errln("FAIL: RBT constructor"); 1138 return; 1139 } 1140 t2 = t1->clone(); // Call copy constructor under the covers. 1141 expect(*t1, "abcfoofoo", "ABcbar"); 1142 delete t1; 1143 } 1144 expect(*t2, "abcfoofoo", "ABcbar"); 1145 delete t2; 1146 } 1147 1148 /** 1149 * Test inter-Indic transliterators. These are composed. 1150 * ICU4C Jitterbug 483. 1151 */ 1152 void TransliteratorTest::TestInterIndic(void) { 1153 UnicodeString ID("Devanagari-Gujarati", ""); 1154 UErrorCode status = U_ZERO_ERROR; 1155 UParseError parseError; 1156 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status); 1157 if (dg == 0) { 1158 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status)); 1159 return; 1160 } 1161 UnicodeString id = dg->getID(); 1162 if (id != ID) { 1163 errln("FAIL: createInstance(" + ID + ")->getID() => " + id); 1164 } 1165 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925"); 1166 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5"); 1167 expect(*dg, dev, guj); 1168 delete dg; 1169 } 1170 1171 /** 1172 * Test filter syntax in IDs. (J918) 1173 */ 1174 void TransliteratorTest::TestFilterIDs(void) { 1175 // Array of 3n strings: 1176 // <id>, <inverse id>, <input>, <expected output> 1177 const char* DATA[] = { 1178 "[aeiou]Any-Hex", // ID 1179 "[aeiou]Hex-Any", // expected inverse ID 1180 "quizzical", // src 1181 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 1182 1183 "[aeiou]Any-Hex;[^5]Hex-Any", 1184 "[^5]Any-Hex;[aeiou]Hex-Any", 1185 "quizzical", 1186 "q\\u0075izzical", 1187 1188 "[abc]Null", 1189 "[abc]Null", 1190 "xyz", 1191 "xyz", 1192 }; 1193 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) }; 1194 1195 for (int i=0; i<DATA_length; i+=4) { 1196 UnicodeString ID(DATA[i], ""); 1197 UnicodeString uID(DATA[i+1], ""); 1198 UnicodeString data2(DATA[i+2], ""); 1199 UnicodeString data3(DATA[i+3], ""); 1200 UParseError parseError; 1201 UErrorCode status = U_ZERO_ERROR; 1202 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status); 1203 if (t == 0) { 1204 errln("FAIL: createInstance(" + ID + ") returned NULL"); 1205 return; 1206 } 1207 expect(*t, data2, data3); 1208 1209 // Check the ID 1210 if (ID != t->getID()) { 1211 errln("FAIL: createInstance(" + ID + ").getID() => " + 1212 t->getID()); 1213 } 1214 1215 // Check the inverse 1216 Transliterator *u = t->createInverse(status); 1217 if (u == 0) { 1218 errln("FAIL: " + ID + ".createInverse() returned NULL"); 1219 } else if (u->getID() != uID) { 1220 errln("FAIL: " + ID + ".createInverse().getID() => " + 1221 u->getID() + ", expected " + uID); 1222 } 1223 1224 delete t; 1225 delete u; 1226 } 1227 } 1228 1229 /** 1230 * Test the case mapping transliterators. 1231 */ 1232 void TransliteratorTest::TestCaseMap(void) { 1233 UParseError parseError; 1234 UErrorCode status = U_ZERO_ERROR; 1235 Transliterator* toUpper = 1236 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status); 1237 Transliterator* toLower = 1238 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status); 1239 Transliterator* toTitle = 1240 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status); 1241 if (toUpper==0 || toLower==0 || toTitle==0) { 1242 errln("FAIL: createInstance returned NULL"); 1243 delete toUpper; 1244 delete toLower; 1245 delete toTitle; 1246 return; 1247 } 1248 1249 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.", 1250 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 1251 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 1252 "the quick brown foX jumped over the lazY dogs."); 1253 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.", 1254 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 1255 1256 delete toUpper; 1257 delete toLower; 1258 delete toTitle; 1259 } 1260 1261 /** 1262 * Test the name mapping transliterators. 1263 */ 1264 void TransliteratorTest::TestNameMap(void) { 1265 UParseError parseError; 1266 UErrorCode status = U_ZERO_ERROR; 1267 Transliterator* uni2name = 1268 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status); 1269 Transliterator* name2uni = 1270 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status); 1271 if (uni2name==0 || name2uni==0) { 1272 errln("FAIL: createInstance returned NULL"); 1273 delete uni2name; 1274 delete name2uni; 1275 return; 1276 } 1277 1278 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N 1279 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), 1280 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}")); 1281 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"), 1282 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); 1283 1284 delete uni2name; 1285 delete name2uni; 1286 1287 // round trip 1288 Transliterator* t = 1289 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status); 1290 if (t==0) { 1291 errln("FAIL: createInstance returned NULL"); 1292 delete t; 1293 return; 1294 } 1295 1296 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N 1297 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"); 1298 expect(*t, s, s); 1299 delete t; 1300 } 1301 1302 /** 1303 * Test liberalized ID syntax. 1006c 1304 */ 1305 void TransliteratorTest::TestLiberalizedID(void) { 1306 // Some test cases have an expected getID() value of NULL. This 1307 // means I have disabled the test case for now. This stuff is 1308 // still under development, and I haven't decided whether to make 1309 // getID() return canonical case yet. It will all get rewritten 1310 // with the move to Source-Target/Variant IDs anyway. [aliu] 1311 const char* DATA[] = { 1312 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity", 1313 " Null ", "Null", "whitespace", 1314 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 1315 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace", 1316 }; 1317 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); 1318 UParseError parseError; 1319 UErrorCode status= U_ZERO_ERROR; 1320 for (int32_t i=0; i<DATA_length; i+=3) { 1321 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status); 1322 if (t == 0) { 1323 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] + 1324 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status)); 1325 } else { 1326 UnicodeString exp; 1327 if (DATA[i+1]) { 1328 exp = UnicodeString(DATA[i+1], ""); 1329 } 1330 // Don't worry about getID() if the expected char* 1331 // is NULL -- see above. 1332 if (exp.length() == 0 || exp == t->getID()) { 1333 logln(UnicodeString("Ok: ") + DATA[i+2] + 1334 " create ID \"" + DATA[i] + "\" => \"" + 1335 exp + "\""); 1336 } else { 1337 errln(UnicodeString("FAIL: ") + DATA[i+2] + 1338 " create ID \"" + DATA[i] + "\" => \"" + 1339 t->getID() + "\", exp \"" + exp + "\""); 1340 } 1341 delete t; 1342 } 1343 } 1344 } 1345 1346 /* test for Jitterbug 912 */ 1347 void TransliteratorTest::TestCreateInstance(){ 1348 const char* FORWARD = "F"; 1349 const char* REVERSE = "R"; 1350 const char* DATA[] = { 1351 // Column 1: id 1352 // Column 2: direction 1353 // Column 3: expected ID, or "" if expect failure 1354 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 1355 1356 // JB#2689: bad compound causes crash 1357 "InvalidSource-InvalidTarget", FORWARD, "", 1358 "InvalidSource-InvalidTarget", REVERSE, "", 1359 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 1360 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 1361 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 1362 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 1363 1364 NULL 1365 }; 1366 1367 for (int32_t i=0; DATA[i]; i+=3) { 1368 UParseError err; 1369 UErrorCode ec = U_ZERO_ERROR; 1370 UnicodeString id(DATA[i]); 1371 UTransDirection dir = (DATA[i+1]==FORWARD)? 1372 UTRANS_FORWARD:UTRANS_REVERSE; 1373 UnicodeString expID(DATA[i+2]); 1374 Transliterator* t = 1375 Transliterator::createInstance(id,dir,err,ec); 1376 UnicodeString newID; 1377 if (t) { 1378 newID = t->getID(); 1379 } 1380 UBool ok = (newID == expID); 1381 if (!t) { 1382 newID = u_errorName(ec); 1383 } 1384 if (ok) { 1385 logln((UnicodeString)"Ok: createInstance(" + 1386 id + "," + DATA[i+1] + ") => " + newID); 1387 } else { 1388 dataerrln((UnicodeString)"FAIL: createInstance(" + 1389 id + "," + DATA[i+1] + ") => " + newID + 1390 ", expected " + expID); 1391 } 1392 delete t; 1393 } 1394 } 1395 1396 /** 1397 * Test the normalization transliterator. 1398 */ 1399 void TransliteratorTest::TestNormalizationTransliterator() { 1400 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest 1401 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1402 const char* CANON[] = { 1403 // Input Decomposed Composed 1404 "cat", "cat", "cat" , 1405 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" , 1406 1407 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above 1408 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above 1409 1410 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above 1411 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below 1412 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above 1413 1414 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above 1415 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below 1416 1417 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave 1418 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave 1419 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron 1420 1421 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign 1422 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring 1423 1424 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0 1425 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0 1426 1427 "Henry IV", "Henry IV", "Henry IV" , 1428 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" , 1429 1430 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana) 1431 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten 1432 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten 1433 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten 1434 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten 1435 1436 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" , 1437 0 // end 1438 }; 1439 1440 const char* COMPAT[] = { 1441 // Input Decomposed Composed 1442 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed 1443 1444 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0 1445 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i 1446 1447 "Henry IV", "Henry IV", "Henry IV" , 1448 "Henry \\u2163", "Henry IV", "Henry IV" , 1449 1450 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana) 1451 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten 1452 1453 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten 1454 0 // end 1455 }; 1456 1457 int32_t i; 1458 UParseError parseError; 1459 UErrorCode status = U_ZERO_ERROR; 1460 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status); 1461 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status); 1462 if (!NFD || !NFC) { 1463 dataerrln("FAIL: createInstance failed: %s", u_errorName(status)); 1464 delete NFD; 1465 delete NFC; 1466 return; 1467 } 1468 for (i=0; CANON[i]; i+=3) { 1469 UnicodeString in = CharsToUnicodeString(CANON[i]); 1470 UnicodeString expd = CharsToUnicodeString(CANON[i+1]); 1471 UnicodeString expc = CharsToUnicodeString(CANON[i+2]); 1472 expect(*NFD, in, expd); 1473 expect(*NFC, in, expc); 1474 } 1475 delete NFD; 1476 delete NFC; 1477 1478 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status); 1479 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status); 1480 if (!NFKD || !NFKC) { 1481 errln("FAIL: createInstance failed"); 1482 delete NFKD; 1483 delete NFKC; 1484 return; 1485 } 1486 for (i=0; COMPAT[i]; i+=3) { 1487 UnicodeString in = CharsToUnicodeString(COMPAT[i]); 1488 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]); 1489 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]); 1490 expect(*NFKD, in, expkd); 1491 expect(*NFKC, in, expkc); 1492 } 1493 delete NFKD; 1494 delete NFKC; 1495 1496 UParseError pe; 1497 status = U_ZERO_ERROR; 1498 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove", 1499 UTRANS_FORWARD, 1500 pe, status); 1501 if (t == 0) { 1502 errln("FAIL: createInstance failed"); 1503 } 1504 expect(*t, CharsToUnicodeString("\\u010dx"), 1505 CharsToUnicodeString("c\\u030C")); 1506 delete t; 1507 } 1508 1509 /** 1510 * Test compound RBT rules. 1511 */ 1512 void TransliteratorTest::TestCompoundRBT(void) { 1513 // Careful with spacing and ';' here: Phrase this exactly 1514 // as toRules() is going to return it. If toRules() changes 1515 // with regard to spacing or ';', then adjust this string. 1516 UnicodeString rule("::Hex-Any;\n" 1517 "::Any-Lower;\n" 1518 "a > '.A.';\n" 1519 "b > '.B.';\n" 1520 "::[^t]Any-Upper;", ""); 1521 UParseError parseError; 1522 UErrorCode status = U_ZERO_ERROR; 1523 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status); 1524 if (t == 0) { 1525 errln("FAIL: createFromRules failed"); 1526 return; 1527 } 1528 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"), 1529 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1530 UnicodeString r; 1531 t->toRules(r, TRUE); 1532 if (r == rule) { 1533 logln((UnicodeString)"OK: toRules() => " + r); 1534 } else { 1535 errln((UnicodeString)"FAIL: toRules() => " + r + 1536 ", expected " + rule); 1537 } 1538 delete t; 1539 1540 // Now test toRules 1541 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status); 1542 if (t == 0) { 1543 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 1544 return; 1545 } 1546 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;"); 1547 t->toRules(r, TRUE); 1548 if (r != exp) { 1549 errln((UnicodeString)"FAIL: toRules() => " + r + 1550 ", expected " + exp); 1551 } else { 1552 logln((UnicodeString)"OK: toRules() => " + r); 1553 } 1554 delete t; 1555 1556 // Round trip the result of toRules 1557 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status); 1558 if (t == 0) { 1559 errln("FAIL: createFromRules #2 failed"); 1560 return; 1561 } else { 1562 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded"); 1563 } 1564 1565 // Test toRules again 1566 t->toRules(r, TRUE); 1567 if (r != exp) { 1568 errln((UnicodeString)"FAIL: toRules() => " + r + 1569 ", expected " + exp); 1570 } else { 1571 logln((UnicodeString)"OK: toRules() => " + r); 1572 } 1573 1574 delete t; 1575 1576 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform 1577 // to what the regenerated ID will look like. 1578 UnicodeString id("Upper(Lower);(NFKC)", ""); 1579 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status); 1580 if (t == 0) { 1581 errln("FAIL: createInstance #2 failed"); 1582 return; 1583 } 1584 if (t->getID() == id) { 1585 logln((UnicodeString)"OK: created " + id); 1586 } else { 1587 errln((UnicodeString)"FAIL: createInstance(" + id + 1588 ").getID() => " + t->getID()); 1589 } 1590 1591 Transliterator *u = t->createInverse(status); 1592 if (u == 0) { 1593 errln("FAIL: createInverse failed"); 1594 delete t; 1595 return; 1596 } 1597 exp = "NFKC();Lower(Upper)"; 1598 if (u->getID() == exp) { 1599 logln((UnicodeString)"OK: createInverse(" + id + ") => " + 1600 u->getID()); 1601 } else { 1602 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " + 1603 u->getID()); 1604 } 1605 delete t; 1606 delete u; 1607 } 1608 1609 /** 1610 * Compound filter semantics were orginially not implemented 1611 * correctly. Originally, each component filter f(i) is replaced by 1612 * f'(i) = f(i) && g, where g is the filter for the compound 1613 * transliterator. 1614 * 1615 * From Mark: 1616 * 1617 * Suppose and I have a transliterator X. Internally X is 1618 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1619 * 1620 * The compound should convert all greek characters (through latin) to 1621 * cyrillic, then lowercase the result. The filter should say "don't 1622 * touch 'A' in the original". But because an intermediate result 1623 * happens to go through "A", the Greek Alpha gets hung up. 1624 */ 1625 void TransliteratorTest::TestCompoundFilter(void) { 1626 UParseError parseError; 1627 UErrorCode status = U_ZERO_ERROR; 1628 Transliterator *t = Transliterator::createInstance 1629 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status); 1630 if (t == 0) { 1631 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 1632 return; 1633 } 1634 t->adoptFilter(new UnicodeSet("[^A]", status)); 1635 if (U_FAILURE(status)) { 1636 errln("FAIL: UnicodeSet ct failed"); 1637 delete t; 1638 return; 1639 } 1640 1641 // Only the 'A' at index 1 should remain unchanged 1642 expect(*t, 1643 CharsToUnicodeString("BA\\u039A\\u0391"), 1644 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1645 delete t; 1646 } 1647 1648 void TransliteratorTest::TestRemove(void) { 1649 UParseError parseError; 1650 UErrorCode status = U_ZERO_ERROR; 1651 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status); 1652 if (t == 0) { 1653 errln("FAIL: createInstance failed"); 1654 return; 1655 } 1656 1657 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts"); 1658 1659 // extra test for RemoveTransliterator::clone(), which at one point wasn't 1660 // duplicating the filter 1661 Transliterator* t2 = t->clone(); 1662 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts"); 1663 1664 delete t; 1665 delete t2; 1666 } 1667 1668 void TransliteratorTest::TestToRules(void) { 1669 const char* RBT = "rbt"; 1670 const char* SET = "set"; 1671 static const char* DATA[] = { 1672 RBT, 1673 "$a=\\u4E61; [$a] > A;", 1674 "[\\u4E61] > A;", 1675 1676 RBT, 1677 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1678 "[[:Zs:][:Zl:]]{a} > A;", 1679 1680 SET, 1681 "[[:Zs:][:Zl:]]", 1682 "[[:Zs:][:Zl:]]", 1683 1684 SET, 1685 "[:Ps:]", 1686 "[:Ps:]", 1687 1688 SET, 1689 "[:L:]", 1690 "[:L:]", 1691 1692 SET, 1693 "[[:L:]-[A]]", 1694 "[[:L:]-[A]]", 1695 1696 SET, 1697 "[~[:Lu:][:Ll:]]", 1698 "[~[:Lu:][:Ll:]]", 1699 1700 SET, 1701 "[~[a-z]]", 1702 "[~[a-z]]", 1703 1704 RBT, 1705 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1706 "[^[:Zs:]]{a} > A;", 1707 1708 RBT, 1709 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1710 "[[a-z]-[:Zs:]]{a} > A;", 1711 1712 RBT, 1713 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1714 "[[:Zs:]&[a-z]]{a} > A;", 1715 1716 RBT, 1717 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1718 "[x[:Zs:]]{a} > A;", 1719 1720 RBT, 1721 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;" 1722 "$macron = \\u0304 ;" 1723 "$evowel = [aeiouyAEIOUY] ;" 1724 "$iotasub = \\u0345 ;" 1725 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1726 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1727 1728 RBT, 1729 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1730 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1731 }; 1732 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 1733 1734 for (int32_t d=0; d < DATA_length; d+=3) { 1735 if (DATA[d] == RBT) { 1736 // Transliterator test 1737 UParseError parseError; 1738 UErrorCode status = U_ZERO_ERROR; 1739 Transliterator *t = Transliterator::createFromRules("ID", 1740 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status); 1741 if (t == 0) { 1742 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status)); 1743 return; 1744 } 1745 UnicodeString rules, escapedRules; 1746 t->toRules(rules, FALSE); 1747 t->toRules(escapedRules, TRUE); 1748 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]); 1749 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV); 1750 if (rules == expRules) { 1751 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) + 1752 " => " + rules); 1753 } else { 1754 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) + 1755 " => " + rules + ", exp " + expRules); 1756 } 1757 if (escapedRules == expEscapedRules) { 1758 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) + 1759 " => " + escapedRules); 1760 } else { 1761 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) + 1762 " => " + escapedRules + ", exp " + expEscapedRules); 1763 } 1764 delete t; 1765 1766 } else { 1767 // UnicodeSet test 1768 UErrorCode status = U_ZERO_ERROR; 1769 UnicodeString pat(DATA[d+1], -1, US_INV); 1770 UnicodeString expToPat(DATA[d+2], -1, US_INV); 1771 UnicodeSet set(pat, status); 1772 if (U_FAILURE(status)) { 1773 errln("FAIL: UnicodeSet ct failed"); 1774 return; 1775 } 1776 // Adjust spacing etc. as necessary. 1777 UnicodeString toPat; 1778 set.toPattern(toPat); 1779 if (expToPat == toPat) { 1780 logln((UnicodeString)"Ok: " + pat + 1781 " => " + toPat); 1782 } else { 1783 errln((UnicodeString)"FAIL: " + pat + 1784 " => " + prettify(toPat, TRUE) + 1785 ", exp " + prettify(pat, TRUE)); 1786 } 1787 } 1788 } 1789 } 1790 1791 void TransliteratorTest::TestContext() { 1792 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l 1793 expect("de > x; {d}e > y;", 1794 "de", 1795 "ye", 1796 &pos); 1797 1798 expect("ab{c} > z;", 1799 "xadabdabcy", 1800 "xadabdabzy"); 1801 } 1802 1803 void TransliteratorTest::TestSupplemental() { 1804 1805 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" 1806 "a > $a; $s > i;"), 1807 CharsToUnicodeString("ab\\U0001030Fx"), 1808 CharsToUnicodeString("\\U00010300bix")); 1809 1810 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" 1811 "$b=[A-Z\\U00010400-\\U0001044D];" 1812 "($a)($b) > $2 $1;"), 1813 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1814 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1815 1816 // k|ax\\U00010300xm 1817 1818 // k|a\\U00010400\\U00010300xm 1819 // ky|\\U00010400\\U00010300xm 1820 // ky\\U00010400|\\U00010300xm 1821 1822 // ky\\U00010400|\\U00010300\\U00010400m 1823 // ky\\U00010400y|\\U00010400m 1824 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" 1825 "$a {x} > | @ \\U00010400;" 1826 "{$a} [^\\u0000-\\uFFFF] > y;"), 1827 CharsToUnicodeString("kax\\U00010300xm"), 1828 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1829 1830 expectT("Any-Name", 1831 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1832 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}")); 1833 1834 expectT("Any-Hex/Unicode", 1835 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1836 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0")); 1837 1838 expectT("Any-Hex/C", 1839 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1840 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0")); 1841 1842 expectT("Any-Hex/Perl", 1843 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1844 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}")); 1845 1846 expectT("Any-Hex/Java", 1847 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1848 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0")); 1849 1850 expectT("Any-Hex/XML", 1851 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1852 "𐌰􏼀󠁡 "); 1853 1854 expectT("Any-Hex/XML10", 1855 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1856 "𐌰􏼀󠁡 "); 1857 1858 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"), 1859 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1860 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1861 } 1862 1863 void TransliteratorTest::TestQuantifier() { 1864 1865 // Make sure @ in a quantified anteContext works 1866 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1867 "AAAAAb", 1868 "aaa(aac)"); 1869 1870 // Make sure @ in a quantified postContext works 1871 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1872 "baaaaa", 1873 "caa(aaa)"); 1874 1875 // Make sure @ in a quantified postContext with seg ref works 1876 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1877 "baaaaa", 1878 "baa(aaa)"); 1879 1880 // Make sure @ past ante context doesn't enter ante context 1881 UTransPosition pos = {0, 5, 3, 5}; 1882 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1883 "xxxab", 1884 "xxx(ac)", 1885 &pos); 1886 1887 // Make sure @ past post context doesn't pass limit 1888 UTransPosition pos2 = {0, 4, 0, 2}; 1889 expect("{b} a+ > c @@ |; x > y; a > A;", 1890 "baxx", 1891 "caxx", 1892 &pos2); 1893 1894 // Make sure @ past post context doesn't enter post context 1895 expect("{b} a+ > c @@ |; x > y; a > A;", 1896 "baxx", 1897 "cayy"); 1898 1899 expect("(ab)? c > d;", 1900 "c abc ababc", 1901 "d d abd"); 1902 1903 // NOTE: The (ab)+ when referenced just yields a single "ab", 1904 // not the full sequence of them. This accords with perl behavior. 1905 expect("(ab)+ {x} > '(' $1 ')';", 1906 "x abx ababxy", 1907 "x ab(ab) abab(ab)y"); 1908 1909 expect("b+ > x;", 1910 "ac abc abbc abbbc", 1911 "ac axc axc axc"); 1912 1913 expect("[abc]+ > x;", 1914 "qac abrc abbcs abtbbc", 1915 "qx xrx xs xtx"); 1916 1917 expect("q{(ab)+} > x;", 1918 "qa qab qaba qababc qaba", 1919 "qa qx qxa qxc qxa"); 1920 1921 expect("q(ab)* > x;", 1922 "qa qab qaba qababc", 1923 "xa x xa xc"); 1924 1925 // NOTE: The (ab)+ when referenced just yields a single "ab", 1926 // not the full sequence of them. This accords with perl behavior. 1927 expect("q(ab)* > '(' $1 ')';", 1928 "qa qab qaba qababc", 1929 "()a (ab) (ab)a (ab)c"); 1930 1931 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1932 // quoted string 1933 expect("'ab'+ > x;", 1934 "bb ab ababb", 1935 "bb x xb"); 1936 1937 // $foo+ and $foo* -- the quantifier should apply to the entire 1938 // variable reference 1939 expect("$var = ab; $var+ > x;", 1940 "bb ab ababb", 1941 "bb x xb"); 1942 } 1943 1944 class TestTrans : public Transliterator { 1945 public: 1946 TestTrans(const UnicodeString& id) : Transliterator(id, 0) { 1947 } 1948 virtual Transliterator* clone(void) const { 1949 return new TestTrans(getID()); 1950 } 1951 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets, 1952 UBool /*isIncremental*/) const 1953 { 1954 offsets.start = offsets.limit; 1955 } 1956 virtual UClassID getDynamicClassID() const; 1957 static UClassID U_EXPORT2 getStaticClassID(); 1958 }; 1959 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans) 1960 1961 /** 1962 * Test Source-Target/Variant. 1963 */ 1964 void TransliteratorTest::TestSTV(void) { 1965 int32_t ns = Transliterator::countAvailableSources(); 1966 if (ns < 0 || ns > 255) { 1967 errln((UnicodeString)"FAIL: Bad source count: " + ns); 1968 return; 1969 } 1970 int32_t i, j; 1971 for (i=0; i<ns; ++i) { 1972 UnicodeString source; 1973 Transliterator::getAvailableSource(i, source); 1974 logln((UnicodeString)"" + i + ": " + source); 1975 if (source.length() == 0) { 1976 errln("FAIL: empty source"); 1977 continue; 1978 } 1979 int32_t nt = Transliterator::countAvailableTargets(source); 1980 if (nt < 0 || nt > 255) { 1981 errln((UnicodeString)"FAIL: Bad target count: " + nt); 1982 continue; 1983 } 1984 for (int32_t j=0; j<nt; ++j) { 1985 UnicodeString target; 1986 Transliterator::getAvailableTarget(j, source, target); 1987 logln((UnicodeString)" " + j + ": " + target); 1988 if (target.length() == 0) { 1989 errln("FAIL: empty target"); 1990 continue; 1991 } 1992 int32_t nv = Transliterator::countAvailableVariants(source, target); 1993 if (nv < 0 || nv > 255) { 1994 errln((UnicodeString)"FAIL: Bad variant count: " + nv); 1995 continue; 1996 } 1997 for (int32_t k=0; k<nv; ++k) { 1998 UnicodeString variant; 1999 Transliterator::getAvailableVariant(k, source, target, variant); 2000 if (variant.length() == 0) { 2001 logln((UnicodeString)" " + k + ": <empty>"); 2002 } else { 2003 logln((UnicodeString)" " + k + ": " + variant); 2004 } 2005 } 2006 } 2007 } 2008 2009 // Test registration 2010 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 2011 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 2012 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" }; 2013 for (i=0; i<3; ++i) { 2014 Transliterator *t = new TestTrans(IDS[i]); 2015 if (t == 0) { 2016 errln("FAIL: out of memory"); 2017 return; 2018 } 2019 if (t->getID() != IDS[i]) { 2020 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]); 2021 delete t; 2022 return; 2023 } 2024 Transliterator::registerInstance(t); 2025 UErrorCode status = U_ZERO_ERROR; 2026 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status); 2027 if (t == NULL) { 2028 errln((UnicodeString)"FAIL: Registration/creation failed for ID " + 2029 IDS[i]); 2030 } else { 2031 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " + 2032 IDS[i]); 2033 delete t; 2034 } 2035 Transliterator::unregister(IDS[i]); 2036 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status); 2037 if (t != NULL) { 2038 errln((UnicodeString)"FAIL: Unregistration failed for ID " + 2039 IDS[i]); 2040 delete t; 2041 } 2042 } 2043 2044 // Make sure getAvailable API reflects removal 2045 int32_t n = Transliterator::countAvailableIDs(); 2046 for (i=0; i<n; ++i) { 2047 UnicodeString id = Transliterator::getAvailableID(i); 2048 for (j=0; j<3; ++j) { 2049 if (id.caseCompare(FULL_IDS[j],0)==0) { 2050 errln((UnicodeString)"FAIL: unregister(" + id + ") failed"); 2051 } 2052 } 2053 } 2054 n = Transliterator::countAvailableTargets("Any"); 2055 for (i=0; i<n; ++i) { 2056 UnicodeString t; 2057 Transliterator::getAvailableTarget(i, "Any", t); 2058 if (t.caseCompare(IDS[0],0)==0) { 2059 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed"); 2060 } 2061 } 2062 n = Transliterator::countAvailableSources(); 2063 for (i=0; i<n; ++i) { 2064 UnicodeString s; 2065 Transliterator::getAvailableSource(i, s); 2066 for (j=0; j<3; ++j) { 2067 if (SOURCES[j] == NULL) continue; 2068 if (s.caseCompare(SOURCES[j],0)==0) { 2069 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed"); 2070 } 2071 } 2072 } 2073 } 2074 2075 /** 2076 * Test inverse of Greek-Latin; Title() 2077 */ 2078 void TransliteratorTest::TestCompoundInverse(void) { 2079 UParseError parseError; 2080 UErrorCode status = U_ZERO_ERROR; 2081 Transliterator *t = Transliterator::createInstance 2082 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status); 2083 if (t == 0) { 2084 dataerrln("FAIL: createInstance - %s", u_errorName(status)); 2085 return; 2086 } 2087 UnicodeString exp("(Title);Latin-Greek"); 2088 if (t->getID() == exp) { 2089 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 2090 t->getID()); 2091 } else { 2092 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 2093 t->getID() + "\", expected \"" + exp + "\""); 2094 } 2095 delete t; 2096 } 2097 2098 /** 2099 * Test NFD chaining with RBT 2100 */ 2101 void TransliteratorTest::TestNFDChainRBT() { 2102 UParseError pe; 2103 UErrorCode ec = U_ZERO_ERROR; 2104 Transliterator* t = Transliterator::createFromRules( 2105 "TEST", "::NFD; aa > Q; a > q;", 2106 UTRANS_FORWARD, pe, ec); 2107 if (t == NULL || U_FAILURE(ec)) { 2108 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec)); 2109 return; 2110 } 2111 expect(*t, "aa", "Q"); 2112 delete t; 2113 2114 // TEMPORARY TESTS -- BEING DEBUGGED 2115 //=- UnicodeString s, s2; 2116 //=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec); 2117 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); 2118 //=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D"); 2119 //=- expect(*t, s, s2); 2120 //=- delete t; 2121 //=- 2122 //=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec); 2123 //=- expect(*t, s2, s); 2124 //=- delete t; 2125 //=- 2126 //=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec); 2127 //=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); 2128 //=- expect(*t, s, s); 2129 //=- delete t; 2130 2131 // const char* source[] = { 2132 // /* 2133 // "\\u015Br\\u012Bmad", 2134 // "bhagavadg\\u012Bt\\u0101", 2135 // "adhy\\u0101ya", 2136 // "arjuna", 2137 // "vi\\u1E63\\u0101da", 2138 // "y\\u014Dga", 2139 // "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 2140 // "uv\\u0101cr\\u0325", 2141 // */ 2142 // "rmk\\u1E63\\u0113t", 2143 // //"dharmak\\u1E63\\u0113tr\\u0113", 2144 // /* 2145 // "kuruk\\u1E63\\u0113tr\\u0113", 2146 // "samav\\u0113t\\u0101", 2147 // "yuyutsava-\\u1E25", 2148 // "m\\u0101mak\\u0101-\\u1E25", 2149 // // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 2150 // "kimakurvata", 2151 // "san\\u0304java", 2152 // */ 2153 // 2154 // 0 2155 // }; 2156 // const char* expected[] = { 2157 // /* 2158 // "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 2159 // "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 2160 // "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 2161 // "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 2162 // "\\u0935\\u093f\\u0937\\u093e\\u0926", 2163 // "\\u092f\\u094b\\u0917", 2164 // "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 2165 // "\\u0909\\u0935\\u093E\\u091A\\u0943", 2166 // */ 2167 // "\\u0927", 2168 // //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2169 // /* 2170 // "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2171 // "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 2172 // "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 2173 // "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 2174 // // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 2175 // "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 2176 // "\\u0938\\u0902\\u091c\\u0935", 2177 // */ 2178 // 0 2179 // }; 2180 // UErrorCode status = U_ZERO_ERROR; 2181 // UParseError parseError; 2182 // UnicodeString message; 2183 // Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2184 // Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2185 // if(U_FAILURE(status)){ 2186 // errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2187 // errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) ); 2188 // delete latinToDevToLatin; 2189 // delete devToLatinToDev; 2190 // return; 2191 // } 2192 // UnicodeString gotResult; 2193 // for(int i= 0; source[i] != 0; i++){ 2194 // gotResult = source[i]; 2195 // expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 2196 // expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i])); 2197 // } 2198 // delete latinToDevToLatin; 2199 // delete devToLatinToDev; 2200 } 2201 2202 /** 2203 * Inverse of "Null" should be "Null". (J21) 2204 */ 2205 void TransliteratorTest::TestNullInverse() { 2206 UParseError pe; 2207 UErrorCode ec = U_ZERO_ERROR; 2208 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec); 2209 if (t == 0 || U_FAILURE(ec)) { 2210 errln("FAIL: createInstance"); 2211 return; 2212 } 2213 Transliterator *u = t->createInverse(ec); 2214 if (u == 0 || U_FAILURE(ec)) { 2215 errln("FAIL: createInverse"); 2216 delete t; 2217 return; 2218 } 2219 if (u->getID() != "Null") { 2220 errln("FAIL: Inverse of Null should be Null"); 2221 } 2222 delete t; 2223 delete u; 2224 } 2225 2226 /** 2227 * Check ID of inverse of alias. (J22) 2228 */ 2229 void TransliteratorTest::TestAliasInverseID() { 2230 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse 2231 UParseError pe; 2232 UErrorCode ec = U_ZERO_ERROR; 2233 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); 2234 if (t == 0 || U_FAILURE(ec)) { 2235 dataerrln("FAIL: createInstance - %s", u_errorName(ec)); 2236 return; 2237 } 2238 Transliterator *u = t->createInverse(ec); 2239 if (u == 0 || U_FAILURE(ec)) { 2240 errln("FAIL: createInverse"); 2241 delete t; 2242 return; 2243 } 2244 UnicodeString exp = "Hangul-Latin"; 2245 UnicodeString got = u->getID(); 2246 if (got != exp) { 2247 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got + 2248 ", expected " + exp); 2249 } 2250 delete t; 2251 delete u; 2252 } 2253 2254 /** 2255 * Test IDs of inverses of compound transliterators. (J20) 2256 */ 2257 void TransliteratorTest::TestCompoundInverseID() { 2258 UnicodeString ID = "Latin-Jamo;NFC(NFD)"; 2259 UParseError pe; 2260 UErrorCode ec = U_ZERO_ERROR; 2261 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); 2262 if (t == 0 || U_FAILURE(ec)) { 2263 dataerrln("FAIL: createInstance - %s", u_errorName(ec)); 2264 return; 2265 } 2266 Transliterator *u = t->createInverse(ec); 2267 if (u == 0 || U_FAILURE(ec)) { 2268 errln("FAIL: createInverse"); 2269 delete t; 2270 return; 2271 } 2272 UnicodeString exp = "NFD(NFC);Jamo-Latin"; 2273 UnicodeString got = u->getID(); 2274 if (got != exp) { 2275 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got + 2276 ", expected " + exp); 2277 } 2278 delete t; 2279 delete u; 2280 } 2281 2282 /** 2283 * Test undefined variable. 2284 2285 */ 2286 void TransliteratorTest::TestUndefinedVariable() { 2287 UnicodeString rule = "$initial } a <> \\u1161;"; 2288 UParseError pe; 2289 UErrorCode ec = U_ZERO_ERROR; 2290 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec); 2291 delete t; 2292 if (U_FAILURE(ec)) { 2293 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " + 2294 u_errorName(ec)); 2295 return; 2296 } 2297 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " + 2298 u_errorName(ec)); 2299 } 2300 2301 /** 2302 * Test empty context. 2303 */ 2304 void TransliteratorTest::TestEmptyContext() { 2305 expect(" { a } > b;", "xay a ", "xby b "); 2306 } 2307 2308 /** 2309 * Test compound filter ID syntax 2310 */ 2311 void TransliteratorTest::TestCompoundFilterID(void) { 2312 static const char* DATA[] = { 2313 // Col. 1 = ID or rule set (latter must start with #) 2314 2315 // = columns > 1 are null if expect col. 1 to be illegal = 2316 2317 // Col. 2 = direction, "F..." or "R..." 2318 // Col. 3 = source string 2319 // Col. 4 = exp result 2320 2321 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters 2322 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter 2323 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c", 2324 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393", 2325 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c", 2326 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393", 2327 NULL, 2328 }; 2329 2330 for (int32_t i=0; DATA[i]; i+=4) { 2331 UnicodeString id = CharsToUnicodeString(DATA[i]); 2332 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ? 2333 UTRANS_REVERSE : UTRANS_FORWARD; 2334 UnicodeString source; 2335 UnicodeString exp; 2336 if (DATA[i+2] != NULL) { 2337 source = CharsToUnicodeString(DATA[i+2]); 2338 exp = CharsToUnicodeString(DATA[i+3]); 2339 } 2340 UBool expOk = (DATA[i+1] != NULL); 2341 Transliterator* t = NULL; 2342 UParseError pe; 2343 UErrorCode ec = U_ZERO_ERROR; 2344 if (id.charAt(0) == 0x23/*#*/) { 2345 t = Transliterator::createFromRules("ID", id, direction, pe, ec); 2346 } else { 2347 t = Transliterator::createInstance(id, direction, pe, ec); 2348 } 2349 UBool ok = (t != NULL && U_SUCCESS(ec)); 2350 UnicodeString transID; 2351 if (t!=0) { 2352 transID = t->getID(); 2353 } 2354 else { 2355 transID = UnicodeString("NULL", ""); 2356 } 2357 if (ok == expOk) { 2358 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " + 2359 u_errorName(ec)); 2360 if (source.length() != 0) { 2361 expect(*t, source, exp); 2362 } 2363 delete t; 2364 } else { 2365 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " + 2366 u_errorName(ec)); 2367 } 2368 } 2369 } 2370 2371 /** 2372 * Test new property set syntax 2373 */ 2374 void TransliteratorTest::TestPropertySet() { 2375 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx"); 2376 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 2377 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 2378 } 2379 2380 /** 2381 * Test various failure points of the new 2.0 engine. 2382 */ 2383 void TransliteratorTest::TestNewEngine() { 2384 UParseError pe; 2385 UErrorCode ec = U_ZERO_ERROR; 2386 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec); 2387 if (t == 0 || U_FAILURE(ec)) { 2388 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec)); 2389 return; 2390 } 2391 // Katakana should be untouched 2392 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"), 2393 CharsToUnicodeString("\\u3042\\u3042\\u30A2")); 2394 2395 delete t; 2396 2397 #if 1 2398 // This test will only work if Transliterator.ROLLBACK is 2399 // true. Otherwise, this test will fail, revealing a 2400 // limitation of global filters in incremental mode. 2401 Transliterator *a = 2402 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec); 2403 Transliterator *A = 2404 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec); 2405 if (U_FAILURE(ec)) { 2406 delete a; 2407 delete A; 2408 return; 2409 } 2410 2411 Transliterator* array[3]; 2412 array[0] = a; 2413 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec); 2414 array[2] = A; 2415 if (U_FAILURE(ec)) { 2416 errln("FAIL: createInstance NFD"); 2417 delete a; 2418 delete A; 2419 delete array[1]; 2420 return; 2421 } 2422 2423 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec)); 2424 if (U_FAILURE(ec)) { 2425 errln("FAIL: UnicodeSet constructor"); 2426 delete a; 2427 delete A; 2428 delete array[1]; 2429 delete t; 2430 return; 2431 } 2432 2433 expect(*t, "aAaA", "bAbA"); 2434 2435 assertTrue("countElements", t->countElements() == 3); 2436 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A"); 2437 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD"); 2438 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b"); 2439 assertSuccess("getElement", ec); 2440 2441 delete a; 2442 delete A; 2443 delete array[1]; 2444 delete t; 2445 #endif 2446 2447 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 2448 "a", 2449 "ax"); 2450 2451 UnicodeString gr = CharsToUnicodeString( 2452 "$ddot = \\u0308 ;" 2453 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;" 2454 "$rough = \\u0314 ;" 2455 "($lcgvowel+ $ddot?) $rough > h | $1 ;" 2456 "\\u03b1 <> a ;" 2457 "$rough <> h ;"); 2458 2459 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha"); 2460 } 2461 2462 /** 2463 * Test quantified segment behavior. We want: 2464 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 2465 */ 2466 void TransliteratorTest::TestQuantifiedSegment(void) { 2467 // The normal case 2468 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 2469 2470 // The tricky case; the quantifier is around the segment 2471 expect("([abc])+ > x $1 x;", "cba", "xax"); 2472 2473 // Tricky case in reverse direction 2474 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 2475 2476 // Check post-context segment 2477 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 2478 2479 // Test toRule/toPattern for non-quantified segment. 2480 // Careful with spacing here. 2481 UnicodeString r("([a-c]){q} > x $1 x;"); 2482 UParseError pe; 2483 UErrorCode ec = U_ZERO_ERROR; 2484 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); 2485 if (U_FAILURE(ec)) { 2486 errln("FAIL: createFromRules"); 2487 delete t; 2488 return; 2489 } 2490 UnicodeString rr; 2491 t->toRules(rr, TRUE); 2492 if (r != rr) { 2493 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 2494 } else { 2495 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 2496 } 2497 delete t; 2498 2499 // Test toRule/toPattern for quantified segment. 2500 // Careful with spacing here. 2501 r = "([a-c])+{q} > x $1 x;"; 2502 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); 2503 if (U_FAILURE(ec)) { 2504 errln("FAIL: createFromRules"); 2505 delete t; 2506 return; 2507 } 2508 t->toRules(rr, TRUE); 2509 if (r != rr) { 2510 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 2511 } else { 2512 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 2513 } 2514 delete t; 2515 } 2516 2517 //====================================================================== 2518 // Ram's tests 2519 //====================================================================== 2520 void TransliteratorTest::TestDevanagariLatinRT(){ 2521 const int MAX_LEN= 52; 2522 const char* const source[MAX_LEN] = { 2523 "bh\\u0101rata", 2524 "kra", 2525 "k\\u1E63a", 2526 "khra", 2527 "gra", 2528 "\\u1E45ra", 2529 "cra", 2530 "chra", 2531 "j\\u00F1a", 2532 "jhra", 2533 "\\u00F1ra", 2534 "\\u1E6Dya", 2535 "\\u1E6Dhra", 2536 "\\u1E0Dya", 2537 //"r\\u0323ya", // \u095c is not valid in Devanagari 2538 "\\u1E0Dhya", 2539 "\\u1E5Bhra", 2540 "\\u1E47ra", 2541 "tta", 2542 "thra", 2543 "dda", 2544 "dhra", 2545 "nna", 2546 "pra", 2547 "phra", 2548 "bra", 2549 "bhra", 2550 "mra", 2551 "\\u1E49ra", 2552 //"l\\u0331ra", 2553 "yra", 2554 "\\u1E8Fra", 2555 //"l-", 2556 "vra", 2557 "\\u015Bra", 2558 "\\u1E63ra", 2559 "sra", 2560 "hma", 2561 "\\u1E6D\\u1E6Da", 2562 "\\u1E6D\\u1E6Dha", 2563 "\\u1E6Dh\\u1E6Dha", 2564 "\\u1E0D\\u1E0Da", 2565 "\\u1E0D\\u1E0Dha", 2566 "\\u1E6Dya", 2567 "\\u1E6Dhya", 2568 "\\u1E0Dya", 2569 "\\u1E0Dhya", 2570 // Not roundtrippable -- 2571 // \\u0939\\u094d\\u094d\\u092E - hma 2572 // \\u0939\\u094d\\u092E - hma 2573 // CharsToUnicodeString("hma"), 2574 "hya", 2575 "\\u015Br\\u0325", 2576 "\\u015Bca", 2577 "\\u0115", 2578 "san\\u0304j\\u012Bb s\\u0113nagupta", 2579 "\\u0101nand vaddir\\u0101ju", 2580 "\\u0101", 2581 "a" 2582 }; 2583 const char* const expected[MAX_LEN] = { 2584 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */ 2585 "\\u0915\\u094D\\u0930", /* kra */ 2586 "\\u0915\\u094D\\u0937", /* ks\\u0323a */ 2587 "\\u0916\\u094D\\u0930", /* khra */ 2588 "\\u0917\\u094D\\u0930", /* gra */ 2589 "\\u0919\\u094D\\u0930", /* n\\u0307ra */ 2590 "\\u091A\\u094D\\u0930", /* cra */ 2591 "\\u091B\\u094D\\u0930", /* chra */ 2592 "\\u091C\\u094D\\u091E", /* jn\\u0303a */ 2593 "\\u091D\\u094D\\u0930", /* jhra */ 2594 "\\u091E\\u094D\\u0930", /* n\\u0303ra */ 2595 "\\u091F\\u094D\\u092F", /* t\\u0323ya */ 2596 "\\u0920\\u094D\\u0930", /* t\\u0323hra */ 2597 "\\u0921\\u094D\\u092F", /* d\\u0323ya */ 2598 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari 2599 "\\u0922\\u094D\\u092F", /* d\\u0323hya */ 2600 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */ 2601 "\\u0923\\u094D\\u0930", /* n\\u0323ra */ 2602 "\\u0924\\u094D\\u0924", /* tta */ 2603 "\\u0925\\u094D\\u0930", /* thra */ 2604 "\\u0926\\u094D\\u0926", /* dda */ 2605 "\\u0927\\u094D\\u0930", /* dhra */ 2606 "\\u0928\\u094D\\u0928", /* nna */ 2607 "\\u092A\\u094D\\u0930", /* pra */ 2608 "\\u092B\\u094D\\u0930", /* phra */ 2609 "\\u092C\\u094D\\u0930", /* bra */ 2610 "\\u092D\\u094D\\u0930", /* bhra */ 2611 "\\u092E\\u094D\\u0930", /* mra */ 2612 "\\u0929\\u094D\\u0930", /* n\\u0331ra */ 2613 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */ 2614 "\\u092F\\u094D\\u0930", /* yra */ 2615 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */ 2616 //"l-", 2617 "\\u0935\\u094D\\u0930", /* vra */ 2618 "\\u0936\\u094D\\u0930", /* s\\u0301ra */ 2619 "\\u0937\\u094D\\u0930", /* s\\u0323ra */ 2620 "\\u0938\\u094D\\u0930", /* sra */ 2621 "\\u0939\\u094d\\u092E", /* hma */ 2622 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */ 2623 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */ 2624 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/ 2625 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */ 2626 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */ 2627 "\\u091F\\u094D\\u092F", /* t\\u0323ya */ 2628 "\\u0920\\u094D\\u092F", /* t\\u0323hya */ 2629 "\\u0921\\u094D\\u092F", /* d\\u0323ya */ 2630 "\\u0922\\u094D\\u092F", /* d\\u0323hya */ 2631 // "hma", /* hma */ 2632 "\\u0939\\u094D\\u092F", /* hya */ 2633 "\\u0936\\u0943", /* s\\u0301r\\u0325a */ 2634 "\\u0936\\u094D\\u091A", /* s\\u0301ca */ 2635 "\\u090d", /* e\\u0306 */ 2636 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924", 2637 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941", 2638 "\\u0906", 2639 "\\u0905", 2640 }; 2641 UErrorCode status = U_ZERO_ERROR; 2642 UParseError parseError; 2643 UnicodeString message; 2644 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2645 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2646 if(U_FAILURE(status)){ 2647 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2648 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2649 return; 2650 } 2651 UnicodeString gotResult; 2652 for(int i= 0; i<MAX_LEN; i++){ 2653 gotResult = source[i]; 2654 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i])); 2655 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i])); 2656 } 2657 delete latinToDev; 2658 delete devToLatin; 2659 } 2660 2661 void TransliteratorTest::TestTeluguLatinRT(){ 2662 const int MAX_LEN=10; 2663 const char* const source[MAX_LEN] = { 2664 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */ 2665 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */ 2666 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */ 2667 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */ 2668 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */ 2669 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */ 2670 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */ 2671 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */ 2672 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */ 2673 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */ 2674 }; 2675 2676 const char* const expected[MAX_LEN] = { 2677 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", 2678 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41", 2679 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26", 2680 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26", 2681 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24", 2682 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32", 2683 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", 2684 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32", 2685 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", 2686 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", 2687 }; 2688 2689 UErrorCode status = U_ZERO_ERROR; 2690 UParseError parseError; 2691 UnicodeString message; 2692 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status); 2693 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status); 2694 if(U_FAILURE(status)){ 2695 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2696 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2697 return; 2698 } 2699 UnicodeString gotResult; 2700 for(int i= 0; i<MAX_LEN; i++){ 2701 gotResult = source[i]; 2702 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i])); 2703 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i])); 2704 } 2705 delete latinToDev; 2706 delete devToLatin; 2707 } 2708 2709 void TransliteratorTest::TestSanskritLatinRT(){ 2710 const int MAX_LEN =16; 2711 const char* const source[MAX_LEN] = { 2712 "rmk\\u1E63\\u0113t", 2713 "\\u015Br\\u012Bmad", 2714 "bhagavadg\\u012Bt\\u0101", 2715 "adhy\\u0101ya", 2716 "arjuna", 2717 "vi\\u1E63\\u0101da", 2718 "y\\u014Dga", 2719 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 2720 "uv\\u0101cr\\u0325", 2721 "dharmak\\u1E63\\u0113tr\\u0113", 2722 "kuruk\\u1E63\\u0113tr\\u0113", 2723 "samav\\u0113t\\u0101", 2724 "yuyutsava\\u1E25", 2725 "m\\u0101mak\\u0101\\u1E25", 2726 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 2727 "kimakurvata", 2728 "san\\u0304java", 2729 }; 2730 const char* const expected[MAX_LEN] = { 2731 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", 2732 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 2733 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 2734 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 2735 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 2736 "\\u0935\\u093f\\u0937\\u093e\\u0926", 2737 "\\u092f\\u094b\\u0917", 2738 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 2739 "\\u0909\\u0935\\u093E\\u091A\\u0943", 2740 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2741 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2742 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 2743 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 2744 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 2745 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 2746 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 2747 "\\u0938\\u0902\\u091c\\u0935", 2748 }; 2749 UErrorCode status = U_ZERO_ERROR; 2750 UParseError parseError; 2751 UnicodeString message; 2752 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2753 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2754 if(U_FAILURE(status)){ 2755 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2756 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2757 return; 2758 } 2759 UnicodeString gotResult; 2760 for(int i= 0; i<MAX_LEN; i++){ 2761 gotResult = source[i]; 2762 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i])); 2763 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i])); 2764 } 2765 delete latinToDev; 2766 delete devToLatin; 2767 } 2768 2769 2770 void TransliteratorTest::TestCompoundLatinRT(){ 2771 const char* const source[] = { 2772 "rmk\\u1E63\\u0113t", 2773 "\\u015Br\\u012Bmad", 2774 "bhagavadg\\u012Bt\\u0101", 2775 "adhy\\u0101ya", 2776 "arjuna", 2777 "vi\\u1E63\\u0101da", 2778 "y\\u014Dga", 2779 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 2780 "uv\\u0101cr\\u0325", 2781 "dharmak\\u1E63\\u0113tr\\u0113", 2782 "kuruk\\u1E63\\u0113tr\\u0113", 2783 "samav\\u0113t\\u0101", 2784 "yuyutsava\\u1E25", 2785 "m\\u0101mak\\u0101\\u1E25", 2786 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 2787 "kimakurvata", 2788 "san\\u0304java" 2789 }; 2790 const int MAX_LEN = sizeof(source)/sizeof(source[0]); 2791 const char* const expected[MAX_LEN] = { 2792 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", 2793 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 2794 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 2795 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 2796 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 2797 "\\u0935\\u093f\\u0937\\u093e\\u0926", 2798 "\\u092f\\u094b\\u0917", 2799 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 2800 "\\u0909\\u0935\\u093E\\u091A\\u0943", 2801 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2802 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2803 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 2804 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 2805 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 2806 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 2807 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 2808 "\\u0938\\u0902\\u091c\\u0935" 2809 }; 2810 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) { 2811 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!"); 2812 return; 2813 } 2814 2815 UErrorCode status = U_ZERO_ERROR; 2816 UParseError parseError; 2817 UnicodeString message; 2818 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2819 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2820 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status); 2821 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status); 2822 2823 if(U_FAILURE(status)){ 2824 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2825 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2826 return; 2827 } 2828 UnicodeString gotResult; 2829 for(int i= 0; i<MAX_LEN; i++){ 2830 gotResult = source[i]; 2831 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i])); 2832 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 2833 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 2834 2835 } 2836 delete(latinToDevToLatin); 2837 delete(devToLatinToDev); 2838 delete(devToTelToDev); 2839 delete(latinToTelToLatin); 2840 } 2841 2842 /** 2843 * Test Gurmukhi-Devanagari Tippi and Bindi 2844 */ 2845 void TransliteratorTest::TestGurmukhiDevanagari(){ 2846 // the rule says: 2847 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2848 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2849 UErrorCode status = U_ZERO_ERROR; 2850 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status); 2851 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status); 2852 UParseError parseError; 2853 2854 UnicodeSetIterator vIter(vowel); 2855 UnicodeSetIterator nvIter(non_vowel); 2856 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status); 2857 if(U_FAILURE(status)) { 2858 dataerrln("Error creating transliterator %s", u_errorName(status)); 2859 delete trans; 2860 return; 2861 } 2862 UnicodeString src (" \\u0902", -1, US_INV); 2863 UnicodeString expected(" \\u0A02", -1, US_INV); 2864 src = src.unescape(); 2865 expected= expected.unescape(); 2866 2867 while(vIter.next()){ 2868 src.setCharAt(0,(UChar) vIter.getCodepoint()); 2869 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100)); 2870 expect(*trans,src,expected); 2871 } 2872 2873 expected.setCharAt(1,0x0A70); 2874 while(nvIter.next()){ 2875 //src.setCharAt(0,(char) nvIter.codepoint); 2876 src.setCharAt(0,(UChar)nvIter.getCodepoint()); 2877 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100)); 2878 expect(*trans,src,expected); 2879 } 2880 delete trans; 2881 } 2882 /** 2883 * Test instantiation from a locale. 2884 */ 2885 void TransliteratorTest::TestLocaleInstantiation(void) { 2886 UParseError pe; 2887 UErrorCode ec = U_ZERO_ERROR; 2888 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec); 2889 if (U_FAILURE(ec)) { 2890 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec)); 2891 delete t; 2892 return; 2893 } 2894 expect(*t, CharsToUnicodeString("\\u0430"), "a"); 2895 delete t; 2896 2897 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec); 2898 if (U_FAILURE(ec)) { 2899 errln("FAIL: createInstance(en-el)"); 2900 delete t; 2901 return; 2902 } 2903 expect(*t, "a", CharsToUnicodeString("\\u03B1")); 2904 delete t; 2905 } 2906 2907 /** 2908 * Test title case handling of accent (should ignore accents) 2909 */ 2910 void TransliteratorTest::TestTitleAccents(void) { 2911 UParseError pe; 2912 UErrorCode ec = U_ZERO_ERROR; 2913 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec); 2914 if (U_FAILURE(ec)) { 2915 errln("FAIL: createInstance(Title)"); 2916 delete t; 2917 return; 2918 } 2919 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe")); 2920 delete t; 2921 } 2922 2923 /** 2924 * Basic test of a locale resource based rule. 2925 */ 2926 void TransliteratorTest::TestLocaleResource() { 2927 const char* DATA[] = { 2928 // id from to 2929 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0", 2930 "Latin-el", "b", "\\u03bc\\u03c0", 2931 "Latin-Greek", "b", "\\u03B2", 2932 "Greek-Latin/UNGEGN", "\\u03B2", "v", 2933 "el-Latin", "\\u03B2", "v", 2934 "Greek-Latin", "\\u03B2", "b", 2935 }; 2936 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); 2937 for (int32_t i=0; i<DATA_length; i+=3) { 2938 UParseError pe; 2939 UErrorCode ec = U_ZERO_ERROR; 2940 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec); 2941 if (U_FAILURE(ec)) { 2942 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec)); 2943 delete t; 2944 continue; 2945 } 2946 expect(*t, CharsToUnicodeString(DATA[i+1]), 2947 CharsToUnicodeString(DATA[i+2])); 2948 delete t; 2949 } 2950 } 2951 2952 /** 2953 * Make sure parse errors reference the right line. 2954 */ 2955 void TransliteratorTest::TestParseError() { 2956 static const char* rule = 2957 "a > b;\n" 2958 "# more stuff\n" 2959 "d << b;"; 2960 UErrorCode ec = U_ZERO_ERROR; 2961 UParseError pe; 2962 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 2963 delete t; 2964 if (U_FAILURE(ec)) { 2965 UnicodeString err(pe.preContext); 2966 err.append((UChar)124/*|*/).append(pe.postContext); 2967 if (err.indexOf("d << b") >= 0) { 2968 logln("Ok: " + err); 2969 } else { 2970 errln("FAIL: " + err); 2971 } 2972 } 2973 else { 2974 errln("FAIL: no syntax error"); 2975 } 2976 static const char* maskingRule = 2977 "a>x;\n" 2978 "# more stuff\n" 2979 "ab>y;"; 2980 ec = U_ZERO_ERROR; 2981 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec); 2982 if (ec != U_RULE_MASK_ERROR) { 2983 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec)); 2984 } 2985 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) { 2986 errln("FAIL: did not get expected precontext"); 2987 } 2988 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) { 2989 errln("FAIL: did not get expected postcontext"); 2990 } 2991 } 2992 2993 /** 2994 * Make sure sets on output are disallowed. 2995 */ 2996 void TransliteratorTest::TestOutputSet() { 2997 UnicodeString rule = "$set = [a-cm-n]; b > $set;"; 2998 UErrorCode ec = U_ZERO_ERROR; 2999 UParseError pe; 3000 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 3001 delete t; 3002 if (U_FAILURE(ec)) { 3003 UnicodeString err(pe.preContext); 3004 err.append((UChar)124/*|*/).append(pe.postContext); 3005 logln("Ok: " + err); 3006 return; 3007 } 3008 errln("FAIL: No syntax error"); 3009 } 3010 3011 /** 3012 * Test the use variable range pragma, making sure that use of 3013 * variable range characters is detected and flagged as an error. 3014 */ 3015 void TransliteratorTest::TestVariableRange() { 3016 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 3017 UErrorCode ec = U_ZERO_ERROR; 3018 UParseError pe; 3019 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 3020 delete t; 3021 if (U_FAILURE(ec)) { 3022 UnicodeString err(pe.preContext); 3023 err.append((UChar)124/*|*/).append(pe.postContext); 3024 logln("Ok: " + err); 3025 return; 3026 } 3027 errln("FAIL: No syntax error"); 3028 } 3029 3030 /** 3031 * Test invalid post context error handling 3032 */ 3033 void TransliteratorTest::TestInvalidPostContext() { 3034 UnicodeString rule = "a}b{c>d;"; 3035 UErrorCode ec = U_ZERO_ERROR; 3036 UParseError pe; 3037 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 3038 delete t; 3039 if (U_FAILURE(ec)) { 3040 UnicodeString err(pe.preContext); 3041 err.append((UChar)124/*|*/).append(pe.postContext); 3042 if (err.indexOf("a}b{c") >= 0) { 3043 logln("Ok: " + err); 3044 } else { 3045 errln("FAIL: " + err); 3046 } 3047 return; 3048 } 3049 errln("FAIL: No syntax error"); 3050 } 3051 3052 /** 3053 * Test ID form variants 3054 */ 3055 void TransliteratorTest::TestIDForms() { 3056 const char* DATA[] = { 3057 "NFC", NULL, "NFD", 3058 "nfd", NULL, "NFC", // make sure case is ignored 3059 "Any-NFKD", NULL, "Any-NFKC", 3060 "Null", NULL, "Null", 3061 "-nfkc", "nfkc", "NFKD", 3062 "-nfkc/", "nfkc", "NFKD", 3063 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN", 3064 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 3065 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 3066 "Source-", NULL, NULL, 3067 "Source/Variant-", NULL, NULL, 3068 "Source-/Variant", NULL, NULL, 3069 "/Variant", NULL, NULL, 3070 "/Variant-", NULL, NULL, 3071 "-/Variant", NULL, NULL, 3072 "-/", NULL, NULL, 3073 "-", NULL, NULL, 3074 "/", NULL, NULL, 3075 }; 3076 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); 3077 3078 for (int32_t i=0; i<DATA_length; i+=3) { 3079 const char* ID = DATA[i]; 3080 const char* expID = DATA[i+1]; 3081 const char* expInvID = DATA[i+2]; 3082 UBool expValid = (expInvID != NULL); 3083 if (expID == NULL) { 3084 expID = ID; 3085 } 3086 UParseError pe; 3087 UErrorCode ec = U_ZERO_ERROR; 3088 Transliterator *t = 3089 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); 3090 if (U_FAILURE(ec)) { 3091 if (!expValid) { 3092 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec)); 3093 } else { 3094 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec)); 3095 } 3096 delete t; 3097 continue; 3098 } 3099 Transliterator *u = t->createInverse(ec); 3100 if (U_FAILURE(ec)) { 3101 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID); 3102 delete t; 3103 delete u; 3104 continue; 3105 } 3106 if (t->getID() == expID && 3107 u->getID() == expInvID) { 3108 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID); 3109 } else { 3110 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " + 3111 t->getID() + " x getInverse() => " + u->getID() + 3112 ", expected " + expInvID); 3113 } 3114 delete t; 3115 delete u; 3116 } 3117 } 3118 3119 static const UChar SPACE[] = {32,0}; 3120 static const UChar NEWLINE[] = {10,0}; 3121 static const UChar RETURN[] = {13,0}; 3122 static const UChar EMPTY[] = {0}; 3123 3124 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2, 3125 const UnicodeString& testRulesForward) { 3126 UnicodeString rules2; t2.toRules(rules2, TRUE); 3127 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 3128 rules2.findAndReplace(SPACE, EMPTY); 3129 rules2.findAndReplace(NEWLINE, EMPTY); 3130 rules2.findAndReplace(RETURN, EMPTY); 3131 3132 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY); 3133 3134 if (rules2 != testRules) { 3135 errln(label); 3136 logln((UnicodeString)"GENERATED RULES: " + rules2); 3137 logln((UnicodeString)"SHOULD BE: " + testRulesForward); 3138 } 3139 } 3140 3141 /** 3142 * Mark's toRules test. 3143 */ 3144 void TransliteratorTest::TestToRulesMark() { 3145 const char* testRules = 3146 "::[[:Latin:][:Mark:]];" 3147 "::NFKD (NFC);" 3148 "::Lower (Lower);" 3149 "a <> \\u03B1;" // alpha 3150 "::NFKC (NFD);" 3151 "::Upper (Lower);" 3152 "::Lower ();" 3153 "::([[:Greek:][:Mark:]]);" 3154 ; 3155 const char* testRulesForward = 3156 "::[[:Latin:][:Mark:]];" 3157 "::NFKD(NFC);" 3158 "::Lower(Lower);" 3159 "a > \\u03B1;" 3160 "::NFKC(NFD);" 3161 "::Upper (Lower);" 3162 "::Lower ();" 3163 ; 3164 const char* testRulesBackward = 3165 "::[[:Greek:][:Mark:]];" 3166 "::Lower (Upper);" 3167 "::NFD(NFKC);" 3168 "\\u03B1 > a;" 3169 "::Lower(Lower);" 3170 "::NFC(NFKD);" 3171 ; 3172 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute 3173 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute 3174 3175 UParseError pe; 3176 UErrorCode ec = U_ZERO_ERROR; 3177 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec); 3178 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec); 3179 3180 if (U_FAILURE(ec)) { 3181 delete t2; 3182 delete t3; 3183 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec)); 3184 return; 3185 } 3186 3187 expect(*t2, source, target); 3188 expect(*t3, target, source); 3189 3190 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV)); 3191 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV)); 3192 3193 delete t2; 3194 delete t3; 3195 } 3196 3197 /** 3198 * Test Escape and Unescape transliterators. 3199 */ 3200 void TransliteratorTest::TestEscape() { 3201 UParseError pe; 3202 UErrorCode ec; 3203 Transliterator *t; 3204 3205 ec = U_ZERO_ERROR; 3206 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec); 3207 if (U_FAILURE(ec)) { 3208 errln((UnicodeString)"FAIL: createInstance"); 3209 } else { 3210 expect(*t, 3211 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"), 3212 "@12Q"); 3213 } 3214 delete t; 3215 3216 ec = U_ZERO_ERROR; 3217 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec); 3218 if (U_FAILURE(ec)) { 3219 errln((UnicodeString)"FAIL: createInstance"); 3220 } else { 3221 expect(*t, 3222 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 3223 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED")); 3224 } 3225 delete t; 3226 3227 ec = U_ZERO_ERROR; 3228 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec); 3229 if (U_FAILURE(ec)) { 3230 errln((UnicodeString)"FAIL: createInstance"); 3231 } else { 3232 expect(*t, 3233 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 3234 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED")); 3235 } 3236 delete t; 3237 3238 ec = U_ZERO_ERROR; 3239 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec); 3240 if (U_FAILURE(ec)) { 3241 errln((UnicodeString)"FAIL: createInstance"); 3242 } else { 3243 expect(*t, 3244 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 3245 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}")); 3246 } 3247 delete t; 3248 } 3249 3250 3251 void TransliteratorTest::TestAnchorMasking(){ 3252 UnicodeString rule ("^a > Q; a > q;"); 3253 UErrorCode status= U_ZERO_ERROR; 3254 UParseError parseError; 3255 3256 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status); 3257 if(U_FAILURE(status)){ 3258 errln(UnicodeString("FAIL: ") + "ID" + 3259 ".createFromRules() => bad rules" + 3260 /*", parse error " + parseError.code +*/ 3261 ", line " + parseError.line + 3262 ", offset " + parseError.offset + 3263 ", context " + prettify(parseError.preContext, TRUE) + 3264 ", rules: " + prettify(rule, TRUE)); 3265 } 3266 delete t; 3267 } 3268 3269 /** 3270 * Make sure display names of variants look reasonable. 3271 */ 3272 void TransliteratorTest::TestDisplayName() { 3273 #if UCONFIG_NO_FORMATTING 3274 logln("Skipping, UCONFIG_NO_FORMATTING is set\n"); 3275 return; 3276 #else 3277 static const char* DATA[] = { 3278 // ID, forward name, reverse name 3279 // Update the text as necessary -- the important thing is 3280 // not the text itself, but how various cases are handled. 3281 3282 // Basic test 3283 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 3284 3285 // Variants 3286 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 3287 3288 // Target-only IDs 3289 "NFC", "Any to NFC", "Any to NFD", 3290 }; 3291 3292 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); 3293 3294 Locale US("en", "US"); 3295 3296 for (int32_t i=0; i<DATA_length; i+=3) { 3297 UnicodeString name; 3298 Transliterator::getDisplayName(DATA[i], US, name); 3299 if (name != DATA[i+1]) { 3300 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " + 3301 name + ", expected " + DATA[i+1]); 3302 } else { 3303 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name); 3304 } 3305 UErrorCode ec = U_ZERO_ERROR; 3306 UParseError pe; 3307 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec); 3308 if (U_FAILURE(ec)) { 3309 delete t; 3310 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec)); 3311 continue; 3312 } 3313 name = Transliterator::getDisplayName(t->getID(), US, name); 3314 if (name != DATA[i+2]) { 3315 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " + 3316 name + ", expected " + DATA[i+2]); 3317 } else { 3318 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name); 3319 } 3320 delete t; 3321 } 3322 #endif 3323 } 3324 3325 void TransliteratorTest::TestSpecialCases(void) { 3326 const UnicodeString registerRules[] = { 3327 "Any-Dev1", "x > X; y > Y;", 3328 "Any-Dev2", "XY > Z", 3329 "Greek-Latin/FAKE", 3330 CharsToUnicodeString 3331 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"), 3332 "" // END MARKER 3333 }; 3334 3335 const UnicodeString testCases[] = { 3336 // NORMALIZATION 3337 // should add more test cases 3338 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3339 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3340 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3341 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3342 3343 // mp -> b BUG 3344 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", 3345 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", 3346 3347 // check for devanagari bug 3348 "nfd;Dev1;Dev2;nfc", "xy", "Z", 3349 3350 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 3351 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3352 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee, 3353 3354 //TODO: enable this test once Titlecase works right 3355 /* 3356 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3357 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee, 3358 */ 3359 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3360 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE, 3361 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3362 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee, 3363 3364 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "", 3365 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "", 3366 3367 // FORMS OF S 3368 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 3369 CharsToUnicodeString("s ss s\\u0331s\\u0331") , 3370 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"), 3371 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") , 3372 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 3373 CharsToUnicodeString("s ss s\\u0331s\\u0331") , 3374 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"), 3375 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 3376 // Tatiana bug 3377 // Upper: TAT\\u02B9\\u00C2NA 3378 // Lower: tat\\u02B9\\u00E2na 3379 // Title: Tat\\u02B9\\u00E2na 3380 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"), 3381 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), 3382 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), 3383 CharsToUnicodeString("tat\\u02B9\\u00E2na"), 3384 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"), 3385 CharsToUnicodeString("Tat\\u02B9\\u00E2na"), 3386 3387 "" // END MARKER 3388 }; 3389 3390 UParseError pos; 3391 int32_t i; 3392 for (i = 0; registerRules[i].length()!=0; i+=2) { 3393 UErrorCode status = U_ZERO_ERROR; 3394 3395 Transliterator *t = Transliterator::createFromRules(registerRules[0+i], 3396 registerRules[i+1], UTRANS_FORWARD, pos, status); 3397 if (U_FAILURE(status)) { 3398 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status)); 3399 } else { 3400 Transliterator::registerInstance(t); 3401 } 3402 } 3403 for (i = 0; testCases[i].length()!=0; i+=3) { 3404 UErrorCode ec = U_ZERO_ERROR; 3405 UParseError pe; 3406 const UnicodeString& name = testCases[i]; 3407 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec); 3408 if (U_FAILURE(ec)) { 3409 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec)); 3410 delete t; 3411 continue; 3412 } 3413 const UnicodeString& id = t->getID(); 3414 const UnicodeString& source = testCases[i+1]; 3415 UnicodeString target; 3416 3417 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 3418 3419 if (testCases[i+2].length() > 0) { 3420 target = testCases[i+2]; 3421 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) { 3422 Normalizer::normalize(source, UNORM_NFD, 0, target, ec); 3423 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) { 3424 Normalizer::normalize(source, UNORM_NFC, 0, target, ec); 3425 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) { 3426 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec); 3427 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) { 3428 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec); 3429 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) { 3430 target = source; 3431 target.toLower(Locale::getUS()); 3432 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) { 3433 target = source; 3434 target.toUpper(Locale::getUS()); 3435 } 3436 if (U_FAILURE(ec)) { 3437 errln((UnicodeString)"FAIL: Internal error normalizing " + source); 3438 continue; 3439 } 3440 3441 expect(*t, source, target); 3442 delete t; 3443 } 3444 for (i = 0; registerRules[i].length()!=0; i+=2) { 3445 Transliterator::unregister(registerRules[i]); 3446 } 3447 } 3448 3449 char* Char32ToEscapedChars(UChar32 ch, char* buffer) { 3450 if (ch <= 0xFFFF) { 3451 sprintf(buffer, "\\u%04x", (int)ch); 3452 } else { 3453 sprintf(buffer, "\\U%08x", (int)ch); 3454 } 3455 return buffer; 3456 } 3457 3458 void TransliteratorTest::TestSurrogateCasing (void) { 3459 // check that casing handles surrogates 3460 // titlecase is currently defective 3461 char buffer[20]; 3462 UChar buffer2[20]; 3463 UChar32 dee; 3464 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee); 3465 UnicodeString DEE(u_totitle(dee)); 3466 if (DEE != DESERET_DEE) { 3467 err("Fails titlecase of surrogates"); 3468 err(Char32ToEscapedChars(dee, buffer)); 3469 err(", "); 3470 errln(Char32ToEscapedChars(DEE.char32At(0), buffer)); 3471 } 3472 3473 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE; 3474 UnicodeString deedeeTest = DESERET_dee + DESERET_dee; 3475 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE; 3476 UErrorCode status= U_ZERO_ERROR; 3477 3478 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status); 3479 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) { 3480 errln("Fails: Can't uppercase surrogates."); 3481 } 3482 3483 status= U_ZERO_ERROR; 3484 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status); 3485 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) { 3486 errln("Fails: Can't lowercase surrogates."); 3487 } 3488 } 3489 3490 static void _trans(Transliterator& t, const UnicodeString& src, 3491 UnicodeString& result) { 3492 result = src; 3493 t.transliterate(result); 3494 } 3495 3496 static void _trans(const UnicodeString& id, const UnicodeString& src, 3497 UnicodeString& result, UErrorCode ec) { 3498 UParseError pe; 3499 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec); 3500 if (U_SUCCESS(ec)) { 3501 _trans(*t, src, result); 3502 } 3503 delete t; 3504 } 3505 3506 static UnicodeString _findMatch(const UnicodeString& source, 3507 const UnicodeString* pairs) { 3508 UnicodeString empty; 3509 for (int32_t i=0; pairs[i].length() > 0; i+=2) { 3510 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) { 3511 return pairs[i+1]; 3512 } 3513 } 3514 return empty; 3515 } 3516 3517 // Check to see that incremental gets at least part way through a reasonable string. 3518 3519 void TransliteratorTest::TestIncrementalProgress(void) { 3520 UErrorCode ec = U_ZERO_ERROR; 3521 UnicodeString latinTest = "The Quick Brown Fox."; 3522 UnicodeString devaTest; 3523 _trans("Latin-Devanagari", latinTest, devaTest, ec); 3524 UnicodeString kataTest; 3525 _trans("Latin-Katakana", latinTest, kataTest, ec); 3526 if (U_FAILURE(ec)) { 3527 errln("FAIL: Internal error"); 3528 return; 3529 } 3530 const UnicodeString tests[] = { 3531 "Any", latinTest, 3532 "Latin", latinTest, 3533 "Halfwidth", latinTest, 3534 "Devanagari", devaTest, 3535 "Katakana", kataTest, 3536 "" // END MARKER 3537 }; 3538 3539 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog."); 3540 int32_t i = 0, j=0, k=0; 3541 int32_t sources = Transliterator::countAvailableSources(); 3542 for (i = 0; i < sources; i++) { 3543 UnicodeString source; 3544 Transliterator::getAvailableSource(i, source); 3545 UnicodeString test = _findMatch(source, tests); 3546 if (test.length() == 0) { 3547 logln((UnicodeString)"Skipping " + source + "-X"); 3548 continue; 3549 } 3550 int32_t targets = Transliterator::countAvailableTargets(source); 3551 for (j = 0; j < targets; j++) { 3552 UnicodeString target; 3553 Transliterator::getAvailableTarget(j, source, target); 3554 int32_t variants = Transliterator::countAvailableVariants(source, target); 3555 for (k =0; k< variants; k++) { 3556 UnicodeString variant; 3557 UParseError err; 3558 UErrorCode status = U_ZERO_ERROR; 3559 3560 Transliterator::getAvailableVariant(k, source, target, variant); 3561 UnicodeString id = source + "-" + target + "/" + variant; 3562 3563 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status); 3564 if (U_FAILURE(status)) { 3565 dataerrln((UnicodeString)"FAIL: Could not create " + id); 3566 delete t; 3567 continue; 3568 } 3569 status = U_ZERO_ERROR; 3570 CheckIncrementalAux(t, test); 3571 3572 UnicodeString rev; 3573 _trans(*t, test, rev); 3574 Transliterator *inv = t->createInverse(status); 3575 if (U_FAILURE(status)) { 3576 #if UCONFIG_NO_BREAK_ITERATION 3577 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. 3578 if (id.compare((UnicodeString)"Latin-Thai/") != 0) 3579 #endif 3580 errln((UnicodeString)"FAIL: Could not create inverse of " + id); 3581 3582 delete t; 3583 delete inv; 3584 continue; 3585 } 3586 CheckIncrementalAux(inv, rev); 3587 delete t; 3588 delete inv; 3589 } 3590 } 3591 } 3592 } 3593 3594 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t, 3595 const UnicodeString& input) { 3596 UErrorCode ec = U_ZERO_ERROR; 3597 UTransPosition pos; 3598 UnicodeString test = input; 3599 3600 pos.contextStart = 0; 3601 pos.contextLimit = input.length(); 3602 pos.start = 0; 3603 pos.limit = input.length(); 3604 3605 t->transliterate(test, pos, ec); 3606 if (U_FAILURE(ec)) { 3607 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec)); 3608 return; 3609 } 3610 UBool gotError = FALSE; 3611 3612 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 3613 3614 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") { 3615 errln((UnicodeString)"No Progress, " + 3616 t->getID() + ": " + formatInput(test, input, pos)); 3617 gotError = TRUE; 3618 } else { 3619 logln((UnicodeString)"PASS Progress, " + 3620 t->getID() + ": " + formatInput(test, input, pos)); 3621 } 3622 t->finishTransliteration(test, pos); 3623 if (pos.start != pos.limit) { 3624 errln((UnicodeString)"Incomplete, " + 3625 t->getID() + ": " + formatInput(test, input, pos)); 3626 gotError = TRUE; 3627 } 3628 } 3629 3630 void TransliteratorTest::TestFunction() { 3631 // Careful with spacing and ';' here: Phrase this exactly 3632 // as toRules() is going to return it. If toRules() changes 3633 // with regard to spacing or ';', then adjust this string. 3634 UnicodeString rule = 3635 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 3636 3637 UParseError pe; 3638 UErrorCode ec = U_ZERO_ERROR; 3639 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3640 if (t == NULL) { 3641 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec)); 3642 return; 3643 } 3644 3645 UnicodeString r; 3646 t->toRules(r, TRUE); 3647 if (r == rule) { 3648 logln((UnicodeString)"OK: toRules() => " + r); 3649 } else { 3650 errln((UnicodeString)"FAIL: toRules() => " + r + 3651 ", expected " + rule); 3652 } 3653 3654 expect(*t, "The Quick Brown Fox", 3655 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox")); 3656 3657 delete t; 3658 } 3659 3660 void TransliteratorTest::TestInvalidBackRef(void) { 3661 UnicodeString rule = ". > $1;"; 3662 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;"); 3663 UParseError pe; 3664 UErrorCode ec = U_ZERO_ERROR; 3665 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3666 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec); 3667 3668 if (t != NULL) { 3669 errln("FAIL: createFromRules should have returned NULL"); 3670 delete t; 3671 } 3672 3673 if (t2 != NULL) { 3674 errln("FAIL: createFromRules should have returned NULL"); 3675 delete t2; 3676 } 3677 3678 if (U_SUCCESS(ec)) { 3679 errln("FAIL: Ok: . > $1; => no error"); 3680 } else { 3681 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec)); 3682 } 3683 } 3684 3685 void TransliteratorTest::TestMulticharStringSet() { 3686 // Basic testing 3687 const char* rule = 3688 " [{aa}] > x;" 3689 " a > y;" 3690 " [b{bc}] > z;" 3691 "[{gd}] { e > q;" 3692 " e } [{fg}] > r;" ; 3693 3694 UParseError pe; 3695 UErrorCode ec = U_ZERO_ERROR; 3696 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3697 if (t == NULL || U_FAILURE(ec)) { 3698 delete t; 3699 errln("FAIL: createFromRules failed"); 3700 return; 3701 } 3702 3703 expect(*t, "a aa ab bc d gd de gde gdefg ddefg", 3704 "y x yz z d gd de gdq gdqfg ddrfg"); 3705 delete t; 3706 3707 // Overlapped string test. Make sure that when multiple 3708 // strings can match that the longest one is matched. 3709 rule = 3710 " [a {ab} {abc}] > x;" 3711 " b > y;" 3712 " c > z;" 3713 " q [t {st} {rst}] { e > p;" ; 3714 3715 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3716 if (t == NULL || U_FAILURE(ec)) { 3717 delete t; 3718 errln("FAIL: createFromRules failed"); 3719 return; 3720 } 3721 3722 expect(*t, "a ab abc qte qste qrste", 3723 "x x x qtp qstp qrstp"); 3724 delete t; 3725 } 3726 3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 3728 // BEGIN TestUserFunction support factory 3729 3730 Transliterator* _TUFF[4]; 3731 UnicodeString* _TUFID[4]; 3732 3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/, 3734 Transliterator::Token context) { 3735 return _TUFF[context.integer]->clone(); 3736 } 3737 3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) { 3739 _TUFF[n] = t; 3740 _TUFID[n] = new UnicodeString(ID); 3741 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n)); 3742 } 3743 3744 static void _TUFUnreg(int32_t n) { 3745 if (_TUFF[n] != NULL) { 3746 Transliterator::unregister(*_TUFID[n]); 3747 delete _TUFF[n]; 3748 delete _TUFID[n]; 3749 } 3750 } 3751 3752 // END TestUserFunction support factory 3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 3754 3755 /** 3756 * Test that user-registered transliterators can be used under function 3757 * syntax. 3758 */ 3759 void TransliteratorTest::TestUserFunction() { 3760 3761 Transliterator* t; 3762 UParseError pe; 3763 UErrorCode ec = U_ZERO_ERROR; 3764 3765 // Setup our factory 3766 int32_t i; 3767 for (i=0; i<4; ++i) { 3768 _TUFF[i] = NULL; 3769 } 3770 3771 // There's no need to register inverses if we don't use them 3772 t = Transliterator::createFromRules("gif", 3773 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"), 3774 UTRANS_FORWARD, pe, ec); 3775 if (t == NULL || U_FAILURE(ec)) { 3776 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec)); 3777 return; 3778 } 3779 _TUFReg("Any-gif", t, 0); 3780 3781 t = Transliterator::createFromRules("RemoveCurly", 3782 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"), 3783 UTRANS_FORWARD, pe, ec); 3784 if (t == NULL || U_FAILURE(ec)) { 3785 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec)); 3786 goto FAIL; 3787 } 3788 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name"); 3789 _TUFReg("Any-RemoveCurly", t, 1); 3790 3791 logln("Trying &hex"); 3792 t = Transliterator::createFromRules("hex2", 3793 "(.) > &hex($1);", 3794 UTRANS_FORWARD, pe, ec); 3795 if (t == NULL || U_FAILURE(ec)) { 3796 errln("FAIL: createFromRules"); 3797 goto FAIL; 3798 } 3799 logln("Registering"); 3800 _TUFReg("Any-hex2", t, 2); 3801 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec); 3802 if (t == NULL || U_FAILURE(ec)) { 3803 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec)); 3804 goto FAIL; 3805 } 3806 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063")); 3807 delete t; 3808 3809 logln("Trying &gif"); 3810 t = Transliterator::createFromRules("gif2", 3811 "(.) > &Gif(&Hex2($1));", 3812 UTRANS_FORWARD, pe, ec); 3813 if (t == NULL || U_FAILURE(ec)) { 3814 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec)); 3815 goto FAIL; 3816 } 3817 logln("Registering"); 3818 _TUFReg("Any-gif2", t, 3); 3819 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec); 3820 if (t == NULL || U_FAILURE(ec)) { 3821 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec)); 3822 goto FAIL; 3823 } 3824 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" 3825 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 3826 delete t; 3827 3828 // Test that filters are allowed after & 3829 t = Transliterator::createFromRules("test", 3830 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';", 3831 UTRANS_FORWARD, pe, ec); 3832 if (t == NULL || U_FAILURE(ec)) { 3833 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec)); 3834 goto FAIL; 3835 } 3836 expect(*t, "abc", 3837 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ")); 3838 delete t; 3839 3840 FAIL: 3841 for (i=0; i<4; ++i) { 3842 _TUFUnreg(i); 3843 } 3844 } 3845 3846 /** 3847 * Test the Any-X transliterators. 3848 */ 3849 void TransliteratorTest::TestAnyX(void) { 3850 UParseError parseError; 3851 UErrorCode status = U_ZERO_ERROR; 3852 Transliterator* anyLatin = 3853 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); 3854 if (anyLatin==0) { 3855 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status)); 3856 delete anyLatin; 3857 return; 3858 } 3859 3860 expect(*anyLatin, 3861 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"), 3862 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc")); 3863 3864 delete anyLatin; 3865 } 3866 3867 /** 3868 * Test Any-X transliterators with sample letters from all scripts. 3869 */ 3870 void TransliteratorTest::TestAny(void) { 3871 UErrorCode status = U_ZERO_ERROR; 3872 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in 3873 // function call parameters going on in this test. 3874 UnicodeSet alphabetic("[:alphabetic:]", status); 3875 if (U_FAILURE(status)) { 3876 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 3877 return; 3878 } 3879 alphabetic.freeze(); 3880 3881 UnicodeString testString; 3882 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) { 3883 const char *scriptName = uscript_getShortName((UScriptCode)i); 3884 if (scriptName == NULL) { 3885 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i); 3886 return; 3887 } 3888 3889 UnicodeSet sample; 3890 sample.applyPropertyAlias("script", scriptName, status); 3891 if (U_FAILURE(status)) { 3892 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 3893 return; 3894 } 3895 sample.retainAll(alphabetic); 3896 for (int32_t count=0; count<5; count++) { 3897 UChar32 c = sample.charAt(count); 3898 if (c == -1) { 3899 break; 3900 } 3901 testString.append(c); 3902 } 3903 } 3904 3905 UParseError parseError; 3906 Transliterator* anyLatin = 3907 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); 3908 if (U_FAILURE(status)) { 3909 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 3910 return; 3911 } 3912 3913 logln(UnicodeString("Sample set for Any-Latin: ") + testString); 3914 anyLatin->transliterate(testString); 3915 logln(UnicodeString("Sample result for Any-Latin: ") + testString); 3916 delete anyLatin; 3917 } 3918 3919 3920 /** 3921 * Test the source and target set API. These are only implemented 3922 * for RBT and CompoundTransliterator at this time. 3923 */ 3924 void TransliteratorTest::TestSourceTargetSet() { 3925 UErrorCode ec = U_ZERO_ERROR; 3926 3927 // Rules 3928 const char* r = 3929 "a > b; " 3930 "r [x{lu}] > q;"; 3931 3932 // Expected source 3933 UnicodeSet expSrc("[arx{lu}]", ec); 3934 3935 // Expected target 3936 UnicodeSet expTrg("[bq]", ec); 3937 3938 UParseError pe; 3939 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec); 3940 3941 if (U_FAILURE(ec)) { 3942 delete t; 3943 errln("FAIL: Couldn't set up test"); 3944 return; 3945 } 3946 3947 UnicodeSet src; t->getSourceSet(src); 3948 UnicodeSet trg; t->getTargetSet(trg); 3949 3950 if (src == expSrc && trg == expTrg) { 3951 UnicodeString a, b; 3952 logln((UnicodeString)"Ok: " + 3953 r + " => source = " + src.toPattern(a, TRUE) + 3954 ", target = " + trg.toPattern(b, TRUE)); 3955 } else { 3956 UnicodeString a, b, c, d; 3957 errln((UnicodeString)"FAIL: " + 3958 r + " => source = " + src.toPattern(a, TRUE) + 3959 ", expected " + expSrc.toPattern(b, TRUE) + 3960 "; target = " + trg.toPattern(c, TRUE) + 3961 ", expected " + expTrg.toPattern(d, TRUE)); 3962 } 3963 3964 delete t; 3965 } 3966 3967 /** 3968 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3969 */ 3970 void TransliteratorTest::TestPatternWhiteSpace() { 3971 // Rules 3972 const char* r = "a > \\u200E b;"; 3973 3974 UErrorCode ec = U_ZERO_ERROR; 3975 UParseError pe; 3976 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec); 3977 3978 if (U_FAILURE(ec)) { 3979 errln("FAIL: Couldn't set up test"); 3980 } else { 3981 expect(*t, "a", "b"); 3982 } 3983 delete t; 3984 3985 // UnicodeSet 3986 ec = U_ZERO_ERROR; 3987 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec); 3988 3989 if (U_FAILURE(ec)) { 3990 errln("FAIL: Couldn't set up test"); 3991 } else { 3992 if (set.contains(0x200E)) { 3993 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3994 } 3995 } 3996 } 3997 //====================================================================== 3998 // this method is in TestUScript.java 3999 //====================================================================== 4000 void TransliteratorTest::TestAllCodepoints(){ 4001 UScriptCode code= USCRIPT_INVALID_CODE; 4002 char id[256]={'\0'}; 4003 char abbr[256]={'\0'}; 4004 char newId[256]={'\0'}; 4005 char newAbbrId[256]={'\0'}; 4006 char oldId[256]={'\0'}; 4007 char oldAbbrId[256]={'\0'}; 4008 4009 UErrorCode status =U_ZERO_ERROR; 4010 UParseError pe; 4011 4012 for(uint32_t i = 0; i<=0x10ffff; i++){ 4013 code = uscript_getScript(i,&status); 4014 if(code == USCRIPT_INVALID_CODE){ 4015 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i); 4016 } 4017 const char* myId = uscript_getName(code); 4018 if(!myId) { 4019 dataerrln("Valid script code returned NULL name. Check your data!"); 4020 return; 4021 } 4022 uprv_strcpy(id,myId); 4023 uprv_strcpy(abbr,uscript_getShortName(code)); 4024 4025 uprv_strcpy(newId,"[:"); 4026 uprv_strcat(newId,id); 4027 uprv_strcat(newId,":];NFD"); 4028 4029 uprv_strcpy(newAbbrId,"[:"); 4030 uprv_strcat(newAbbrId,abbr); 4031 uprv_strcat(newAbbrId,":];NFD"); 4032 4033 if(uprv_strcmp(newId,oldId)!=0){ 4034 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status); 4035 if(t==NULL || U_FAILURE(status)){ 4036 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status)); 4037 } 4038 delete t; 4039 } 4040 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){ 4041 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status); 4042 if(t==NULL || U_FAILURE(status)){ 4043 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status)); 4044 } 4045 delete t; 4046 } 4047 uprv_strcpy(oldId,newId); 4048 uprv_strcpy(oldAbbrId, newAbbrId); 4049 4050 } 4051 4052 } 4053 4054 #define TEST_TRANSLIT_ID(id, cls) { \ 4055 UErrorCode ec = U_ZERO_ERROR; \ 4056 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \ 4057 if (U_FAILURE(ec)) { \ 4058 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \ 4059 } else { \ 4060 if (t->getDynamicClassID() != cls::getStaticClassID()) { \ 4061 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \ 4062 } \ 4063 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \ 4064 } \ 4065 delete t; \ 4066 } 4067 4068 #define TEST_TRANSLIT_RULE(rule, cls) { \ 4069 UErrorCode ec = U_ZERO_ERROR; \ 4070 UParseError pe; \ 4071 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \ 4072 if (U_FAILURE(ec)) { \ 4073 errln("FAIL: Couldn't create " rule); \ 4074 } else { \ 4075 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \ 4076 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \ 4077 } \ 4078 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \ 4079 } \ 4080 delete t; \ 4081 } 4082 4083 void TransliteratorTest::TestBoilerplate() { 4084 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator); 4085 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator); 4086 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator); 4087 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator); 4088 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator); 4089 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator); 4090 TEST_TRANSLIT_ID("Null", NullTransliterator); 4091 TEST_TRANSLIT_ID("Remove", RemoveTransliterator); 4092 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator); 4093 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator); 4094 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator); 4095 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator); 4096 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator); 4097 } 4098 4099 void TransliteratorTest::TestAlternateSyntax() { 4100 // U+2206 == & 4101 // U+2190 == < 4102 // U+2192 == > 4103 // U+2194 == <> 4104 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"), 4105 "abc", 4106 "xbz"); 4107 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), 4108 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), 4109 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}")); 4110 } 4111 4112 static const char* BEGIN_END_RULES[] = { 4113 // [0] 4114 "abc > xy;" 4115 "aba > z;", 4116 4117 // [1] 4118 /* 4119 "::BEGIN;" 4120 "abc > xy;" 4121 "::END;" 4122 "::BEGIN;" 4123 "aba > z;" 4124 "::END;", 4125 */ 4126 "", // test case commented out below, this is here to keep from messing up the indexes 4127 4128 // [2] 4129 /* 4130 "abc > xy;" 4131 "::BEGIN;" 4132 "aba > z;" 4133 "::END;", 4134 */ 4135 "", // test case commented out below, this is here to keep from messing up the indexes 4136 4137 // [3] 4138 /* 4139 "::BEGIN;" 4140 "abc > xy;" 4141 "::END;" 4142 "aba > z;", 4143 */ 4144 "", // test case commented out below, this is here to keep from messing up the indexes 4145 4146 // [4] 4147 "abc > xy;" 4148 "::Null;" 4149 "aba > z;", 4150 4151 // [5] 4152 "::Upper;" 4153 "ABC > xy;" 4154 "AB > x;" 4155 "C > z;" 4156 "::Upper;" 4157 "XYZ > p;" 4158 "XY > q;" 4159 "Z > r;" 4160 "::Upper;", 4161 4162 // [6] 4163 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4164 "$delim = [\\-$ws];" 4165 "$ws $delim* > ' ';" 4166 "'-' $delim* > '-';", 4167 4168 // [7] 4169 "::Null;" 4170 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4171 "$delim = [\\-$ws];" 4172 "$ws $delim* > ' ';" 4173 "'-' $delim* > '-';", 4174 4175 // [8] 4176 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4177 "$delim = [\\-$ws];" 4178 "$ws $delim* > ' ';" 4179 "'-' $delim* > '-';" 4180 "::Null;", 4181 4182 // [9] 4183 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4184 "$delim = [\\-$ws];" 4185 "::Null;" 4186 "$ws $delim* > ' ';" 4187 "'-' $delim* > '-';", 4188 4189 // [10] 4190 /* 4191 "::BEGIN;" 4192 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4193 "$delim = [\\-$ws];" 4194 "::END;" 4195 "$ws $delim* > ' ';" 4196 "'-' $delim* > '-';", 4197 */ 4198 "", // test case commented out below, this is here to keep from messing up the indexes 4199 4200 // [11] 4201 /* 4202 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4203 "$delim = [\\-$ws];" 4204 "::BEGIN;" 4205 "$ws $delim* > ' ';" 4206 "'-' $delim* > '-';" 4207 "::END;", 4208 */ 4209 "", // test case commented out below, this is here to keep from messing up the indexes 4210 4211 // [12] 4212 /* 4213 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4214 "$delim = [\\-$ws];" 4215 "$ab = [ab];" 4216 "::BEGIN;" 4217 "$ws $delim* > ' ';" 4218 "'-' $delim* > '-';" 4219 "::END;" 4220 "::BEGIN;" 4221 "$ab { ' ' } $ab > '-';" 4222 "c { ' ' > ;" 4223 "::END;" 4224 "::BEGIN;" 4225 "'a-a' > a\\%|a;" 4226 "::END;", 4227 */ 4228 "", // test case commented out below, this is here to keep from messing up the indexes 4229 4230 // [13] 4231 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4232 "$delim = [\\-$ws];" 4233 "$ab = [ab];" 4234 "::Null;" 4235 "$ws $delim* > ' ';" 4236 "'-' $delim* > '-';" 4237 "::Null;" 4238 "$ab { ' ' } $ab > '-';" 4239 "c { ' ' > ;" 4240 "::Null;" 4241 "'a-a' > a\\%|a;", 4242 4243 // [14] 4244 /* 4245 "::[abc];" 4246 "::BEGIN;" 4247 "abc > xy;" 4248 "::END;" 4249 "::BEGIN;" 4250 "aba > yz;" 4251 "::END;" 4252 "::Upper;", 4253 */ 4254 "", // test case commented out below, this is here to keep from messing up the indexes 4255 4256 // [15] 4257 "::[abc];" 4258 "abc > xy;" 4259 "::Null;" 4260 "aba > yz;" 4261 "::Upper;", 4262 4263 // [16] 4264 /* 4265 "::[abc];" 4266 "::BEGIN;" 4267 "abc <> xy;" 4268 "::END;" 4269 "::BEGIN;" 4270 "aba <> yz;" 4271 "::END;" 4272 "::Upper(Lower);" 4273 "::([XYZ]);" 4274 */ 4275 "", // test case commented out below, this is here to keep from messing up the indexes 4276 4277 // [17] 4278 "::[abc];" 4279 "abc <> xy;" 4280 "::Null;" 4281 "aba <> yz;" 4282 "::Upper(Lower);" 4283 "::([XYZ]);" 4284 }; 4285 static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0])); 4286 4287 /* 4288 (This entire test is commented out below and will need some heavy revision when we re-add 4289 the ::BEGIN/::END stuff) 4290 static const char* BOGUS_BEGIN_END_RULES[] = { 4291 // [7] 4292 "::BEGIN;" 4293 "abc > xy;" 4294 "::BEGIN;" 4295 "aba > z;" 4296 "::END;" 4297 "::END;", 4298 4299 // [8] 4300 "abc > xy;" 4301 " aba > z;" 4302 "::END;", 4303 4304 // [9] 4305 "::BEGIN;" 4306 "::Upper;" 4307 "::END;" 4308 }; 4309 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0])); 4310 */ 4311 4312 static const char* BEGIN_END_TEST_CASES[] = { 4313 // rules input expected output 4314 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 4315 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 4316 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 4317 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 4318 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 4319 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 4320 4321 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 4322 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 4323 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 4324 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 4325 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 4326 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 4327 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 4328 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 4329 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 4330 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 4331 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 4332 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 4333 4334 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 4335 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 4336 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 4337 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 4338 }; 4339 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0])); 4340 4341 void TransliteratorTest::TestBeginEnd() { 4342 // run through the list of test cases above 4343 int32_t i = 0; 4344 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { 4345 expect((UnicodeString)"Test case #" + (i / 3), 4346 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), 4347 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), 4348 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); 4349 } 4350 4351 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 4352 UParseError parseError; 4353 UErrorCode status = U_ZERO_ERROR; 4354 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]), 4355 UTRANS_REVERSE, parseError, status); 4356 if (reversed == 0 || U_FAILURE(status)) { 4357 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status); 4358 } else { 4359 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba")); 4360 } 4361 delete reversed; 4362 4363 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 4364 // that all of them cause errors 4365 /* 4366 (commented out until we have the real ::BEGIN/::END stuff in place 4367 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) { 4368 UParseError parseError; 4369 UErrorCode status = U_ZERO_ERROR; 4370 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]), 4371 UTRANS_FORWARD, parseError, status); 4372 if (!U_FAILURE(status)) { 4373 delete t; 4374 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 4375 } 4376 } 4377 */ 4378 } 4379 4380 void TransliteratorTest::TestBeginEndToRules() { 4381 // run through the same list of test cases we used above, but this time, instead of just 4382 // instantiating a Transliterator from the rules and running the test against it, we instantiate 4383 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 4384 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 4385 // to (i.e., does the same thing as) the original rule set 4386 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { 4387 UParseError parseError; 4388 UErrorCode status = U_ZERO_ERROR; 4389 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), 4390 UTRANS_FORWARD, parseError, status); 4391 if (U_FAILURE(status)) { 4392 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status); 4393 } else { 4394 UnicodeString rules; 4395 t->toRules(rules, TRUE); 4396 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules, 4397 UTRANS_FORWARD, parseError, status); 4398 if (U_FAILURE(status)) { 4399 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"), 4400 parseError, status); 4401 delete t; 4402 } else { 4403 expect(*t2, 4404 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), 4405 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); 4406 delete t; 4407 delete t2; 4408 } 4409 } 4410 } 4411 4412 // do the same thing for the reversible test case 4413 UParseError parseError; 4414 UErrorCode status = U_ZERO_ERROR; 4415 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]), 4416 UTRANS_REVERSE, parseError, status); 4417 if (U_FAILURE(status)) { 4418 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status); 4419 } else { 4420 UnicodeString rules; 4421 reversed->toRules(rules, FALSE); 4422 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD, 4423 parseError, status); 4424 if (U_FAILURE(status)) { 4425 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"), 4426 parseError, status); 4427 delete reversed; 4428 } else { 4429 expect(*reversed2, 4430 UnicodeString("xy XY XYZ yz YZ"), 4431 UnicodeString("xy abc xaba yz aba")); 4432 delete reversed; 4433 delete reversed2; 4434 } 4435 } 4436 } 4437 4438 void TransliteratorTest::TestRegisterAlias() { 4439 UnicodeString longID("Lower;[aeiou]Upper"); 4440 UnicodeString shortID("Any-CapVowels"); 4441 UnicodeString reallyShortID("CapVowels"); 4442 4443 Transliterator::registerAlias(shortID, longID); 4444 4445 UErrorCode err = U_ZERO_ERROR; 4446 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err); 4447 if (U_FAILURE(err)) { 4448 errln("Failed to instantiate transliterator with long ID"); 4449 Transliterator::unregister(shortID); 4450 return; 4451 } 4452 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err); 4453 if (U_FAILURE(err)) { 4454 errln("Failed to instantiate transliterator with short ID"); 4455 delete t1; 4456 Transliterator::unregister(shortID); 4457 return; 4458 } 4459 4460 if (t1->getID() != longID) 4461 errln("Transliterator instantiated with long ID doesn't have long ID"); 4462 if (t2->getID() != reallyShortID) 4463 errln("Transliterator instantiated with short ID doesn't have short ID"); 4464 4465 UnicodeString rules1; 4466 UnicodeString rules2; 4467 4468 t1->toRules(rules1, TRUE); 4469 t2->toRules(rules2, TRUE); 4470 if (rules1 != rules2) 4471 errln("Alias transliterators aren't the same"); 4472 4473 delete t1; 4474 delete t2; 4475 Transliterator::unregister(shortID); 4476 4477 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err); 4478 if (U_SUCCESS(err)) { 4479 errln("Instantiation with short ID succeeded after short ID was unregistered"); 4480 delete t1; 4481 } 4482 4483 // try the same thing again, but this time with something other than 4484 // an instance of CompoundTransliterator 4485 UnicodeString realID("Latin-Greek"); 4486 UnicodeString fakeID("Latin-dlgkjdflkjdl"); 4487 Transliterator::registerAlias(fakeID, realID); 4488 4489 err = U_ZERO_ERROR; 4490 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err); 4491 if (U_FAILURE(err)) { 4492 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err)); 4493 Transliterator::unregister(realID); 4494 return; 4495 } 4496 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err); 4497 if (U_FAILURE(err)) { 4498 errln("Failed to instantiate transliterator with fake ID"); 4499 delete t1; 4500 Transliterator::unregister(realID); 4501 return; 4502 } 4503 4504 t1->toRules(rules1, TRUE); 4505 t2->toRules(rules2, TRUE); 4506 if (rules1 != rules2) 4507 errln("Alias transliterators aren't the same"); 4508 4509 delete t1; 4510 delete t2; 4511 Transliterator::unregister(fakeID); 4512 } 4513 4514 void TransliteratorTest::TestRuleStripping() { 4515 /* 4516 # 4517 \uE001>\u0C01; # SIGN 4518 */ 4519 static const UChar rule[] = { 4520 0x0023,0x0020,0x000D,0x000A, 4521 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0 4522 }; 4523 static const UChar expectedRule[] = { 4524 0xE001,0x003E,0x0C01,0x003B,0 4525 }; 4526 UChar result[sizeof(rule)/sizeof(rule[0])]; 4527 UErrorCode status = U_ZERO_ERROR; 4528 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status); 4529 if (len != u_strlen(expectedRule)) { 4530 errln("utrans_stripRules return len = %d", len); 4531 } 4532 if (u_strncmp(expectedRule, result, len) != 0) { 4533 errln("utrans_stripRules did not return expected string"); 4534 } 4535 } 4536 4537 /** 4538 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 4539 */ 4540 void TransliteratorTest::TestHalfwidthFullwidth(void) { 4541 UParseError parseError; 4542 UErrorCode status = U_ZERO_ERROR; 4543 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status); 4544 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status); 4545 if (hf == 0 || fh == 0) { 4546 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 4547 delete hf; 4548 delete fh; 4549 return; 4550 } 4551 4552 // Array of 2n items 4553 // Each item is 4554 // "hf"|"fh"|"both", 4555 // <Halfwidth>, 4556 // <Fullwidth> 4557 const char* DATA[] = { 4558 "both", 4559 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020", 4560 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000", 4561 }; 4562 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 4563 4564 for (int32_t i=0; i<DATA_length; i+=3) { 4565 UnicodeString h = CharsToUnicodeString(DATA[i+1]); 4566 UnicodeString f = CharsToUnicodeString(DATA[i+2]); 4567 switch (*DATA[i]) { 4568 case 0x68: //'h': // Halfwidth-Fullwidth only 4569 expect(*hf, h, f); 4570 break; 4571 case 0x66: //'f': // Fullwidth-Halfwidth only 4572 expect(*fh, f, h); 4573 break; 4574 case 0x62: //'b': // both directions 4575 expect(*hf, h, f); 4576 expect(*fh, f, h); 4577 break; 4578 } 4579 } 4580 delete hf; 4581 delete fh; 4582 } 4583 4584 4585 /** 4586 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 4587 * TODO: confirm that the expected results are correct. 4588 * For now, test just confirms that C++ and Java give identical results. 4589 */ 4590 void TransliteratorTest::TestThai(void) { 4591 #if !UCONFIG_NO_BREAK_ITERATION 4592 UParseError parseError; 4593 UErrorCode status = U_ZERO_ERROR; 4594 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); 4595 if (tr == 0) { 4596 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 4597 return; 4598 } 4599 if (U_FAILURE(status)) { 4600 errln("FAIL: createInstance failed with %s", u_errorName(status)); 4601 return; 4602 } 4603 const char *thaiText = 4604 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d" 4605 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22" 4606 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d" 4607 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d" 4608 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29" 4609 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42" 4610 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25" 4611 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15" 4612 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08" 4613 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49" 4614 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23" 4615 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23" 4616 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48" 4617 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48" 4618 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30" 4619 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d" 4620 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01" 4621 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e" 4622 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49" 4623 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04" 4624 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19" 4625 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43" 4626 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20" 4627 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35" 4628 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b" 4629 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04" 4630 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19" 4631 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40" 4632 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22" 4633 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b."; 4634 4635 const char *latinText = 4636 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300" 4637 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr" 4638 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304" 4639 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304" 4640 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301" 4641 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b " 4642 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101" 4643 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m" 4644 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p" 4645 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304" 4646 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101" 4647 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131" 4648 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9" 4649 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306" 4650 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1" 4651 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6" 4652 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131" 4653 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb."; 4654 4655 4656 UnicodeString xlitText(thaiText); 4657 xlitText = xlitText.unescape(); 4658 tr->transliterate(xlitText); 4659 4660 UnicodeString expectedText(latinText); 4661 expectedText = expectedText.unescape(); 4662 expect(*tr, xlitText, expectedText); 4663 4664 delete tr; 4665 #endif 4666 } 4667 4668 4669 //====================================================================== 4670 // Support methods 4671 //====================================================================== 4672 void TransliteratorTest::expectT(const UnicodeString& id, 4673 const UnicodeString& source, 4674 const UnicodeString& expectedResult) { 4675 UErrorCode ec = U_ZERO_ERROR; 4676 UParseError pe; 4677 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec); 4678 if (U_FAILURE(ec)) { 4679 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec)); 4680 delete t; 4681 return; 4682 } 4683 expect(*t, source, expectedResult); 4684 delete t; 4685 } 4686 4687 void TransliteratorTest::reportParseError(const UnicodeString& message, 4688 const UParseError& parseError, 4689 const UErrorCode& status) { 4690 dataerrln(message + 4691 /*", parse error " + parseError.code +*/ 4692 ", line " + parseError.line + 4693 ", offset " + parseError.offset + 4694 ", pre-context " + prettify(parseError.preContext, TRUE) + 4695 ", post-context " + prettify(parseError.postContext,TRUE) + 4696 ", Error: " + u_errorName(status)); 4697 } 4698 4699 void TransliteratorTest::expect(const UnicodeString& rules, 4700 const UnicodeString& source, 4701 const UnicodeString& expectedResult, 4702 UTransPosition *pos) { 4703 expect("<ID>", rules, source, expectedResult, pos); 4704 } 4705 4706 void TransliteratorTest::expect(const UnicodeString& id, 4707 const UnicodeString& rules, 4708 const UnicodeString& source, 4709 const UnicodeString& expectedResult, 4710 UTransPosition *pos) { 4711 UErrorCode status = U_ZERO_ERROR; 4712 UParseError parseError; 4713 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status); 4714 if (U_FAILURE(status)) { 4715 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status); 4716 } else { 4717 expect(*t, source, expectedResult, pos); 4718 } 4719 delete t; 4720 } 4721 4722 void TransliteratorTest::expect(const Transliterator& t, 4723 const UnicodeString& source, 4724 const UnicodeString& expectedResult, 4725 const Transliterator& reverseTransliterator) { 4726 expect(t, source, expectedResult); 4727 expect(reverseTransliterator, expectedResult, source); 4728 } 4729 4730 void TransliteratorTest::expect(const Transliterator& t, 4731 const UnicodeString& source, 4732 const UnicodeString& expectedResult, 4733 UTransPosition *pos) { 4734 if (pos == 0) { 4735 UnicodeString result(source); 4736 t.transliterate(result); 4737 expectAux(t.getID() + ":String", source, result, expectedResult); 4738 } 4739 UTransPosition index={0, 0, 0, 0}; 4740 if (pos != 0) { 4741 index = *pos; 4742 } 4743 4744 UnicodeString rsource(source); 4745 if (pos == 0) { 4746 t.transliterate(rsource); 4747 } else { 4748 // Do it all at once -- below we do it incrementally 4749 t.finishTransliteration(rsource, *pos); 4750 } 4751 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult); 4752 4753 // Test keyboard (incremental) transliteration -- this result 4754 // must be the same after we finalize (see below). 4755 UnicodeString log; 4756 rsource.remove(); 4757 if (pos != 0) { 4758 rsource = source; 4759 formatInput(log, rsource, index); 4760 log.append(" -> "); 4761 UErrorCode status = U_ZERO_ERROR; 4762 t.transliterate(rsource, index, status); 4763 formatInput(log, rsource, index); 4764 } else { 4765 for (int32_t i=0; i<source.length(); ++i) { 4766 if (i != 0) { 4767 log.append(" + "); 4768 } 4769 log.append(source.charAt(i)).append(" -> "); 4770 UErrorCode status = U_ZERO_ERROR; 4771 t.transliterate(rsource, index, source.charAt(i), status); 4772 formatInput(log, rsource, index); 4773 } 4774 } 4775 4776 // As a final step in keyboard transliteration, we must call 4777 // transliterate to finish off any pending partial matches that 4778 // were waiting for more input. 4779 t.finishTransliteration(rsource, index); 4780 log.append(" => ").append(rsource); 4781 4782 expectAux(t.getID() + ":Keyboard", log, 4783 rsource == expectedResult, 4784 expectedResult); 4785 } 4786 4787 4788 /** 4789 * @param appendTo result is appended to this param. 4790 * @param input the string being transliterated 4791 * @param pos the index struct 4792 */ 4793 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo, 4794 const UnicodeString& input, 4795 const UTransPosition& pos) { 4796 // Output a string of the form aaa{bbb|ccc|ddd}eee, where 4797 // the {} indicate the context start and limit, and the || 4798 // indicate the start and limit. 4799 if (0 <= pos.contextStart && 4800 pos.contextStart <= pos.start && 4801 pos.start <= pos.limit && 4802 pos.limit <= pos.contextLimit && 4803 pos.contextLimit <= input.length()) { 4804 4805 UnicodeString a, b, c, d, e; 4806 input.extractBetween(0, pos.contextStart, a); 4807 input.extractBetween(pos.contextStart, pos.start, b); 4808 input.extractBetween(pos.start, pos.limit, c); 4809 input.extractBetween(pos.limit, pos.contextLimit, d); 4810 input.extractBetween(pos.contextLimit, input.length(), e); 4811 appendTo.append(a).append((UChar)123/*{*/).append(b). 4812 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d). 4813 append((UChar)125/*}*/).append(e); 4814 } else { 4815 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" + 4816 pos.contextStart + ", s=" + pos.start + ", l=" + 4817 pos.limit + ", cl=" + pos.contextLimit + "} on " + 4818 input); 4819 } 4820 return appendTo; 4821 } 4822 4823 void TransliteratorTest::expectAux(const UnicodeString& tag, 4824 const UnicodeString& source, 4825 const UnicodeString& result, 4826 const UnicodeString& expectedResult) { 4827 expectAux(tag, source + " -> " + result, 4828 result == expectedResult, 4829 expectedResult); 4830 } 4831 4832 void TransliteratorTest::expectAux(const UnicodeString& tag, 4833 const UnicodeString& summary, UBool pass, 4834 const UnicodeString& expectedResult) { 4835 if (pass) { 4836 logln(UnicodeString("(")+tag+") " + prettify(summary)); 4837 } else { 4838 dataerrln(UnicodeString("FAIL: (")+tag+") " 4839 + prettify(summary) 4840 + ", expected " + prettify(expectedResult)); 4841 } 4842 } 4843 4844 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 4845