1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2012, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.dev.test.translit; 11 12 import java.util.ArrayList; 13 import java.util.Enumeration; 14 import java.util.HashMap; 15 import java.util.HashSet; 16 import java.util.Iterator; 17 import java.util.List; 18 import java.util.Locale; 19 import java.util.Map.Entry; 20 21 import org.junit.Ignore; 22 import org.junit.Test; 23 24 import android.icu.dev.test.TestFmwk; 25 import android.icu.dev.test.TestUtil; 26 import android.icu.dev.util.UnicodeMap; 27 import android.icu.impl.Utility; 28 import android.icu.impl.UtilityExtensions; 29 import android.icu.lang.CharSequences; 30 import android.icu.lang.UCharacter; 31 import android.icu.lang.UScript; 32 import android.icu.text.CanonicalIterator; 33 import android.icu.text.Normalizer2; 34 import android.icu.text.Replaceable; 35 import android.icu.text.ReplaceableString; 36 import android.icu.text.StringTransform; 37 import android.icu.text.Transliterator; 38 import android.icu.text.UTF16; 39 import android.icu.text.UnicodeFilter; 40 import android.icu.text.UnicodeSet; 41 import android.icu.text.UnicodeSetIterator; 42 import android.icu.util.CaseInsensitiveString; 43 import android.icu.util.ULocale; 44 45 /*********************************************************************** 46 47 HOW TO USE THIS TEST FILE 48 -or- 49 How I developed on two platforms 50 without losing (too much of) my mind 51 52 53 1. Add new tests by copying/pasting/changing existing tests. On Java, 54 any public void method named Test...() taking no parameters becomes 55 a test. 
On C++, you need to modify the header and add a line to 56 the runIndexedTest() dispatch method. 57 58 2. Make liberal use of the expect() method; it is your friend. 59 60 3. The tests in this file exactly match those in a sister file on the 61 other side. The two files are: 62 63 icu4j: src/android.icu.dev.test/translit/TransliteratorTest.java 64 icu4c: source/test/intltest/transtst.cpp 65 66 ==> THIS IS THE IMPORTANT PART <== 67 68 When you add a test in this file, add it in transtst.cpp too. 69 Give it the same name and put it in the same relative place. This 70 makes maintenance a lot simpler for any poor soul who ends up 71 trying to synchronize the tests between icu4j and icu4c. 72 73 4. If you MUST enter a test that is NOT paralleled in the sister file, 74 then add it in the special non-mirrored section. These are 75 labeled 76 77 "icu4j ONLY" 78 79 or 80 81 "icu4c ONLY" 82 83 Make sure you document the reason the test is here and not there. 84 85 86 Thank you. 87 The Management 88 ***********************************************************************/ 89 90 /** 91 * @test 92 * @summary General test of Transliterator 93 */ 94 public class TransliteratorTest extends TestFmwk { 95 @Test 96 public void TestHangul() { 97 98 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 99 Transliterator hl = lh.getInverse(); 100 101 assertTransform("Transform", "\uCE20", lh, "ch"); 102 103 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 104 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 105 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 106 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 107 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 108 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 109 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 110 assertTransform("Transform", "\uC548\uC790", lh, hl, 
"anja", "an-ja"); 111 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha"); 112 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga"); 113 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 114 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 115 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 116 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 117 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 118 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 119 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 120 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 121 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 122 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 123 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 124 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 125 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 126 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 127 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 128 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 129 130 } 131 132 @Test 133 public void TestChinese() { 134 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 135 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 136 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 137 } 138 139 @Test 140 public void TestRegistry() { 141 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 142 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 143 checkRegistry("foo1", "[:letter:] a > b;"); 144 
for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 145 String id = (String) e.nextElement(); 146 checkRegistry(id); 147 } 148 } 149 150 private void checkRegistry (String id, String rules) { 151 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 152 Transliterator.registerInstance(foo); 153 checkRegistry(id); 154 } 155 156 private void checkRegistry(String id) { 157 Transliterator fie = Transliterator.getInstance(id); 158 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 159 fie.setFilter(fae); 160 Transliterator foe = Transliterator.getInstance(id); 161 UnicodeFilter fee = foe.getFilter(); 162 if (fae.equals(fee)) { 163 errln("Changed what is in registry for " + id); 164 } 165 } 166 167 // Android-changed: increase timeout. 168 @Test(timeout = 3000000L) 169 public void TestInstantiation() { 170 long ms = System.currentTimeMillis(); 171 String ID; 172 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 173 ID = (String) e.nextElement(); 174 if (ID.equals("Latin-Han/definition")) { 175 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later"); 176 continue; 177 } 178 Transliterator t = null; 179 try { 180 t = Transliterator.getInstance(ID); 181 // This is only true for some subclasses 182 // // We should get a new instance if we try again 183 // Transliterator t2 = Transliterator.getInstance(ID); 184 // if (t != t2) { 185 // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t); 186 // } else { 187 // errln("FAIL: " + ID + " returned identical instances"); 188 // t = null; 189 // } 190 } catch (IllegalArgumentException ex) { 191 errln("FAIL: " + ID); 192 throw ex; 193 } 194 195 // if (t.getFilter() != null) { 196 // errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter()); 197 // } 198 199 if (t != null) { 200 // Now test toRules 201 String rules = null; 202 try { 203 
rules = t.toRules(true); 204 205 Transliterator.createFromRules("x", rules, Transliterator.FORWARD); 206 } catch (IllegalArgumentException ex2) { 207 errln("FAIL: " + ID + ".toRules() => bad rules: " + 208 rules); 209 throw ex2; 210 } 211 } 212 } 213 214 // Now test the failure path 215 try { 216 ID = "<Not a valid Transliterator ID>"; 217 Transliterator t = Transliterator.getInstance(ID); 218 errln("FAIL: " + ID + " returned " + t); 219 } catch (IllegalArgumentException ex) { 220 logln("OK: Bogus ID handled properly"); 221 } 222 223 ms = System.currentTimeMillis() - ms; 224 logln("Elapsed time: " + ms + " ms"); 225 } 226 227 @Test 228 public void TestSimpleRules() { 229 /* Example: rules 1. ab>x|y 230 * 2. yc>z 231 * 232 * []|eabcd start - no match, copy e to tranlated buffer 233 * [e]|abcd match rule 1 - copy output & adjust cursor 234 * [ex|y]cd match rule 2 - copy output & adjust cursor 235 * [exz]|d no match, copy d to transliterated buffer 236 * [exzd]| done 237 */ 238 expect("ab>x|y;" + 239 "yc>z", 240 "eabcd", "exzd"); 241 242 /* Another set of rules: 243 * 1. ab>x|yzacw 244 * 2. za>q 245 * 3. qc>r 246 * 4. cw>n 247 * 248 * []|ab Rule 1 249 * [x|yzacw] No match 250 * [xy|zacw] Rule 2 251 * [xyq|cw] Rule 4 252 * [xyqn]| Done 253 */ 254 expect("ab>x|yzacw;" + 255 "za>q;" + 256 "qc>r;" + 257 "cw>n", 258 "ab", "xyqn"); 259 260 /* Test categories 261 */ 262 Transliterator t = Transliterator.createFromRules("<ID>", 263 "$dummy=\uE100;" + 264 "$vowel=[aeiouAEIOU];" + 265 "$lu=[:Lu:];" + 266 "$vowel } $lu > '!';" + 267 "$vowel > '&';" + 268 "'!' { $lu > '^';" + 269 "$lu > '*';" + 270 "a>ERROR", 271 Transliterator.FORWARD); 272 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 273 } 274 275 /** 276 * Test inline set syntax and set variable syntax. 
277 */ 278 @Test 279 public void TestInlineSet() { 280 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 281 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 282 283 expect("$digit = [0-9];" + 284 "$alpha = [a-zA-Z];" + 285 "$alphanumeric = [$digit $alpha];" + // *** 286 "$special = [^$alphanumeric];" + // *** 287 "$alphanumeric > '-';" + 288 "$special > '*';", 289 290 "thx-1138", "---*----"); 291 } 292 293 /** 294 * Create some inverses and confirm that they work. We have to be 295 * careful how we do this, since the inverses will not be true 296 * inverses -- we can't throw any random string at the composition 297 * of the transliterators and expect the identity function. F x 298 * F' != I. However, if we are careful about the input, we will 299 * get the expected results. 300 */ 301 @Test 302 public void TestRuleBasedInverse() { 303 String RULES = 304 "abc>zyx;" + 305 "ab>yz;" + 306 "bc>zx;" + 307 "ca>xy;" + 308 "a>x;" + 309 "b>y;" + 310 "c>z;" + 311 312 "abc<zyx;" + 313 "ab<yz;" + 314 "bc<zx;" + 315 "ca<xy;" + 316 "a<x;" + 317 "b<y;" + 318 "c<z;" + 319 320 ""; 321 322 String[] DATA = { 323 // Careful here -- random strings will not work. If we keep 324 // the left side to the domain and the right side to the range 325 // we will be okay though (left, abc; right xyz). 326 "a", "x", 327 "abcacab", "zyxxxyy", 328 "caccb", "xyzzy", 329 }; 330 331 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 332 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 333 for (int i=0; i<DATA.length; i+=2) { 334 expect(fwd, DATA[i], DATA[i+1]); 335 expect(rev, DATA[i+1], DATA[i]); 336 } 337 } 338 339 /** 340 * Basic test of keyboard. 
341 */ 342 @Test 343 public void TestKeyboard() { 344 Transliterator t = Transliterator.createFromRules("<ID>", 345 "psch>Y;" 346 +"ps>y;" 347 +"ch>x;" 348 +"a>A;", Transliterator.FORWARD); 349 String DATA[] = { 350 // insertion, buffer 351 "a", "A", 352 "p", "Ap", 353 "s", "Aps", 354 "c", "Apsc", 355 "a", "AycA", 356 "psch", "AycAY", 357 null, "AycAY", // null means finishKeyboardTransliteration 358 }; 359 360 keyboardAux(t, DATA); 361 } 362 363 /** 364 * Basic test of keyboard with cursor. 365 */ 366 @Test 367 public void TestKeyboard2() { 368 Transliterator t = Transliterator.createFromRules("<ID>", 369 "ych>Y;" 370 +"ps>|y;" 371 +"ch>x;" 372 +"a>A;", Transliterator.FORWARD); 373 String DATA[] = { 374 // insertion, buffer 375 "a", "A", 376 "p", "Ap", 377 "s", "Aps", // modified for rollback - "Ay", 378 "c", "Apsc", // modified for rollback - "Ayc", 379 "a", "AycA", 380 "p", "AycAp", 381 "s", "AycAps", // modified for rollback - "AycAy", 382 "c", "AycApsc", // modified for rollback - "AycAyc", 383 "h", "AycAY", 384 null, "AycAY", // null means finishKeyboardTransliteration 385 }; 386 387 keyboardAux(t, DATA); 388 } 389 390 /** 391 * Test keyboard transliteration with back-replacement. 392 */ 393 @Test 394 public void TestKeyboard3() { 395 // We want th>z but t>y. Furthermore, during keyboard 396 // transliteration we want t>y then yh>z if t, then h are 397 // typed. 398 String RULES = 399 "t>|y;" + 400 "yh>z;" + 401 ""; 402 403 String[] DATA = { 404 // Column 1: characters to add to buffer (as if typed) 405 // Column 2: expected appearance of buffer after 406 // keyboard xliteration. 
407 "a", "a", 408 "b", "ab", 409 "t", "abt", // modified for rollback - "aby", 410 "c", "abyc", 411 "t", "abyct", // modified for rollback - "abycy", 412 "h", "abycz", 413 null, "abycz", // null means finishKeyboardTransliteration 414 }; 415 416 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 417 keyboardAux(t, DATA); 418 } 419 420 private void keyboardAux(Transliterator t, String[] DATA) { 421 Transliterator.Position index = new Transliterator.Position(); 422 ReplaceableString s = new ReplaceableString(); 423 for (int i=0; i<DATA.length; i+=2) { 424 StringBuffer log; 425 if (DATA[i] != null) { 426 log = new StringBuffer(s.toString() + " + " 427 + DATA[i] 428 + " -> "); 429 t.transliterate(s, index, DATA[i]); 430 } else { 431 log = new StringBuffer(s.toString() + " => "); 432 t.finishTransliteration(s, index); 433 } 434 UtilityExtensions.formatInput(log, s, index); 435 if (s.toString().equals(DATA[i+1])) { 436 logln(log.toString()); 437 } else { 438 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 439 } 440 } 441 } 442 443 // Latin-Arabic has been temporarily removed until it can be 444 // done correctly. 445 446 // public void TestArabic() { 447 // String DATA[] = { 448 // "Arabic", 449 // "\u062a\u062a\u0645\u062a\u0639 "+ 450 // "\u0627\u0644\u0644\u063a\u0629 "+ 451 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 452 // "\u0628\u0628\u0646\u0638\u0645 "+ 453 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 454 // "\u062c\u0645\u064a\u0644\u0629" 455 // }; 456 457 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 458 // for (int i=0; i<DATA.length; i+=2) { 459 // expect(t, DATA[i], DATA[i+1]); 460 // } 461 // } 462 463 /** 464 * Compose the Kana transliterator forward and reverse and try 465 * some strings that should come out unchanged. 
466 */ 467 @Test 468 public void TestCompoundKana() { 469 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 470 expect(t, "aaaaa", "aaaaa"); 471 } 472 473 /** 474 * Compose the hex transliterators forward and reverse. 475 */ 476 @Test 477 public void TestCompoundHex() { 478 Transliterator a = Transliterator.getInstance("Any-Hex"); 479 Transliterator b = Transliterator.getInstance("Hex-Any"); 480 // Transliterator[] trans = { a, b }; 481 // Transliterator ab = Transliterator.getInstance(trans); 482 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 483 484 // Do some basic tests of b 485 expect(b, "\\u0030\\u0031", "01"); 486 487 String s = "abcde"; 488 expect(ab, s, s); 489 490 // trans = new Transliterator[] { b, a }; 491 // Transliterator ba = Transliterator.getInstance(trans); 492 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 493 ReplaceableString str = new ReplaceableString(s); 494 a.transliterate(str); 495 expect(ba, str.toString(), str.toString()); 496 } 497 498 /** 499 * Do some basic tests of filtering. 
500 */ 501 @Test 502 public void TestFiltering() { 503 504 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 505 tempTrans.setFilter(new UnicodeSet("[a]")); 506 String tempResult = tempTrans.transform("xa"); 507 assertEquals("context should not be filtered ", "xb", tempResult); 508 509 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 510 tempResult = tempTrans.transform("xa"); 511 assertEquals("context should not be filtered ", "xb", tempResult); 512 513 Transliterator hex = Transliterator.getInstance("Any-Hex"); 514 hex.setFilter(new UnicodeFilter() { 515 public boolean contains(int c) { 516 return c != 'c'; 517 } 518 public String toPattern(boolean escapeUnprintable) { 519 return ""; 520 } 521 public boolean matchesIndexValue(int v) { 522 return false; 523 } 524 public void addMatchSetTo(UnicodeSet toUnionTo) {} 525 }); 526 String s = "abcde"; 527 String out = hex.transliterate(s); 528 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 529 if (out.equals(exp)) { 530 logln("Ok: \"" + exp + "\""); 531 } else { 532 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 533 } 534 } 535 536 /** 537 * Test anchors 538 */ 539 @Test 540 public void TestAnchors() { 541 expect("^ab > 01 ;" + 542 " ab > |8 ;" + 543 " b > k ;" + 544 " 8x$ > 45 ;" + 545 " 8x > 77 ;", 546 547 "ababbabxabx", 548 "018k7745"); 549 expect("$s = [z$] ;" + 550 "$s{ab > 01 ;" + 551 " ab > |8 ;" + 552 " b > k ;" + 553 " 8x}$s > 45 ;" + 554 " 8x > 77 ;", 555 556 "abzababbabxzabxabx", 557 "01z018k45z01x45"); 558 } 559 560 /** 561 * Test pattern quoting and escape mechanisms. 
562 */ 563 @Test 564 public void TestPatternQuoting() { 565 // Array of 3n items 566 // Each item is <rules>, <input>, <expected output> 567 String[] DATA = { 568 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 569 }; 570 571 for (int i=0; i<DATA.length; i+=3) { 572 logln("Pattern: " + Utility.escape(DATA[i])); 573 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 574 expect(t, DATA[i+1], DATA[i+2]); 575 } 576 } 577 578 @Test 579 public void TestVariableNames() { 580 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 581 if (gl == null) { 582 errln("FAIL: null Transliterator returned."); 583 } 584 } 585 586 /** 587 * Regression test for bugs found in Greek transliteration. 588 */ 589 @Test 590 public void TestJ277() { 591 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 592 593 char sigma = (char)0x3C3; 594 char upsilon = (char)0x3C5; 595 char nu = (char)0x3BD; 596 // not used char PHI = (char)0x3A6; 597 char alpha = (char)0x3B1; 598 // not used char omega = (char)0x3C9; 599 // not used char omicron = (char)0x3BF; 600 // not used char epsilon = (char)0x3B5; 601 602 // sigma upsilon nu -> syn 603 StringBuffer buf = new StringBuffer(); 604 buf.append(sigma).append(upsilon).append(nu); 605 String syn = buf.toString(); 606 expect(gl, syn, "syn"); 607 608 // sigma alpha upsilon nu -> saun 609 buf.setLength(0); 610 buf.append(sigma).append(alpha).append(upsilon).append(nu); 611 String sayn = buf.toString(); 612 expect(gl, sayn, "saun"); 613 614 // Again, using a smaller rule set 615 String rules = 616 "$alpha = \u03B1;" + 617 "$nu = \u03BD;" + 618 "$sigma = \u03C3;" + 619 "$ypsilon = \u03C5;" + 620 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 621 "s <> $sigma;" + 622 "a <> $alpha;" + 623 "u <> $vowel { $ypsilon;" + 624 "y <> $ypsilon;" + 625 "n <> $nu;"; 626 Transliterator mini = Transliterator.createFromRules 627 ("mini", rules, 
Transliterator.REVERSE); 628 expect(mini, syn, "syn"); 629 expect(mini, sayn, "saun"); 630 631 //| // Transliterate the Greek locale data 632 //| Locale el("el"); 633 //| DateFormatSymbols syms(el, status); 634 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 635 //| int32_t i, count; 636 //| const UnicodeString* data = syms.getMonths(count); 637 //| for (i=0; i<count; ++i) { 638 //| if (data[i].length() == 0) { 639 //| continue; 640 //| } 641 //| UnicodeString out(data[i]); 642 //| gl->transliterate(out); 643 //| bool_t ok = TRUE; 644 //| if (data[i].length() >= 2 && out.length() >= 2 && 645 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 646 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 647 //| ok = FALSE; 648 //| } 649 //| } 650 //| if (ok) { 651 //| logln(prettify(data[i] + " -> " + out)); 652 //| } else { 653 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 654 //| } 655 //| } 656 } 657 658 // /** 659 // * Prefix, suffix support in hex transliterators 660 // */ 661 // public void TestJ243() { 662 // // Test default Hex-Any, which should handle 663 // // \\u, \\U, u+, and U+ 664 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 665 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 666 // 667 // // Try a custom Hex-Any 668 // // \\uXXXX and &#xXXXX; 669 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 670 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 671 // "abcd5fx0123"); 672 // 673 // // Try custom Any-Hex (default is tested elsewhere) 674 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 675 // expect(hex3, "012", "012"); 676 // } 677 678 @Test 679 public void TestJ329() { 680 681 Object[] DATA = { 682 Boolean.FALSE, "a > b; c > d", 683 Boolean.TRUE, "a > b; no operator; c > d", 684 }; 685 686 for (int i=0; i<DATA.length; i+=2) { 687 
String err = null; 688 try { 689 Transliterator.createFromRules("<ID>", 690 (String) DATA[i+1], 691 Transliterator.FORWARD); 692 } catch (IllegalArgumentException e) { 693 err = e.getMessage(); 694 } 695 boolean gotError = (err != null); 696 String desc = (String) DATA[i+1] + 697 (gotError ? (" -> error: " + err) : " -> no error"); 698 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 699 logln("Ok: " + desc); 700 } else { 701 errln("FAIL: " + desc); 702 } 703 } 704 } 705 706 /** 707 * Test segments and segment references. 708 */ 709 @Test 710 public void TestSegments() { 711 // Array of 3n items 712 // Each item is <rules>, <input>, <expected output> 713 String[] DATA = { 714 "([a-z]) '.' ([0-9]) > $2 '-' $1", 715 "abc.123.xyz.456", 716 "ab1-c23.xy4-z56", 717 }; 718 719 for (int i=0; i<DATA.length; i+=3) { 720 logln("Pattern: " + Utility.escape(DATA[i])); 721 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 722 expect(t, DATA[i+1], DATA[i+2]); 723 } 724 } 725 726 /** 727 * Test cursor positioning outside of the key 728 */ 729 @Test 730 public void TestCursorOffset() { 731 // Array of 3n items 732 // Each item is <rules>, <input>, <expected output> 733 String[] DATA = { 734 "pre {alpha} post > | @ ALPHA ;" + 735 "eALPHA > beta ;" + 736 "pre {beta} post > BETA @@ | ;" + 737 "post > xyz", 738 739 "prealphapost prebetapost", 740 "prbetaxyz preBETApost", 741 }; 742 743 for (int i=0; i<DATA.length; i+=3) { 744 logln("Pattern: " + Utility.escape(DATA[i])); 745 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 746 expect(t, DATA[i+1], DATA[i+2]); 747 } 748 } 749 750 /** 751 * Test zero length and > 1 char length variable values. Test 752 * use of variable refs in UnicodeSets. 
753 */ 754 @Test 755 public void TestArbitraryVariableValues() { 756 // Array of 3n items 757 // Each item is <rules>, <input>, <expected output> 758 String[] DATA = { 759 "$abe = ab;" + 760 "$pat = x[yY]z;" + 761 "$ll = 'a-z';" + 762 "$llZ = [$ll];" + 763 "$llY = [$ll$pat];" + 764 "$emp = ;" + 765 766 "$abe > ABE;" + 767 "$pat > END;" + 768 "$llZ > 1;" + 769 "$llY > 2;" + 770 "7$emp 8 > 9;" + 771 "", 772 773 "ab xYzxyz stY78", 774 "ABE ENDEND 1129", 775 }; 776 777 for (int i=0; i<DATA.length; i+=3) { 778 logln("Pattern: " + Utility.escape(DATA[i])); 779 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 780 expect(t, DATA[i+1], DATA[i+2]); 781 } 782 } 783 784 /** 785 * Confirm that the contextStart, contextLimit, start, and limit 786 * behave correctly. 787 */ 788 @Test 789 public void TestPositionHandling() { 790 // Array of 3n items 791 // Each item is <rules>, <input>, <expected output> 792 String[] DATA = { 793 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 794 "xtat txtb", // pos 0,9,0,9 795 "xTTaSS TTxUUb", 796 797 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 798 "xtat txtb", // pos 2,9,3,8 799 "xtaSS TTxUUb", 800 801 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 802 "xtat txtb", // pos 3,8,3,8 803 "xtaTT TTxTTb", 804 }; 805 806 // Array of 4n positions -- these go with the DATA array 807 // They are: contextStart, contextLimit, start, limit 808 int[] POS = { 809 0, 9, 0, 9, 810 2, 9, 3, 8, 811 3, 8, 3, 8, 812 }; 813 814 int n = DATA.length/3; 815 for (int i=0; i<n; i++) { 816 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 817 Transliterator.Position pos = new Transliterator.Position( 818 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 819 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 820 t.transliterate(rsource, pos); 821 t.finishTransliteration(rsource, pos); 822 String result = rsource.toString(); 823 String exp = DATA[3*i+2]; 824 expectAux(Utility.escape(DATA[3*i]), 825 
DATA[3*i+1], 826 result, 827 result.equals(exp), 828 exp); 829 } 830 } 831 832 /** 833 * Test the Hiragana-Katakana transliterator. 834 */ 835 @Test 836 public void TestHiraganaKatakana() { 837 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 838 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 839 840 // Array of 3n items 841 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 842 String[] DATA = { 843 "both", 844 "\u3042\u3090\u3099\u3092\u3050", 845 "\u30A2\u30F8\u30F2\u30B0", 846 847 "kh", 848 "\u307C\u3051\u3060\u3042\u3093\u30FC", 849 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 850 }; 851 852 for (int i=0; i<DATA.length; i+=3) { 853 switch (DATA[i].charAt(0)) { 854 case 'h': // Hiragana-Katakana 855 expect(hk, DATA[i+1], DATA[i+2]); 856 break; 857 case 'k': // Katakana-Hiragana 858 expect(kh, DATA[i+2], DATA[i+1]); 859 break; 860 case 'b': // both 861 expect(hk, DATA[i+1], DATA[i+2]); 862 expect(kh, DATA[i+2], DATA[i+1]); 863 break; 864 } 865 } 866 867 } 868 869 @Test 870 public void TestCopyJ476() { 871 // This is a C++-only copy constructor test 872 } 873 874 /** 875 * Test inter-Indic transliterators. These are composed. 876 */ 877 @Test 878 public void TestInterIndic() { 879 String ID = "Devanagari-Gujarati"; 880 Transliterator dg = Transliterator.getInstance(ID); 881 if (dg == null) { 882 errln("FAIL: getInstance(" + ID + ") returned null"); 883 return; 884 } 885 String id = dg.getID(); 886 if (!id.equals(ID)) { 887 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 888 } 889 String dev = "\u0901\u090B\u0925"; 890 String guj = "\u0A81\u0A8B\u0AA5"; 891 expect(dg, dev, guj); 892 } 893 894 /** 895 * Test filter syntax in IDs. 
(J23) 896 */ 897 @Test 898 public void TestFilterIDs() { 899 String[] DATA = { 900 "[aeiou]Any-Hex", // ID 901 "[aeiou]Hex-Any", // expected inverse ID 902 "quizzical", // src 903 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 904 905 "[aeiou]Any-Hex;[^5]Hex-Any", 906 "[^5]Any-Hex;[aeiou]Hex-Any", 907 "quizzical", 908 "q\\u0075izzical", 909 910 "[abc]Null", 911 "[abc]Null", 912 "xyz", 913 "xyz", 914 }; 915 916 for (int i=0; i<DATA.length; i+=4) { 917 String ID = DATA[i]; 918 Transliterator t = Transliterator.getInstance(ID); 919 expect(t, DATA[i+2], DATA[i+3]); 920 921 // Check the ID 922 if (!ID.equals(t.getID())) { 923 errln("FAIL: getInstance(" + ID + ").getID() => " + 924 t.getID()); 925 } 926 927 // Check the inverse 928 String uID = DATA[i+1]; 929 Transliterator u = t.getInverse(); 930 if (u == null) { 931 errln("FAIL: " + ID + ".getInverse() returned NULL"); 932 } else if (!u.getID().equals(uID)) { 933 errln("FAIL: " + ID + ".getInverse().getID() => " + 934 u.getID() + ", expected " + uID); 935 } 936 } 937 } 938 939 /** 940 * Test the case mapping transliterators. 941 */ 942 @Test 943 public void TestCaseMap() { 944 Transliterator toUpper = 945 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 946 Transliterator toLower = 947 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 948 Transliterator toTitle = 949 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 950 951 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 952 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 953 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 954 "the quick brown foX jumped over the lazY dogs."); 955 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 956 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 957 } 958 959 /** 960 * Test the name mapping transliterators. 
961 */ 962 @Test 963 public void TestNameMap() { 964 Transliterator uni2name = 965 Transliterator.getInstance("Any-Name[^abc]"); 966 Transliterator name2uni = 967 Transliterator.getInstance("Name-Any"); 968 969 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 970 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 971 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 972 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 973 974 // round trip 975 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 976 977 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 978 expect(t, s, s); 979 } 980 981 /** 982 * Test liberalized ID syntax. 1006c 983 */ 984 @Test 985 public void TestLiberalizedID() { 986 // Some test cases have an expected getID() value of NULL. This 987 // means I have disabled the test case for now. This stuff is 988 // still under development, and I haven't decided whether to make 989 // getID() return canonical case yet. It will all get rewritten 990 // with the move to Source-Target/Variant IDs anyway. 
[aliu] 991 String DATA[] = { 992 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 993 " Null ", "Null", "whitespace", 994 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 995 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 996 }; 997 998 for (int i=0; i<DATA.length; i+=3) { 999 try { 1000 Transliterator t = Transliterator.getInstance(DATA[i]); 1001 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 1002 logln("Ok: " + DATA[i+2] + 1003 " create ID \"" + DATA[i] + "\" => \"" + 1004 t.getID() + "\""); 1005 } else { 1006 errln("FAIL: " + DATA[i+2] + 1007 " create ID \"" + DATA[i] + "\" => \"" + 1008 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 1009 } 1010 } catch (IllegalArgumentException e) { 1011 errln("FAIL: " + DATA[i+2] + 1012 " create ID \"" + DATA[i] + "\""); 1013 } 1014 } 1015 } 1016 1017 @Test 1018 public void TestCreateInstance() { 1019 String FORWARD = "F"; 1020 String REVERSE = "R"; 1021 String DATA[] = { 1022 // Column 1: id 1023 // Column 2: direction 1024 // Column 3: expected ID, or "" if expect failure 1025 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 1026 1027 // JB#2689: bad compound causes crash 1028 "InvalidSource-InvalidTarget", FORWARD, "", 1029 "InvalidSource-InvalidTarget", REVERSE, "", 1030 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 1031 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 1032 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 1033 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 1034 1035 null 1036 }; 1037 1038 for (int i=0; DATA[i]!=null; i+=3) { 1039 String id=DATA[i]; 1040 int dir = (DATA[i+1]==FORWARD)? 
1041 Transliterator.FORWARD:Transliterator.REVERSE; 1042 String expID=DATA[i+2]; 1043 Exception e = null; 1044 Transliterator t; 1045 try { 1046 t = Transliterator.getInstance(id,dir); 1047 } catch (Exception e1) { 1048 e = e1; 1049 t = null; 1050 } 1051 String newID = (t!=null)?t.getID():""; 1052 boolean ok = (newID.equals(expID)); 1053 if (t==null) { 1054 newID = e.getMessage(); 1055 } 1056 if (ok) { 1057 logln("Ok: createInstance(" + 1058 id + "," + DATA[i+1] + ") => " + newID); 1059 } else { 1060 errln("FAIL: createInstance(" + 1061 id + "," + DATA[i+1] + ") => " + newID + 1062 ", expected " + expID); 1063 } 1064 } 1065 } 1066 1067 /** 1068 * Test the normalization transliterator. 1069 */ 1070 @Test 1071 public void TestNormalizationTransliterator() { 1072 // THE FOLLOWING TWO TABLES ARE COPIED FROM android.icu.dev.test.normalizer.BasicTest 1073 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1074 String[][] CANON = { 1075 // Input Decomposed Composed 1076 {"cat", "cat", "cat" }, 1077 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1078 1079 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1080 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1081 1082 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1083 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1084 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1085 1086 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1087 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1088 1089 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1090 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1091 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1092 1093 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1094 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1095 1096 {"\u00fdffin", 
"y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1097 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1098 1099 {"Henry IV", "Henry IV", "Henry IV" }, 1100 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1101 1102 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1103 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1104 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1105 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1106 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1107 1108 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1109 }; 1110 1111 String[][] COMPAT = { 1112 // Input Decomposed Composed 1113 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1114 1115 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1116 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1117 1118 {"Henry IV", "Henry IV", "Henry IV" }, 1119 {"Henry \u2163", "Henry IV", "Henry IV" }, 1120 1121 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1122 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1123 1124 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1125 }; 1126 1127 Transliterator NFD = Transliterator.getInstance("NFD"); 1128 Transliterator NFC = Transliterator.getInstance("NFC"); 1129 for (int i=0; i<CANON.length; ++i) { 1130 String in = CANON[i][0]; 1131 String expd = CANON[i][1]; 1132 String expc = CANON[i][2]; 1133 expect(NFD, in, expd); 1134 expect(NFC, in, expc); 1135 } 1136 1137 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1138 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1139 for (int i=0; i<COMPAT.length; ++i) { 1140 String in = COMPAT[i][0]; 1141 String expkd = COMPAT[i][1]; 1142 String expkc = COMPAT[i][2]; 1143 expect(NFKD, in, expkd); 1144 expect(NFKC, in, expkc); 1145 } 1146 1147 Transliterator t = Transliterator.getInstance("NFD; [x]Remove"); 
1148 expect(t, "\u010dx", "c\u030C"); 1149 } 1150 1151 /** 1152 * Test compound RBT rules. 1153 */ 1154 @Test 1155 public void TestCompoundRBT() { 1156 // Careful with spacing and ';' here: Phrase this exactly 1157 // as toRules() is going to return it. If toRules() changes 1158 // with regard to spacing or ';', then adjust this string. 1159 String rule = "::Hex-Any;\n" + 1160 "::Any-Lower;\n" + 1161 "a > '.A.';\n" + 1162 "b > '.B.';\n" + 1163 "::[^t]Any-Upper;"; 1164 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1165 if (t == null) { 1166 errln("FAIL: createFromRules failed"); 1167 return; 1168 } 1169 expect(t, "\u0043at in the hat, bat on the mat", 1170 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1171 String r = t.toRules(true); 1172 if (r.equals(rule)) { 1173 logln("OK: toRules() => " + r); 1174 } else { 1175 errln("FAIL: toRules() => " + r + 1176 ", expected " + rule); 1177 } 1178 1179 // Now test toRules 1180 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1181 if (t == null) { 1182 errln("FAIL: createInstance failed"); 1183 return; 1184 } 1185 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1186 r = t.toRules(true); 1187 if (!r.equals(exp)) { 1188 errln("FAIL: toRules() => " + r + 1189 ", expected " + exp); 1190 } else { 1191 logln("OK: toRules() => " + r); 1192 } 1193 1194 // Round trip the result of toRules 1195 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1196 if (t == null) { 1197 errln("FAIL: createFromRules #2 failed"); 1198 return; 1199 } else { 1200 logln("OK: createFromRules(" + r + ") succeeded"); 1201 } 1202 1203 // Test toRules again 1204 r = t.toRules(true); 1205 if (!r.equals(exp)) { 1206 errln("FAIL: toRules() => " + r + 1207 ", expected " + exp); 1208 } else { 1209 logln("OK: toRules() => " + r); 1210 } 1211 1212 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform 1213 // to what the regenerated ID will look like. 
1214 String id = "Upper(Lower);(NFKC)"; 1215 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1216 if (t == null) { 1217 errln("FAIL: createInstance #2 failed"); 1218 return; 1219 } 1220 if (t.getID().equals(id)) { 1221 logln("OK: created " + id); 1222 } else { 1223 errln("FAIL: createInstance(" + id + 1224 ").getID() => " + t.getID()); 1225 } 1226 1227 Transliterator u = t.getInverse(); 1228 if (u == null) { 1229 errln("FAIL: createInverse failed"); 1230 return; 1231 } 1232 exp = "NFKC();Lower(Upper)"; 1233 if (u.getID().equals(exp)) { 1234 logln("OK: createInverse(" + id + ") => " + 1235 u.getID()); 1236 } else { 1237 errln("FAIL: createInverse(" + id + ") => " + 1238 u.getID()); 1239 } 1240 } 1241 1242 /** 1243 * Compound filter semantics were orginially not implemented 1244 * correctly. Originally, each component filter f(i) is replaced by 1245 * f'(i) = f(i) && g, where g is the filter for the compound 1246 * transliterator. 1247 * 1248 * From Mark: 1249 * 1250 * Suppose and I have a transliterator X. Internally X is 1251 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1252 * 1253 * The compound should convert all greek characters (through latin) to 1254 * cyrillic, then lowercase the result. The filter should say "don't 1255 * touch 'A' in the original". But because an intermediate result 1256 * happens to go through "A", the Greek Alpha gets hung up. 1257 */ 1258 @Test 1259 public void TestCompoundFilter() { 1260 Transliterator t = Transliterator.getInstance 1261 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1262 t.setFilter(new UnicodeSet("[^A]")); 1263 1264 // Only the 'A' at index 1 should remain unchanged 1265 expect(t, 1266 CharsToUnicodeString("BA\\u039A\\u0391"), 1267 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1268 } 1269 1270 /** 1271 * Test the "Remove" transliterator. 
1272 */ 1273 @Test 1274 public void TestRemove() { 1275 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1276 expect(t, "The quick brown fox.", 1277 "Th qck brwn fx."); 1278 } 1279 1280 @Test 1281 public void TestToRules() { 1282 String RBT = "rbt"; 1283 String SET = "set"; 1284 String[] DATA = { 1285 RBT, 1286 "$a=\\u4E61; [$a] > A;", 1287 "[\\u4E61] > A;", 1288 1289 RBT, 1290 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1291 "[[:Zs:][:Zl:]]{a} > A;", 1292 1293 SET, 1294 "[[:Zs:][:Zl:]]", 1295 "[[:Zs:][:Zl:]]", 1296 1297 SET, 1298 "[:Ps:]", 1299 "[:Ps:]", 1300 1301 SET, 1302 "[:L:]", 1303 "[:L:]", 1304 1305 SET, 1306 "[[:L:]-[A]]", 1307 "[[:L:]-[A]]", 1308 1309 SET, 1310 "[~[:Lu:][:Ll:]]", 1311 "[~[:Lu:][:Ll:]]", 1312 1313 SET, 1314 "[~[a-z]]", 1315 "[~[a-z]]", 1316 1317 RBT, 1318 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1319 "[^[:Zs:]]{a} > A;", 1320 1321 RBT, 1322 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1323 "[[a-z]-[:Zs:]]{a} > A;", 1324 1325 RBT, 1326 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1327 "[[:Zs:]&[a-z]]{a} > A;", 1328 1329 RBT, 1330 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1331 "[x[:Zs:]]{a} > A;", 1332 1333 RBT, 1334 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1335 "$macron = \\u0304 ;"+ 1336 "$evowel = [aeiouyAEIOUY] ;"+ 1337 "$iotasub = \\u0345 ;"+ 1338 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1339 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1340 1341 RBT, 1342 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1343 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1344 }; 1345 1346 for (int d=0; d < DATA.length; d+=3) { 1347 if (DATA[d] == RBT) { 1348 // Transliterator test 1349 Transliterator t = Transliterator.createFromRules("ID", 1350 DATA[d+1], Transliterator.FORWARD); 1351 if (t == null) { 1352 errln("FAIL: createFromRules failed"); 1353 return; 1354 } 1355 String rules, escapedRules; 1356 rules = t.toRules(false); 1357 
escapedRules = t.toRules(true); 1358 String expRules = Utility.unescape(DATA[d+2]); 1359 String expEscapedRules = DATA[d+2]; 1360 if (rules.equals(expRules)) { 1361 logln("Ok: " + DATA[d+1] + 1362 " => " + Utility.escape(rules)); 1363 } else { 1364 errln("FAIL: " + DATA[d+1] + 1365 " => " + Utility.escape(rules + ", exp " + expRules)); 1366 } 1367 if (escapedRules.equals(expEscapedRules)) { 1368 logln("Ok: " + DATA[d+1] + 1369 " => " + escapedRules); 1370 } else { 1371 errln("FAIL: " + DATA[d+1] + 1372 " => " + escapedRules + ", exp " + expEscapedRules); 1373 } 1374 1375 } else { 1376 // UnicodeSet test 1377 String pat = DATA[d+1]; 1378 String expToPat = DATA[d+2]; 1379 UnicodeSet set = new UnicodeSet(pat); 1380 1381 // Adjust spacing etc. as necessary. 1382 String toPat; 1383 toPat = set.toPattern(true); 1384 if (expToPat.equals(toPat)) { 1385 logln("Ok: " + pat + 1386 " => " + toPat); 1387 } else { 1388 errln("FAIL: " + pat + 1389 " => " + Utility.escape(toPat) + 1390 ", exp " + Utility.escape(pat)); 1391 } 1392 } 1393 } 1394 } 1395 1396 @Test 1397 public void TestContext() { 1398 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1399 1400 expect("de > x; {d}e > y;", 1401 "de", 1402 "ye", 1403 pos); 1404 1405 expect("ab{c} > z;", 1406 "xadabdabcy", 1407 "xadabdabzy"); 1408 } 1409 1410 static final String CharsToUnicodeString(String s) { 1411 return Utility.unescape(s); 1412 } 1413 1414 @Test 1415 public void TestSupplemental() { 1416 1417 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1418 "a > $a; $s > i;"), 1419 CharsToUnicodeString("ab\\U0001030Fx"), 1420 CharsToUnicodeString("\\U00010300bix")); 1421 1422 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1423 "$b=[A-Z\\U00010400-\\U0001044D];" + 1424 "($a)($b) > $2 $1;"), 1425 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1426 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 
1427 1428 // k|ax\\U00010300xm 1429 1430 // k|a\\U00010400\\U00010300xm 1431 // ky|\\U00010400\\U00010300xm 1432 // ky\\U00010400|\\U00010300xm 1433 1434 // ky\\U00010400|\\U00010300\\U00010400m 1435 // ky\\U00010400y|\\U00010400m 1436 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1437 "$a {x} > | @ \\U00010400;" + 1438 "{$a} [^\\u0000-\\uFFFF] > y;"), 1439 CharsToUnicodeString("kax\\U00010300xm"), 1440 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1441 1442 expect(Transliterator.getInstance("Any-Name"), 1443 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1444 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1445 1446 expect(Transliterator.getInstance("Name-Any"), 1447 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1448 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1449 1450 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1451 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1452 "U+10330U+10FF00U+E0061U+00A0"); 1453 1454 expect(Transliterator.getInstance("Any-Hex/C"), 1455 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1456 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1457 1458 expect(Transliterator.getInstance("Any-Hex/Perl"), 1459 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1460 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1461 1462 expect(Transliterator.getInstance("Any-Hex/Java"), 1463 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1464 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1465 1466 expect(Transliterator.getInstance("Any-Hex/XML"), 1467 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1468 "𐌰􏼀󠁡 "); 1469 1470 expect(Transliterator.getInstance("Any-Hex/XML10"), 1471 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1472 "𐌰􏼀󠁡 "); 1473 1474 expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1475 
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1476 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1477 } 1478 1479 @Test 1480 public void TestQuantifier() { 1481 1482 // Make sure @ in a quantified anteContext works 1483 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1484 "AAAAAb", 1485 "aaa(aac)"); 1486 1487 // Make sure @ in a quantified postContext works 1488 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1489 "baaaaa", 1490 "caa(aaa)"); 1491 1492 // Make sure @ in a quantified postContext with seg ref works 1493 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1494 "baaaaa", 1495 "baa(aaa)"); 1496 1497 // Make sure @ past ante context doesn't enter ante context 1498 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1499 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1500 "xxxab", 1501 "xxx(ac)", 1502 pos); 1503 1504 // Make sure @ past post context doesn't pass limit 1505 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1506 expect("{b} a+ > c @@ |; x > y; a > A;", 1507 "baxx", 1508 "caxx", 1509 pos2); 1510 1511 // Make sure @ past post context doesn't enter post context 1512 expect("{b} a+ > c @@ |; x > y; a > A;", 1513 "baxx", 1514 "cayy"); 1515 1516 expect("(ab)? c > d;", 1517 "c abc ababc", 1518 "d d abd"); 1519 1520 // NOTE: The (ab)+ when referenced just yields a single "ab", 1521 // not the full sequence of them. This accords with perl behavior. 
1522 expect("(ab)+ {x} > '(' $1 ')';", 1523 "x abx ababxy", 1524 "x ab(ab) abab(ab)y"); 1525 1526 expect("b+ > x;", 1527 "ac abc abbc abbbc", 1528 "ac axc axc axc"); 1529 1530 expect("[abc]+ > x;", 1531 "qac abrc abbcs abtbbc", 1532 "qx xrx xs xtx"); 1533 1534 expect("q{(ab)+} > x;", 1535 "qa qab qaba qababc qaba", 1536 "qa qx qxa qxc qxa"); 1537 1538 expect("q(ab)* > x;", 1539 "qa qab qaba qababc", 1540 "xa x xa xc"); 1541 1542 // NOTE: The (ab)+ when referenced just yields a single "ab", 1543 // not the full sequence of them. This accords with perl behavior. 1544 expect("q(ab)* > '(' $1 ')';", 1545 "qa qab qaba qababc", 1546 "()a (ab) (ab)a (ab)c"); 1547 1548 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1549 // quoted string 1550 expect("'ab'+ > x;", 1551 "bb ab ababb", 1552 "bb x xb"); 1553 1554 // $foo+ and $foo* -- the quantifier should apply to the entire 1555 // variable reference 1556 expect("$var = ab; $var+ > x;", 1557 "bb ab ababb", 1558 "bb x xb"); 1559 } 1560 1561 static class TestFact implements Transliterator.Factory { 1562 static class NameableNullTrans extends Transliterator { 1563 public NameableNullTrans(String id) { 1564 super(id, null); 1565 } 1566 protected void handleTransliterate(Replaceable text, 1567 Position offsets, boolean incremental) { 1568 offsets.start = offsets.limit; 1569 } 1570 } 1571 String id; 1572 public TestFact(String theID) { 1573 id = theID; 1574 } 1575 public Transliterator getInstance(String ignoredID) { 1576 return new NameableNullTrans(id); 1577 } 1578 } 1579 1580 @Test 1581 public void TestSTV() { 1582 Enumeration es = Transliterator.getAvailableSources(); 1583 for (int i=0; es.hasMoreElements(); ++i) { 1584 String source = (String) es.nextElement(); 1585 logln("" + i + ": " + source); 1586 if (source.length() == 0) { 1587 errln("FAIL: empty source"); 1588 continue; 1589 } 1590 Enumeration et = Transliterator.getAvailableTargets(source); 1591 for (int j=0; et.hasMoreElements(); ++j) { 1592 String 
target = (String) et.nextElement(); 1593 logln(" " + j + ": " + target); 1594 if (target.length() == 0) { 1595 errln("FAIL: empty target"); 1596 continue; 1597 } 1598 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1599 for (int k=0; ev.hasMoreElements(); ++k) { 1600 String variant = (String) ev.nextElement(); 1601 if (variant.length() == 0) { 1602 logln(" " + k + ": <empty>"); 1603 } else { 1604 logln(" " + k + ": " + variant); 1605 } 1606 } 1607 } 1608 } 1609 1610 // Test registration 1611 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1612 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1613 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1614 for (int i=0; i<3; ++i) { 1615 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1616 try { 1617 Transliterator t = Transliterator.getInstance(IDS[i]); 1618 if (t.getID().equals(IDS[i])) { 1619 logln("Ok: Registration/creation succeeded for ID " + 1620 IDS[i]); 1621 } else { 1622 errln("FAIL: Registration of ID " + 1623 IDS[i] + " creates ID " + t.getID()); 1624 } 1625 Transliterator.unregister(IDS[i]); 1626 try { 1627 t = Transliterator.getInstance(IDS[i]); 1628 errln("FAIL: Unregistration failed for ID " + 1629 IDS[i] + "; still receiving ID " + t.getID()); 1630 } catch (IllegalArgumentException e2) { 1631 // Good; this is what we expect 1632 logln("Ok; Unregistered " + IDS[i]); 1633 } 1634 } catch (IllegalArgumentException e) { 1635 errln("FAIL: Registration/creation failed for ID " + 1636 IDS[i]); 1637 } finally { 1638 Transliterator.unregister(IDS[i]); 1639 } 1640 } 1641 1642 // Make sure getAvailable API reflects removal 1643 for (Enumeration e = Transliterator.getAvailableIDs(); 1644 e.hasMoreElements(); ) { 1645 String id = (String) e.nextElement(); 1646 for (int i=0; i<3; ++i) { 1647 if (id.equals(FULL_IDS[i])) { 1648 errln("FAIL: unregister(" + id + ") failed"); 1649 } 1650 } 1651 } 1652 for (Enumeration e = 
Transliterator.getAvailableTargets("Any"); 1653 e.hasMoreElements(); ) { 1654 String t = (String) e.nextElement(); 1655 if (t.equals(IDS[0])) { 1656 errln("FAIL: unregister(Any-" + t + ") failed"); 1657 } 1658 } 1659 for (Enumeration e = Transliterator.getAvailableSources(); 1660 e.hasMoreElements(); ) { 1661 String s = (String) e.nextElement(); 1662 for (int i=0; i<3; ++i) { 1663 if (SOURCES[i] == null) continue; 1664 if (s.equals(SOURCES[i])) { 1665 errln("FAIL: unregister(" + s + "-*) failed"); 1666 } 1667 } 1668 } 1669 } 1670 1671 /** 1672 * Test inverse of Greek-Latin; Title() 1673 */ 1674 @Test 1675 public void TestCompoundInverse() { 1676 Transliterator t = Transliterator.getInstance 1677 ("Greek-Latin; Title()", Transliterator.REVERSE); 1678 if (t == null) { 1679 errln("FAIL: createInstance"); 1680 return; 1681 } 1682 String exp = "(Title);Latin-Greek"; 1683 if (t.getID().equals(exp)) { 1684 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1685 t.getID()); 1686 } else { 1687 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1688 t.getID() + "\", expected \"" + exp + "\""); 1689 } 1690 } 1691 1692 /** 1693 * Test NFD chaining with RBT 1694 */ 1695 @Test 1696 public void TestNFDChainRBT() { 1697 Transliterator t = Transliterator.createFromRules( 1698 "TEST", "::NFD; aa > Q; a > q;", 1699 Transliterator.FORWARD); 1700 logln(t.toRules(true)); 1701 expect(t, "aa", "Q"); 1702 } 1703 1704 /** 1705 * Inverse of "Null" should be "Null". (J21) 1706 */ 1707 @Test 1708 public void TestNullInverse() { 1709 Transliterator t = Transliterator.getInstance("Null"); 1710 Transliterator u = t.getInverse(); 1711 if (!u.getID().equals("Null")) { 1712 errln("FAIL: Inverse of Null should be Null"); 1713 } 1714 } 1715 1716 /** 1717 * Check ID of inverse of alias. 
(J22) 1718 */ 1719 @Test 1720 public void TestAliasInverseID() { 1721 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1722 Transliterator t = Transliterator.getInstance(ID); 1723 Transliterator u = t.getInverse(); 1724 String exp = "Hangul-Latin"; 1725 String got = u.getID(); 1726 if (!got.equals(exp)) { 1727 errln("FAIL: Inverse of " + ID + " is " + got + 1728 ", expected " + exp); 1729 } 1730 } 1731 1732 /** 1733 * Test IDs of inverses of compound transliterators. (J20) 1734 */ 1735 @Test 1736 public void TestCompoundInverseID() { 1737 String ID = "Latin-Jamo;NFC(NFD)"; 1738 Transliterator t = Transliterator.getInstance(ID); 1739 Transliterator u = t.getInverse(); 1740 String exp = "NFD(NFC);Jamo-Latin"; 1741 String got = u.getID(); 1742 if (!got.equals(exp)) { 1743 errln("FAIL: Inverse of " + ID + " is " + got + 1744 ", expected " + exp); 1745 } 1746 } 1747 1748 /** 1749 * Test undefined variable. 1750 */ 1751 @Test 1752 public void TestUndefinedVariable() { 1753 String rule = "$initial } a <> \u1161;"; 1754 try { 1755 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1756 } catch (IllegalArgumentException e) { 1757 logln("OK: Got exception for " + rule + ", as expected: " + 1758 e.getMessage()); 1759 return; 1760 } 1761 errln("Fail: bogus rule " + rule + " compiled without error"); 1762 } 1763 1764 /** 1765 * Test empty context. 1766 */ 1767 @Test 1768 public void TestEmptyContext() { 1769 expect(" { a } > b;", "xay a ", "xby b "); 1770 } 1771 1772 /** 1773 * Test compound filter ID syntax 1774 */ 1775 @Test 1776 public void TestCompoundFilterID() { 1777 String[] DATA = { 1778 // Col. 1 = ID or rule set (latter must start with #) 1779 1780 // = columns > 1 are null if expect col. 1 to be illegal = 1781 1782 // Col. 2 = direction, "F..." or "R..." 1783 // Col. 3 = source string 1784 // Col. 
4 = exp result 1785 1786 "[abc]; [abc]", null, null, null, // multiple filters 1787 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1788 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1789 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1790 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1791 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1792 }; 1793 1794 for (int i=0; i<DATA.length; i+=4) { 1795 String id = DATA[i]; 1796 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1797 Transliterator.REVERSE : Transliterator.FORWARD; 1798 String source = DATA[i+2]; 1799 String exp = DATA[i+3]; 1800 boolean expOk = (DATA[i+1] != null); 1801 Transliterator t = null; 1802 IllegalArgumentException e = null; 1803 try { 1804 if (id.charAt(0) == '#') { 1805 t = Transliterator.createFromRules("ID", id, direction); 1806 } else { 1807 t = Transliterator.getInstance(id, direction); 1808 } 1809 } catch (IllegalArgumentException ee) { 1810 e = ee; 1811 } 1812 boolean ok = (t != null && e == null); 1813 if (ok == expOk) { 1814 logln("Ok: " + id + " => " + t + 1815 (e != null ? (", " + e.getMessage()) : "")); 1816 if (source != null) { 1817 expect(t, source, exp); 1818 } 1819 } else { 1820 errln("FAIL: " + id + " => " + t + 1821 (e != null ? (", " + e.getMessage()) : "")); 1822 } 1823 } 1824 } 1825 1826 /** 1827 * Test new property set syntax 1828 */ 1829 @Test 1830 public void TestPropertySet() { 1831 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1832 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1833 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1834 } 1835 1836 /** 1837 * Test various failure points of the new 2.0 engine. 
1838 */ 1839 @Test 1840 public void TestNewEngine() { 1841 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1842 // Katakana should be untouched 1843 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1844 1845 if (true) { 1846 // This test will only work if Transliterator.ROLLBACK is 1847 // true. Otherwise, this test will fail, revealing a 1848 // limitation of global filters in incremental mode. 1849 1850 Transliterator a = 1851 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1852 Transliterator A = 1853 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1854 1855 //Transliterator array[] = new Transliterator[] { 1856 // a, 1857 // Transliterator.getInstance("NFD"), 1858 // A }; 1859 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1860 1861 try { 1862 Transliterator.registerInstance(a); 1863 Transliterator.registerInstance(A); 1864 1865 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1866 expect(t, "aAaA", "bAbA"); 1867 1868 Transliterator[] u = t.getElements(); 1869 assertTrue("getElements().length", u.length == 3); 1870 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1871 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1872 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1873 1874 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1875 t.setFilter(new UnicodeSet("[:Ll:]")); 1876 expect(t, "aAaA", "bAbA"); 1877 } finally { 1878 Transliterator.unregister("a_to_A"); 1879 Transliterator.unregister("A_to_b"); 1880 } 1881 } 1882 1883 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1884 "a", 1885 "ax"); 1886 1887 String gr = 1888 "$ddot = \u0308 ;" + 1889 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1890 "$rough = \u0314 ;" + 1891 "($lcgvowel+ $ddot?) 
$rough > h | $1 ;" + 1892 "\u03b1 <> a ;" + 1893 "$rough <> h ;"; 1894 1895 expect(gr, "\u03B1\u0314", "ha"); 1896 } 1897 1898 /** 1899 * Test quantified segment behavior. We want: 1900 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1901 */ 1902 @Test 1903 public void TestQuantifiedSegment() { 1904 // The normal case 1905 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1906 1907 // The tricky case; the quantifier is around the segment 1908 expect("([abc])+ > x $1 x;", "cba", "xax"); 1909 1910 // Tricky case in reverse direction 1911 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1912 1913 // Check post-context segment 1914 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1915 1916 // Test toRule/toPattern for non-quantified segment. 1917 // Careful with spacing here. 1918 String r = "([a-c]){q} > x $1 x;"; 1919 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1920 String rr = t.toRules(true); 1921 if (!r.equals(rr)) { 1922 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1923 } else { 1924 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1925 } 1926 1927 // Test toRule/toPattern for quantified segment. 1928 // Careful with spacing here. 
1929 r = "([a-c])+{q} > x $1 x;"; 1930 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1931 rr = t.toRules(true); 1932 if (!r.equals(rr)) { 1933 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1934 } else { 1935 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1936 } 1937 } 1938 1939 //====================================================================== 1940 // Ram's tests 1941 //====================================================================== 1942 /* this test performs test of rules in ISO 15915 */ 1943 @Test 1944 public void TestDevanagariLatinRT(){ 1945 String[] source = { 1946 "bh\u0101rata", 1947 "kra", 1948 "k\u1E63a", 1949 "khra", 1950 "gra", 1951 "\u1E45ra", 1952 "cra", 1953 "chra", 1954 "j\u00F1a", 1955 "jhra", 1956 "\u00F1ra", 1957 "\u1E6Dya", 1958 "\u1E6Dhra", 1959 "\u1E0Dya", 1960 //"r\u0323ya", // \u095c is not valid in Devanagari 1961 "\u1E0Dhya", 1962 "\u1E5Bhra", 1963 "\u1E47ra", 1964 "tta", 1965 "thra", 1966 "dda", 1967 "dhra", 1968 "nna", 1969 "pra", 1970 "phra", 1971 "bra", 1972 "bhra", 1973 "mra", 1974 "\u1E49ra", 1975 //"l\u0331ra", 1976 "yra", 1977 "\u1E8Fra", 1978 //"l-", 1979 "vra", 1980 "\u015Bra", 1981 "\u1E63ra", 1982 "sra", 1983 "hma", 1984 "\u1E6D\u1E6Da", 1985 "\u1E6D\u1E6Dha", 1986 "\u1E6Dh\u1E6Dha", 1987 "\u1E0D\u1E0Da", 1988 "\u1E0D\u1E0Dha", 1989 "\u1E6Dya", 1990 "\u1E6Dhya", 1991 "\u1E0Dya", 1992 "\u1E0Dhya", 1993 // Not roundtrippable -- 1994 // \u0939\u094d\u094d\u092E - hma 1995 // \u0939\u094d\u092E - hma 1996 // CharsToUnicodeString("hma"), 1997 "hya", 1998 "\u015Br\u0325", 1999 "\u015Bca", 2000 "\u0115", 2001 "san\u0304j\u012Bb s\u0113nagupta", 2002 "\u0101nand vaddir\u0101ju", 2003 }; 2004 String[] expected = { 2005 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 2006 "\u0915\u094D\u0930", /* kra */ 2007 "\u0915\u094D\u0937", /* ks\u0323a */ 2008 "\u0916\u094D\u0930", /* khra */ 2009 "\u0917\u094D\u0930", /* gra */ 2010 "\u0919\u094D\u0930", /* n\u0307ra */ 2011 
"\u091A\u094D\u0930", /* cra */ 2012 "\u091B\u094D\u0930", /* chra */ 2013 "\u091C\u094D\u091E", /* jn\u0303a */ 2014 "\u091D\u094D\u0930", /* jhra */ 2015 "\u091E\u094D\u0930", /* n\u0303ra */ 2016 "\u091F\u094D\u092F", /* t\u0323ya */ 2017 "\u0920\u094D\u0930", /* t\u0323hra */ 2018 "\u0921\u094D\u092F", /* d\u0323ya */ 2019 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 2020 "\u0922\u094D\u092F", /* d\u0323hya */ 2021 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 2022 "\u0923\u094D\u0930", /* n\u0323ra */ 2023 "\u0924\u094D\u0924", /* tta */ 2024 "\u0925\u094D\u0930", /* thra */ 2025 "\u0926\u094D\u0926", /* dda */ 2026 "\u0927\u094D\u0930", /* dhra */ 2027 "\u0928\u094D\u0928", /* nna */ 2028 "\u092A\u094D\u0930", /* pra */ 2029 "\u092B\u094D\u0930", /* phra */ 2030 "\u092C\u094D\u0930", /* bra */ 2031 "\u092D\u094D\u0930", /* bhra */ 2032 "\u092E\u094D\u0930", /* mra */ 2033 "\u0929\u094D\u0930", /* n\u0331ra */ 2034 //"\u0934\u094D\u0930", /* l\u0331ra */ 2035 "\u092F\u094D\u0930", /* yra */ 2036 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 2037 //"l-", 2038 "\u0935\u094D\u0930", /* vra */ 2039 "\u0936\u094D\u0930", /* s\u0301ra */ 2040 "\u0937\u094D\u0930", /* s\u0323ra */ 2041 "\u0938\u094D\u0930", /* sra */ 2042 "\u0939\u094d\u092E", /* hma */ 2043 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 2044 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 2045 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 2046 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 2047 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 2048 "\u091F\u094D\u092F", /* t\u0323ya */ 2049 "\u0920\u094D\u092F", /* t\u0323hya */ 2050 "\u0921\u094D\u092F", /* d\u0323ya */ 2051 "\u0922\u094D\u092F", /* d\u0323hya */ 2052 // "hma", /* hma */ 2053 "\u0939\u094D\u092F", /* hya */ 2054 "\u0936\u0943", /* s\u0301r\u0325a */ 2055 "\u0936\u094D\u091A", /* s\u0301ca */ 2056 "\u090d", /* e\u0306 */ 2057 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2058 
"\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2059 }; 2060 2061 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2062 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2063 2064 for(int i= 0; i<source.length; i++){ 2065 expect(latinToDev,(source[i]),(expected[i])); 2066 expect(devToLatin,(expected[i]),(source[i])); 2067 } 2068 2069 } 2070 @Test 2071 public void TestTeluguLatinRT(){ 2072 String[] source = { 2073 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2074 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2075 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2076 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2077 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2078 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2079 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2080 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2081 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2082 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2083 }; 2084 2085 String[] expected = { 2086 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2087 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2088 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2089 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2090 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2091 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2092 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2093 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f 
\u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2094 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2095 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2096 }; 2097 2098 2099 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2100 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2101 2102 for(int i= 0; i<source.length; i++){ 2103 expect(latinToDev,(source[i]),(expected[i])); 2104 expect(devToLatin,(expected[i]),(source[i])); 2105 } 2106 } 2107 2108 @Test 2109 public void TestSanskritLatinRT(){ 2110 int MAX_LEN =15; 2111 String[] source = { 2112 "rmk\u1E63\u0113t", 2113 "\u015Br\u012Bmad", 2114 "bhagavadg\u012Bt\u0101", 2115 "adhy\u0101ya", 2116 "arjuna", 2117 "vi\u1E63\u0101da", 2118 "y\u014Dga", 2119 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2120 "uv\u0101cr\u0325", 2121 "dharmak\u1E63\u0113tr\u0113", 2122 "kuruk\u1E63\u0113tr\u0113", 2123 "samav\u0113t\u0101", 2124 "yuyutsava\u1E25", 2125 "m\u0101mak\u0101\u1E25", 2126 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2127 "kimakurvata", 2128 "san\u0304java", 2129 }; 2130 String[] expected = { 2131 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2132 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2133 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2134 "\u0905\u0927\u094d\u092f\u093e\u092f", 2135 "\u0905\u0930\u094d\u091c\u0941\u0928", 2136 "\u0935\u093f\u0937\u093e\u0926", 2137 "\u092f\u094b\u0917", 2138 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2139 "\u0909\u0935\u093E\u091A\u0943", 2140 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2141 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2142 "\u0938\u092e\u0935\u0947\u0924\u093e", 2143 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2144 
"\u092e\u093e\u092e\u0915\u093e\u0903", 2145 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2146 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2147 "\u0938\u0902\u091c\u0935", 2148 }; 2149 2150 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2151 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2152 for(int i= 0; i<MAX_LEN; i++){ 2153 expect(latinToDev,(source[i]),(expected[i])); 2154 expect(devToLatin,(expected[i]),(source[i])); 2155 } 2156 } 2157 2158 @Test 2159 public void TestCompoundLatinRT(){ 2160 int MAX_LEN =15; 2161 String[] source = { 2162 "rmk\u1E63\u0113t", 2163 "\u015Br\u012Bmad", 2164 "bhagavadg\u012Bt\u0101", 2165 "adhy\u0101ya", 2166 "arjuna", 2167 "vi\u1E63\u0101da", 2168 "y\u014Dga", 2169 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2170 "uv\u0101cr\u0325", 2171 "dharmak\u1E63\u0113tr\u0113", 2172 "kuruk\u1E63\u0113tr\u0113", 2173 "samav\u0113t\u0101", 2174 "yuyutsava\u1E25", 2175 "m\u0101mak\u0101\u1E25", 2176 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2177 "kimakurvata", 2178 "san\u0304java" 2179 }; 2180 String[] expected = { 2181 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2182 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2183 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2184 "\u0905\u0927\u094d\u092f\u093e\u092f", 2185 "\u0905\u0930\u094d\u091c\u0941\u0928", 2186 "\u0935\u093f\u0937\u093e\u0926", 2187 "\u092f\u094b\u0917", 2188 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2189 "\u0909\u0935\u093E\u091A\u0943", 2190 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2191 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2192 "\u0938\u092e\u0935\u0947\u0924\u093e", 2193 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2194 "\u092e\u093e\u092e\u0915\u093e\u0903", 2195 // 
"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2196 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2197 "\u0938\u0902\u091c\u0935" 2198 }; 2199 2200 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2201 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2202 for(int i= 0; i<MAX_LEN; i++){ 2203 expect(latinToDevToLatin,(source[i]),(source[i])); 2204 expect(devToLatinToDev,(expected[i]),(expected[i])); 2205 } 2206 } 2207 /** 2208 * Test Gurmukhi-Devanagari Tippi and Bindi 2209 */ 2210 @Test 2211 public void TestGurmukhiDevanagari(){ 2212 // the rule says: 2213 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2214 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2215 2216 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2217 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2218 2219 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2220 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2221 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2222 StringBuffer src = new StringBuffer(" \u0902"); 2223 StringBuffer expect = new StringBuffer(" \u0A02"); 2224 while(vIter.next()){ 2225 src.setCharAt(0,(char) vIter.codepoint); 2226 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2227 expect(trans,src.toString(),expect.toString()); 2228 } 2229 2230 expect.setCharAt(1,'\u0A70'); 2231 while(nvIter.next()){ 2232 //src.setCharAt(0,(char) nvIter.codepoint); 2233 src.setCharAt(0,(char)nvIter.codepoint); 2234 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2235 expect(trans,src.toString(),expect.toString()); 2236 } 2237 } 2238 /** 2239 * Test instantiation from a locale. 
2240 */ 2241 @Test 2242 public void TestLocaleInstantiation() { 2243 Transliterator t; 2244 try{ 2245 t = Transliterator.getInstance("te_IN-Latin"); 2246 //expect(t, "\u0430", "a"); 2247 }catch(IllegalArgumentException ex){ 2248 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2249 } 2250 try{ 2251 t = Transliterator.getInstance("ru_RU-Latin"); 2252 expect(t, "\u0430", "a"); 2253 }catch(IllegalArgumentException ex){ 2254 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2255 } 2256 try{ 2257 t = Transliterator.getInstance("en-el"); 2258 expect(t, "a", "\u03B1"); 2259 }catch(IllegalArgumentException ex){ 2260 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2261 } 2262 } 2263 2264 /** 2265 * Test title case handling of accent (should ignore accents) 2266 */ 2267 @Test 2268 public void TestTitleAccents() { 2269 Transliterator t = Transliterator.getInstance("Title"); 2270 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2271 } 2272 2273 /** 2274 * Basic test of a locale resource based rule. 2275 */ 2276 @Test 2277 public void TestLocaleResource() { 2278 String DATA[] = { 2279 // id from to 2280 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2281 "Latin-el", "b", "\u03bc\u03c0", 2282 "Latin-Greek", "b", "\u03B2", 2283 "Greek-Latin/UNGEGN", "\u03B2", "v", 2284 "el-Latin", "\u03B2", "v", 2285 "Greek-Latin", "\u03B2", "b", 2286 }; 2287 for (int i=0; i<DATA.length; i+=3) { 2288 Transliterator t = Transliterator.getInstance(DATA[i]); 2289 expect(t, DATA[i+1], DATA[i+2]); 2290 } 2291 } 2292 2293 /** 2294 * Make sure parse errors reference the right line. 
2295 */ 2296 @Test 2297 public void TestParseError() { 2298 String rule = 2299 "a > b;\n" + 2300 "# more stuff\n" + 2301 "d << b;"; 2302 try { 2303 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2304 if(t!=null){ 2305 errln("FAIL: Did not get expected exception"); 2306 } 2307 } catch (IllegalArgumentException e) { 2308 String err = e.getMessage(); 2309 if (err.indexOf("d << b") >= 0) { 2310 logln("Ok: " + err); 2311 } else { 2312 errln("FAIL: " + err); 2313 } 2314 return; 2315 } 2316 errln("FAIL: no syntax error"); 2317 } 2318 2319 /** 2320 * Make sure sets on output are disallowed. 2321 */ 2322 @Test 2323 public void TestOutputSet() { 2324 String rule = "$set = [a-cm-n]; b > $set;"; 2325 Transliterator t = null; 2326 try { 2327 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2328 if(t!=null){ 2329 errln("FAIL: Did not get the expected exception"); 2330 } 2331 } catch (IllegalArgumentException e) { 2332 logln("Ok: " + e.getMessage()); 2333 return; 2334 } 2335 errln("FAIL: No syntax error"); 2336 } 2337 2338 /** 2339 * Test the use variable range pragma, making sure that use of 2340 * variable range characters is detected and flagged as an error. 
2341 */ 2342 @Test 2343 public void TestVariableRange() { 2344 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2345 try { 2346 Transliterator t = 2347 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2348 if(t!=null){ 2349 errln("FAIL: Did not get the expected exception"); 2350 } 2351 } catch (IllegalArgumentException e) { 2352 logln("Ok: " + e.getMessage()); 2353 return; 2354 } 2355 errln("FAIL: No syntax error"); 2356 } 2357 2358 /** 2359 * Test invalid post context error handling 2360 */ 2361 @Test 2362 public void TestInvalidPostContext() { 2363 try { 2364 Transliterator t = 2365 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2366 if(t!=null){ 2367 errln("FAIL: Did not get the expected exception"); 2368 } 2369 } catch (IllegalArgumentException e) { 2370 String msg = e.getMessage(); 2371 if (msg.indexOf("a}b{c") >= 0) { 2372 logln("Ok: " + msg); 2373 } else { 2374 errln("FAIL: " + msg); 2375 } 2376 return; 2377 } 2378 errln("FAIL: No syntax error"); 2379 } 2380 2381 /** 2382 * Test ID form variants 2383 */ 2384 @Test 2385 public void TestIDForms() { 2386 String DATA[] = { 2387 "NFC", null, "NFD", 2388 "nfd", null, "NFC", // make sure case is ignored 2389 "Any-NFKD", null, "Any-NFKC", 2390 "Null", null, "Null", 2391 "-nfkc", "nfkc", "NFKD", 2392 "-nfkc/", "nfkc", "NFKD", 2393 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2394 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2395 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2396 "Source-", null, null, 2397 "Source/Variant-", null, null, 2398 "Source-/Variant", null, null, 2399 "/Variant", null, null, 2400 "/Variant-", null, null, 2401 "-/Variant", null, null, 2402 "-/", null, null, 2403 "-", null, null, 2404 "/", null, null, 2405 }; 2406 2407 for (int i=0; i<DATA.length; i+=3) { 2408 String ID = DATA[i]; 2409 String expID = DATA[i+1]; 2410 String expInvID = DATA[i+2]; 2411 boolean expValid = (expInvID != 
null); 2412 if (expID == null) { 2413 expID = ID; 2414 } 2415 try { 2416 Transliterator t = 2417 Transliterator.getInstance(ID); 2418 Transliterator u = t.getInverse(); 2419 if (t.getID().equals(expID) && 2420 u.getID().equals(expInvID)) { 2421 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2422 } else { 2423 errln("FAIL: getInstance(" + ID + ") => " + 2424 t.getID() + " x getInverse() => " + u.getID() + 2425 ", expected " + expInvID); 2426 } 2427 } catch (IllegalArgumentException e) { 2428 if (!expValid) { 2429 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2430 } else { 2431 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2432 } 2433 } 2434 } 2435 } 2436 2437 void checkRules(String label, Transliterator t2, String testRulesForward) { 2438 String rules2 = t2.toRules(true); 2439 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2440 rules2 = TestUtility.replace(rules2, " ", ""); 2441 rules2 = TestUtility.replace(rules2, "\n", ""); 2442 rules2 = TestUtility.replace(rules2, "\r", ""); 2443 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2444 2445 if (!rules2.equals(testRulesForward)) { 2446 errln(label); 2447 logln("GENERATED RULES: " + rules2); 2448 logln("SHOULD BE: " + testRulesForward); 2449 } 2450 } 2451 2452 /** 2453 * Mark's toRules test. 
2454 */ 2455 @Test 2456 public void TestToRulesMark() { 2457 2458 String testRules = 2459 "::[[:Latin:][:Mark:]];" 2460 + "::NFKD (NFC);" 2461 + "::Lower (Lower);" 2462 + "a <> \\u03B1;" // alpha 2463 + "::NFKC (NFD);" 2464 + "::Upper (Lower);" 2465 + "::Lower ();" 2466 + "::([[:Greek:][:Mark:]]);" 2467 ; 2468 String testRulesForward = 2469 "::[[:Latin:][:Mark:]];" 2470 + "::NFKD(NFC);" 2471 + "::Lower(Lower);" 2472 + "a > \\u03B1;" 2473 + "::NFKC(NFD);" 2474 + "::Upper (Lower);" 2475 + "::Lower ();" 2476 ; 2477 String testRulesBackward = 2478 "::[[:Greek:][:Mark:]];" 2479 + "::Lower (Upper);" 2480 + "::NFD(NFKC);" 2481 + "\\u03B1 > a;" 2482 + "::Lower(Lower);" 2483 + "::NFC(NFKD);" 2484 ; 2485 String source = "\u00E1"; // a-acute 2486 String target = "\u03AC"; // alpha-acute 2487 2488 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2489 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2490 2491 expect(t2, source, target); 2492 expect(t3, target, source); 2493 2494 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2495 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2496 } 2497 2498 /** 2499 * Test Escape and Unescape transliterators. 2500 */ 2501 @Test 2502 public void TestEscape() { 2503 expect(Transliterator.getInstance("Hex-Any"), 2504 "\\x{40}\\U000000312Q", 2505 "@12Q"); 2506 expect(Transliterator.getInstance("Any-Hex/C"), 2507 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2508 "\\u0041\\U0010BEEF\\uFEED"); 2509 expect(Transliterator.getInstance("Any-Hex/Java"), 2510 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2511 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2512 expect(Transliterator.getInstance("Any-Hex/Perl"), 2513 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2514 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2515 } 2516 2517 /** 2518 * Make sure display names of variants look reasonable. 
2519 */ 2520 @Test 2521 public void TestDisplayName() { 2522 String DATA[] = { 2523 // ID, forward name, reverse name 2524 // Update the text as necessary -- the important thing is 2525 // not the text itself, but how various cases are handled. 2526 2527 // Basic test 2528 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 2529 2530 // Variants 2531 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 2532 2533 // Target-only IDs 2534 "NFC", "Any to NFC", "Any to NFD", 2535 }; 2536 2537 Locale US = Locale.US; 2538 2539 for (int i=0; i<DATA.length; i+=3) { 2540 String name = Transliterator.getDisplayName(DATA[i], US); 2541 if (!name.equals(DATA[i+1])) { 2542 errln("FAIL: " + DATA[i] + ".getDisplayName() => " + 2543 name + ", expected " + DATA[i+1]); 2544 } else { 2545 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name); 2546 } 2547 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE); 2548 name = Transliterator.getDisplayName(t.getID(), US); 2549 if (!name.equals(DATA[i+2])) { 2550 errln("FAIL: " + t.getID() + ".getDisplayName() => " + 2551 name + ", expected " + DATA[i+2]); 2552 } else { 2553 logln("Ok: " + t.getID() + ".getDisplayName() => " + name); 2554 } 2555 2556 // Cover getDisplayName(String) 2557 ULocale save = ULocale.getDefault(); 2558 ULocale.setDefault(ULocale.US); 2559 String name2 = Transliterator.getDisplayName(t.getID()); 2560 if (!name.equals(name2)) 2561 errln("FAIL: getDisplayName with default locale failed"); 2562 ULocale.setDefault(save); 2563 } 2564 } 2565 2566 /** 2567 * Test anchor masking 2568 */ 2569 @Test 2570 public void TestAnchorMasking() { 2571 String rule = "^a > Q; a > q;"; 2572 try { 2573 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2574 if(t==null){ 2575 errln("FAIL: Did not get the expected exception"); 2576 } 2577 } catch (IllegalArgumentException e) { 2578 errln("FAIL: " + rule + " => " + e); 2579 } 2580 } 2581 2582 /** 2583 * This test is 
not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java 2584 * during ICU4J modularization to remove dependency of tests on Transliterator. 2585 */ 2586 @Test 2587 public void TestScriptAllCodepoints(){ 2588 int code; 2589 HashSet scriptIdsChecked = new HashSet(); 2590 HashSet scriptAbbrsChecked = new HashSet(); 2591 for( int i =0; i <= 0x10ffff; i++){ 2592 code = UScript.getScript(i); 2593 if(code==UScript.INVALID_CODE){ 2594 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 2595 } 2596 String id =UScript.getName(code); 2597 String abbr = UScript.getShortName(code); 2598 if (!scriptIdsChecked.contains(id)) { 2599 scriptIdsChecked.add(id); 2600 String newId ="[:"+id+":];NFD"; 2601 try{ 2602 Transliterator t = Transliterator.getInstance(newId); 2603 if(t==null){ 2604 errln("Failed to create transliterator for "+hex(i)+ 2605 " script code: " +id); 2606 } 2607 }catch(Exception e){ 2608 errln("Failed to create transliterator for "+hex(i) 2609 +" script code: " +id 2610 + " Exception: "+e.getMessage()); 2611 } 2612 } 2613 if (!scriptAbbrsChecked.contains(abbr)) { 2614 scriptAbbrsChecked.add(abbr); 2615 String newAbbrId ="[:"+abbr+":];NFD"; 2616 try{ 2617 Transliterator t = Transliterator.getInstance(newAbbrId); 2618 if(t==null){ 2619 errln("Failed to create transliterator for "+hex(i)+ 2620 " script code: " +abbr); 2621 } 2622 }catch(Exception e){ 2623 errln("Failed to create transliterator for "+hex(i) 2624 +" script code: " +abbr 2625 + " Exception: "+e.getMessage()); 2626 } 2627 } 2628 } 2629 } 2630 2631 2632 static final String[][] registerRules = { 2633 {"Any-Dev1", "x > X; y > Y;"}, 2634 {"Any-Dev2", "XY > Z"}, 2635 {"Greek-Latin/FAKE", 2636 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+ 2637 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+ 2638 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+ 2639 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;" 2640 }, 2641 }; 2642 2643 static final String DESERET_DEE = 
UTF16.valueOf(0x10414); 2644 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2645 2646 static final String[][] testCases = { 2647 2648 // NORMALIZATION 2649 // should add more test cases 2650 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2651 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2652 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2653 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2654 2655 // mp -> b BUG 2656 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2657 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2658 2659 // check for devanagari bug 2660 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2661 2662 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2663 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2664 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2665 //TODO: enable this test once Titlecase works right 2666 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2667 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2668 2669 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2670 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2671 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2672 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2673 2674 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2675 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2676 2677 // FORMS OF S 2678 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2679 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2680 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2681 {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 
\u03C3\u03C2 \u03C2\u03C3"}, 2682 2683 // Tatiana bug 2684 // Upper: TAT\u02B9\u00C2NA 2685 // Lower: tat\u02B9\u00E2na 2686 // Title: Tat\u02B9\u00E2na 2687 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2688 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2689 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2690 }; 2691 2692 @Test 2693 public void TestSpecialCases() { 2694 2695 for (int i = 0; i < registerRules.length; ++i) { 2696 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2697 registerRules[i][1], Transliterator.FORWARD); 2698 DummyFactory.add(registerRules[i][0], t); 2699 } 2700 for (int i = 0; i < testCases.length; ++i) { 2701 String name = testCases[i][0]; 2702 Transliterator t = Transliterator.getInstance(name); 2703 String id = t.getID(); 2704 String source = testCases[i][1]; 2705 String target = null; 2706 2707 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2708 2709 if (testCases[i].length > 2) target = testCases[i][2]; 2710 else if (id.equalsIgnoreCase("NFD")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFD); 2711 else if (id.equalsIgnoreCase("NFC")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFC); 2712 else if (id.equalsIgnoreCase("NFKD")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKD); 2713 else if (id.equalsIgnoreCase("NFKC")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKC); 2714 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2715 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2716 2717 expect(t, source, target); 2718 } 2719 for (int i = 0; i < registerRules.length; ++i) { 2720 Transliterator.unregister(registerRules[i][0]); 2721 } 2722 } 2723 2724 // seems like there should be an easier way to just register an 
instance of a transliterator 2725 2726 static class DummyFactory implements Transliterator.Factory { 2727 static DummyFactory singleton = new DummyFactory(); 2728 static HashMap m = new HashMap(); 2729 2730 // Since Transliterators are immutable, we don't have to clone on set & get 2731 static void add(String ID, Transliterator t) { 2732 m.put(ID, t); 2733 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2734 Transliterator.registerFactory(ID, singleton); 2735 } 2736 public Transliterator getInstance(String ID) { 2737 return (Transliterator) m.get(ID); 2738 } 2739 } 2740 2741 @Test 2742 public void TestCasing() { 2743 Transliterator toLower = Transliterator.getInstance("lower"); 2744 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2745 Transliterator toUpper = Transliterator.getInstance("upper"); 2746 Transliterator toTitle = Transliterator.getInstance("title"); 2747 for (int i = 0; i < 0x600; ++i) { 2748 String s = UTF16.valueOf(i); 2749 2750 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2751 assertEquals("Lowercase", lower, toLower.transform(s)); 2752 2753 String casefold = UCharacter.foldCase(s, true); 2754 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2755 2756 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2757 assertEquals("Title", title, toTitle.transform(s)); 2758 2759 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2760 assertEquals("Upper", upper, toUpper.transform(s)); 2761 } 2762 } 2763 2764 @Test 2765 public void TestSurrogateCasing () { 2766 // check that casing handles surrogates 2767 // titlecase is currently defective 2768 int dee = UTF16.charAt(DESERET_dee,0); 2769 int DEE = UCharacter.toTitleCase(dee); 2770 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2771 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2772 } 2773 2774 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + 
DESERET_DEE)) { 2775 errln("Fails uppercase of surrogates"); 2776 } 2777 2778 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2779 errln("Fails lowercase of surrogates"); 2780 } 2781 } 2782 2783 // Check to see that incremental gets at least part way through a reasonable string. 2784 // TODO(junit): should be working - also should be converted to parameterized test 2785 @Ignore 2786 @Test 2787 public void TestIncrementalProgress() { 2788 String latinTest = "The Quick Brown Fox."; 2789 String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest); 2790 String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest); 2791 String[][] tests = { 2792 {"Any", latinTest}, 2793 {"Latin", latinTest}, 2794 {"Halfwidth", latinTest}, 2795 {"Devanagari", devaTest}, 2796 {"Katakana", kataTest}, 2797 }; 2798 2799 Enumeration sources = Transliterator.getAvailableSources(); 2800 while(sources.hasMoreElements()) { 2801 String source = (String) sources.nextElement(); 2802 String test = findMatch(source, tests); 2803 if (test == null) { 2804 logln("Skipping " + source + "-X"); 2805 continue; 2806 } 2807 Enumeration targets = Transliterator.getAvailableTargets(source); 2808 while(targets.hasMoreElements()) { 2809 String target = (String) targets.nextElement(); 2810 Enumeration variants = Transliterator.getAvailableVariants(source, target); 2811 while(variants.hasMoreElements()) { 2812 String variant = (String) variants.nextElement(); 2813 String id = source + "-" + target + "/" + variant; 2814 logln("id: " + id); 2815 2816 Transliterator t = Transliterator.getInstance(id); 2817 CheckIncrementalAux(t, test); 2818 2819 String rev = t.transliterate(test); 2820 Transliterator inv = t.getInverse(); 2821 CheckIncrementalAux(inv, rev); 2822 } 2823 } 2824 } 2825 } 2826 2827 public String findMatch (String source, String[][] pairs) { 2828 for (int i = 0; i < pairs.length; ++i) { 2829 if 
(source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1]; 2830 } 2831 return null; 2832 } 2833 2834 public void CheckIncrementalAux(Transliterator t, String input) { 2835 2836 Replaceable test = new ReplaceableString(input); 2837 Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length()); 2838 t.transliterate(test, pos); 2839 boolean gotError = false; 2840 2841 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 2842 2843 if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) { 2844 errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2845 gotError = true; 2846 } else { 2847 logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2848 } 2849 t.finishTransliteration(test, pos); 2850 if (pos.start != pos.limit) { 2851 errln("Incomplete, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2852 gotError = true; 2853 } 2854 if(!gotError){ 2855 //errln("FAIL: Did not get expected error"); 2856 } 2857 } 2858 2859 @Test 2860 public void TestFunction() { 2861 // Careful with spacing and ';' here: Phrase this exactly 2862 // as toRules() is going to return it. If toRules() changes 2863 // with regard to spacing or ';', then adjust this string. 
2864 String rule = 2865 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2866 2867 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2868 if (t == null) { 2869 errln("FAIL: createFromRules failed"); 2870 return; 2871 } 2872 2873 String r = t.toRules(true); 2874 if (r.equals(rule)) { 2875 logln("OK: toRules() => " + r); 2876 } else { 2877 errln("FAIL: toRules() => " + r + 2878 ", expected " + rule); 2879 } 2880 2881 expect(t, "The Quick Brown Fox", 2882 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2883 rule = 2884 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2885 2886 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2887 if (t == null) { 2888 errln("FAIL: createFromRules failed"); 2889 return; 2890 } 2891 2892 r = t.toRules(true); 2893 if (r.equals(rule)) { 2894 logln("OK: toRules() => " + r); 2895 } else { 2896 errln("FAIL: toRules() => " + r + 2897 ", expected " + rule); 2898 } 2899 2900 expect(t, "\u0301", 2901 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2902 } 2903 2904 @Test 2905 public void TestInvalidBackRef() { 2906 String rule = ". > $1;"; 2907 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2908 try { 2909 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2910 if (t != null) { 2911 errln("FAIL: createFromRules should have returned NULL"); 2912 } 2913 errln("FAIL: Ok: . > $1; => no error"); 2914 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2915 if (t2 != null) { 2916 errln("FAIL: createFromRules should have returned NULL"); 2917 } 2918 errln("FAIL: Ok: . > $1; => no error"); 2919 } catch (IllegalArgumentException e) { 2920 logln("Ok: . 
> $1; => " + e.getMessage()); 2921 } 2922 } 2923 2924 @Test 2925 public void TestMulticharStringSet() { 2926 // Basic testing 2927 String rule = 2928 " [{aa}] > x;" + 2929 " a > y;" + 2930 " [b{bc}] > z;" + 2931 "[{gd}] { e > q;" + 2932 " e } [{fg}] > r;" ; 2933 2934 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2935 if (t == null) { 2936 errln("FAIL: createFromRules failed"); 2937 return; 2938 } 2939 2940 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2941 "y x yz z d gd de gdq gdqfg ddrfg"); 2942 2943 // Overlapped string test. Make sure that when multiple 2944 // strings can match that the longest one is matched. 2945 rule = 2946 " [a {ab} {abc}] > x;" + 2947 " b > y;" + 2948 " c > z;" + 2949 " q [t {st} {rst}] { e > p;" ; 2950 2951 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2952 if (t == null) { 2953 errln("FAIL: createFromRules failed"); 2954 return; 2955 } 2956 2957 expect(t, "a ab abc qte qste qrste", 2958 "x x x qtp qstp qrstp"); 2959 } 2960 2961 /** 2962 * Test that user-registered transliterators can be used under function 2963 * syntax. 2964 */ 2965 @Test 2966 public void TestUserFunction() { 2967 Transliterator t; 2968 2969 // There's no need to register inverses if we don't use them 2970 TestUserFunctionFactory.add("Any-gif", 2971 Transliterator.createFromRules("gif", 2972 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2973 Transliterator.FORWARD)); 2974 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2975 2976 TestUserFunctionFactory.add("Any-RemoveCurly", 2977 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2978 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2979 2980 logln("Trying &hex"); 2981 t = Transliterator.createFromRules("hex2", "(.) 
> &hex($1);", Transliterator.FORWARD); 2982 logln("Registering"); 2983 TestUserFunctionFactory.add("Any-hex2", t); 2984 t = Transliterator.getInstance("Any-hex2"); 2985 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2986 2987 logln("Trying &gif"); 2988 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2989 logln("Registering"); 2990 TestUserFunctionFactory.add("Any-gif2", t); 2991 t = Transliterator.getInstance("Any-gif2"); 2992 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2993 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2994 2995 // Test that filters are allowed after & 2996 t = Transliterator.createFromRules("test", 2997 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2998 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2999 3000 // Unregister our test stuff 3001 TestUserFunctionFactory.unregister(); 3002 } 3003 3004 static class TestUserFunctionFactory implements Transliterator.Factory { 3005 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 3006 static HashMap m = new HashMap(); 3007 3008 static void add(String ID, Transliterator t) { 3009 m.put(new CaseInsensitiveString(ID), t); 3010 Transliterator.registerFactory(ID, singleton); 3011 } 3012 3013 public Transliterator getInstance(String ID) { 3014 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 3015 } 3016 3017 static void unregister() { 3018 Iterator ids = m.keySet().iterator(); 3019 while (ids.hasNext()) { 3020 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 3021 Transliterator.unregister(id.getString()); 3022 ids.remove(); // removes pair from m 3023 } 3024 } 3025 } 3026 3027 /** 3028 * Test the Any-X transliterators. 
3029 */ 3030 @Test 3031 public void TestAnyX() { 3032 Transliterator anyLatin = 3033 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3034 3035 expect(anyLatin, 3036 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 3037 "greek:abkABK hiragana:abuku cyrillic:abc"); 3038 } 3039 3040 /** 3041 * Test Any-X transliterators with sample letters from all scripts. 3042 */ 3043 @Test 3044 public void TestAny() { 3045 UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze(); 3046 StringBuffer testString = new StringBuffer(); 3047 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 3048 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 3049 int count = 5; 3050 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 3051 testString.append(it.getString()); 3052 if (--count < 0) break; 3053 } 3054 } 3055 logln("Sample set for Any-Latin: " + testString); 3056 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 3057 String result = anyLatin.transliterate(testString.toString()); 3058 logln("Sample result for Any-Latin: " + result); 3059 } 3060 3061 3062 /** 3063 * Test the source and target set API. These are only implemented 3064 * for RBT and CompoundTransliterator at this time. 
/**
 * Compares the source/target sets reported by a transliterator with sets
 * observed empirically.
 *
 * The method first builds a large frozen set of test strings
 * ({@code disorderedMarks}): strings produced by {@link CanonicalIterator}
 * for every NFD decomposition, further CanonicalIterator output for
 * "source + trail" combinations, each non-starter combined with U+0345 and
 * U+0323, and finally every code point from 0 to 0x10FFFF.
 *
 * For each rule in {@code rules} it then creates a transliterator,
 * transforms every test string, and for strings that changed and that
 * {@code isAtomic} accepts, adds the input to an empirical source set and
 * the output to an empirical target set. These are checked against
 * {@code getSourceSet()} / {@code getTargetSet()} with
 * {@code SetAssert.MISSING_OK}.
 */
@Test
public void TestSourceTargetSet2() {


    Normalizer2 nfc = Normalizer2.getNFCInstance();
    Normalizer2 nfd = Normalizer2.getNFDInstance();

    // Disabled NFKD exploration code, kept for reference.
    //        Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE);
    //        UnicodeSet nfkdSource = new UnicodeSet();
    //        UnicodeSet nfkdTarget = new UnicodeSet();
    //        for (int i = 0; i <= 0x10FFFF; ++i) {
    //            if (nfkd.isInert(i)) {
    //                continue;
    //            }
    //            nfkdSource.add(i);
    //            String t = nfkd.getDecomposition(i);
    //            if (t != null) {
    //                nfkdTarget.addAll(t);
    //            } else {
    //                nfkdTarget.add(i);
    //            }
    //        }
    //        nfkdSource.freeze();
    //        nfkdTarget.freeze();
    //        logln("NFKD Source: " + nfkdSource.toPattern(false));
    //        logln("NFKD Target: " + nfkdTarget.toPattern(false));

    // leadToTrail: first code point of a decomposition -> characters seen after it.
    // leadToSources: first code point of a decomposition -> code points that decompose to it.
    UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap();
    UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap();
    UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze();
    CanonicalIterator can = new CanonicalIterator("");

    // Accumulates every string that will be fed to the transliterators below.
    UnicodeSet disorderedMarks = new UnicodeSet();

    // Pass 1: walk all code points that have an NFD decomposition.
    for (int i = 0; i <= 0x10FFFF; ++i) {
        String s = nfd.getDecomposition(i);
        if (s == null) {
            continue;
        }

        // Add every string CanonicalIterator yields for the decomposition.
        can.setSource(s);
        for (String t = can.next(); t != null; t = can.next()) {
            disorderedMarks.add(t);
        }

        // if s has two code points, (or more), add the lead/trail information
        int first = s.codePointAt(0);
        int firstCount = Character.charCount(first);
        if (s.length() == firstCount) continue;
        String trailString = s.substring(firstCount);

        // add all the trail characters
        // (only when the trail contains at least one non-starter)
        if (!nonStarters.containsSome(trailString)) {
            continue;
        }
        UnicodeSet trailSet = leadToTrail.get(first);
        if (trailSet == null) {
            leadToTrail.put(first, trailSet = new UnicodeSet());
        }
        trailSet.addAll(trailString); // add remaining trails

        // add the sources
        UnicodeSet sourcesSet = leadToSources.get(first);
        if (sourcesSet == null) {
            leadToSources.put(first, sourcesSet = new UnicodeSet());
        }
        sourcesSet.add(i);
    }


    // Pass 2: for every lead, combine each source with each trail recorded for
    // that lead and keep the CanonicalIterator results that do NOT end with
    // the trail.
    for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) {
        String lead = x.getKey();
        UnicodeSet sources = x.getValue();
        UnicodeSet trailSet = leadToTrail.get(lead);
        for (String source : sources) {
            for (String trail : trailSet) {
                can.setSource(source + trail);
                for (String t = can.next(); t != null; t = can.next()) {
                    if (t.endsWith(trail)) continue;
                    disorderedMarks.add(t);
                }
            }
        }
    }


    // Pass 3: pair every non-starter with U+0345 (before) and U+0323 (after).
    for (String s : nonStarters) {
        disorderedMarks.add("\u0345" + s);
        disorderedMarks.add(s+"\u0323");
        // Diagnostic only: logs when NFC of U+01EC + s no longer starts with U+01EC.
        String xx = nfc.normalize("\u01EC" + s);
        if (!xx.startsWith("\u01EC")) {
            logln("??");
        }
    }

    // Disabled alternative way of deriving test strings, kept for reference.
    //        for (int i = 0; i <= 0x10FFFF; ++i) {
    //            String s = nfkd.getDecomposition(i);
    //            if (s != null) {
    //                disorderedMarks.add(s);
    //                disorderedMarks.add(nfc.normalize(s));
    //                addDerivedStrings(nfc, disorderedMarks, s);
    //            }
    //            s = nfd.getDecomposition(i);
    //            if (s != null) {
    //                disorderedMarks.add(s);
    //            }
    //            if (!nfc.isInert(i)) {
    //                if (i == 0x00C0) {
    //                    logln("\u00C0");
    //                }
    //                can.setSource(s+"\u0334");
    //                for (String t = can.next(); t != null; t = can.next()) {
    //                    addDerivedStrings(nfc, disorderedMarks, t);
    //                }
    //                can.setSource(s+"\u0345");
    //                for (String t = can.next(); t != null; t = can.next()) {
    //                    addDerivedStrings(nfc, disorderedMarks, t);
    //                }
    //                can.setSource(s+"\u0323");
    //                for (String t = can.next(); t != null; t = can.next()) {
    //                    addDerivedStrings(nfc, disorderedMarks, t);
    //                }
    //            }
    //        }
    logln("Test cases: " + disorderedMarks.size());
    // Add every single code point as a test case too, then freeze the set.
    disorderedMarks.addAll(0,0x10FFFF).freeze();
    logln("isInert \u0104 " + nfc.isInert('\u0104'));

    // Each row is {rule text, extra test set}. The second column is null in
    // every row, and the code that would read it is commented out below.
    Object[][] rules = {
        {":: [:sc=COMMON:] any-name;", null},

        {":: [:Greek:] hex-any/C;", null},
        {":: [:Greek:] any-hex/C;", null},

        {":: [[:Mn:][:Me:]] remove;", null},
        {":: [[:Mn:][:Me:]] null;", null},


        {":: lower;", null},
        {":: upper;", null},
        {":: title;", null},
        {":: CaseFold;", null},

        {":: NFD;", null},
        {":: NFC;", null},
        {":: NFKD;", null},
        {":: NFKC;", null},

        {":: [[:Mn:][:Me:]] NFKD;", null},
        {":: Latin-Greek;", null},
        {":: [:Latin:] NFKD;", null},
        {":: NFKD;", null},
        {":: NFKD;\n" +
            ":: [[:Mn:][:Me:]] remove;\n" +
            ":: NFC;", null},
    };
    for (Object[] rulex : rules) {
        String rule = (String) rulex[0];
        Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD);
        UnicodeSet actualSource = trans.getSourceSet();
        UnicodeSet actualTarget = trans.getTargetSet();
        UnicodeSet empiricalSource = new UnicodeSet();
        UnicodeSet empiricalTarget = new UnicodeSet();
        // Newlines in multi-line rules become tabs so the assertion message stays on one line.
        String ruleDisplay = rule.replace("\n", "\t\t");
        UnicodeSet toTest = disorderedMarks;
        //            if (rulex[1] != null) {
        //                toTest = new UnicodeSet(disorderedMarks);
        //                toTest.addAll((UnicodeSet) rulex[1]);
        //            }

        // NFD form of U+0104; it is only logged when encountered in the loop below.
        String test = nfd.normalize("\u0104");
        boolean DEBUG = true;
        // count is only ever incremented, never read.
        @SuppressWarnings("unused")
        int count = 0; // for debugging
        for (String s : toTest) {
            if (s.equals(test)) {
                logln(test);
            }
            String t = trans.transform(s);
            if (!s.equals(t)) {
                if (!isAtomic(s, t, trans)) {
                    // NOTE(review): this second call discards its result; presumably a
                    // debugger step-in hook. Confirm before removing.
                    isAtomic(s, t, trans);
                    continue;
                }

                // only keep the part that changed; so skip the front and end.
                //                    int start = findSharedStartLength(s,t);
                //                    int end = findSharedEndLength(s,t);
                //                    if (start != 0 || end != 0) {
                //                        s = s.substring(start, s.length() - end);
                //                        t = t.substring(start, t.length() - end);
                //                    }
                if (DEBUG) {
                    if (!actualSource.containsAll(s)) {
                        count++;
                    }
                    if (!actualTarget.containsAll(t)) {
                        count++;
                    }
                }
                addSourceTarget(s, empiricalSource, t, empiricalTarget);
            }
        }
        assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK);
        assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK);
    }
}
Transliterator.getInstance(test); 3325 } catch (Exception e) { 3326 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD); 3327 } 3328 Transliterator t1; 3329 try { 3330 t1 = t0.getInverse(); 3331 } catch (Exception e) { 3332 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE); 3333 } 3334 int targetIndex = 0; 3335 for (Transliterator t : new Transliterator[]{t0, t1}) { 3336 boolean ok; 3337 UnicodeSet source = t.getSourceSet(); 3338 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t"; 3339 targetIndex++; 3340 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource 3341 : testPair[targetIndex] == null ? expectedSource 3342 : testPair[targetIndex].length() == 0 ? expectedSource 3343 : new UnicodeSet(testPair[targetIndex]); 3344 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source); 3345 if (!ok) { // for debugging 3346 source = t.getSourceSet(); 3347 } 3348 UnicodeSet target = t.getTargetSet(); 3349 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target); 3350 if (!ok) { // for debugging 3351 target = t.getTargetSet(); 3352 } 3353 } 3354 } 3355 } 3356 3357 private boolean isAtomic(String s, String t, Transliterator trans) { 3358 for (int i = 1; i < s.length(); ++i) { 3359 if (!CharSequences.onCharacterBoundary(s, i)) { 3360 continue; 3361 } 3362 String q = trans.transform(s.substring(0,i)); 3363 if (t.startsWith(q)) { 3364 String r = trans.transform(s.substring(i)); 3365 if (t.length() == q.length() + r.length() && t.endsWith(r)) { 3366 return false; 3367 } 3368 } 3369 } 3370 return true; 3371 // // make sure that every part is different 3372 // if (s.codePointCount(0, s.length()) > 1) { 3373 // int[] codePoints = It.codePoints(s); 3374 // for (int k = 0; k < codePoints.length; ++k) { 3375 // int pos = indexOf(t,codePoints[k]); 3376 // if (pos >= 0) { 3377 // int x; 3378 // } 3379 // } 3380 // if (s.contains("\u00C0")) { 3381 // logln("\u00C0"); 
3382 // } 3383 // } 3384 } 3385 3386 private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) { 3387 expectedSource.addAll(s); 3388 if (t.length() > 0) { 3389 expectedTarget.addAll(t); 3390 } 3391 } 3392 3393 // private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) { 3394 // disorderedMarks.add(s); 3395 // for (int j = 1; j < s.length(); ++j) { 3396 // if (CharSequences.onCharacterBoundary(s, j)) { 3397 // String shorter = s.substring(0,j); 3398 // disorderedMarks.add(shorter); 3399 // disorderedMarks.add(nfc.normalize(shorter) + s.substring(j)); 3400 // } 3401 // } 3402 // } 3403 3404 @Test 3405 public void TestCharUtils() { 3406 String[][] startTests = { 3407 {"1", "a", "ab"}, 3408 {"0", "a", "xb"}, 3409 {"0", "\uD800", "\uD800\uDC01"}, 3410 {"1", "\uD800a", "\uD800b"}, 3411 {"0", "\uD800\uDC00", "\uD800\uDC01"}, 3412 }; 3413 for (String[] row : startTests) { 3414 int actual = findSharedStartLength(row[1], row[2]); 3415 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")", 3416 Integer.parseInt(row[0]), 3417 actual); 3418 } 3419 String[][] endTests = { 3420 {"0", "\uDC00", "\uD801\uDC00"}, 3421 {"1", "a", "ba"}, 3422 {"0", "a", "bx"}, 3423 {"1", "a\uDC00", "b\uDC00"}, 3424 {"0", "\uD800\uDC00", "\uD801\uDC00"}, 3425 }; 3426 for (String[] row : endTests) { 3427 int actual = findSharedEndLength(row[1], row[2]); 3428 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")", 3429 Integer.parseInt(row[0]), 3430 actual); 3431 } 3432 } 3433 3434 /** 3435 * @param s 3436 * @param t 3437 * @return 3438 */ 3439 // TODO make generally available 3440 private static int findSharedStartLength(CharSequence s, CharSequence t) { 3441 int min = Math.min(s.length(), t.length()); 3442 int i; 3443 char sch, tch; 3444 for (i = 0; i < min; ++i) { 3445 sch = s.charAt(i); 3446 tch = t.charAt(i); 3447 if (sch != tch) { 3448 break; 3449 } 3450 } 3451 return 
CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1; 3452 } 3453 3454 /** 3455 * @param s 3456 * @param t 3457 * @return 3458 */ 3459 // TODO make generally available 3460 private static int findSharedEndLength(CharSequence s, CharSequence t) { 3461 int slength = s.length(); 3462 int tlength = t.length(); 3463 int min = Math.min(slength, tlength); 3464 int i; 3465 char sch, tch; 3466 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho' 3467 for (i = 0; i < min; ++i) { 3468 sch = s.charAt(slength - i - 1); 3469 tch = t.charAt(tlength - i - 1); 3470 if (sch != tch) { 3471 break; 3472 } 3473 } 3474 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1; 3475 } 3476 3477 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK} 3478 3479 void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) { 3480 boolean haveError = false; 3481 if (!actual.containsAll(empirical)) { 3482 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual); 3483 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing)); 3484 haveError = true; 3485 } 3486 if (!empirical.containsAll(actual)) { 3487 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical); 3488 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra)); 3489 haveError = true; 3490 } 3491 if (!haveError) { 3492 logln("OK " + message + ' ' + toPattern(empirical)); 3493 } 3494 } 3495 3496 private String toPattern(UnicodeSet missing) { 3497 String result = missing.toPattern(false); 3498 if (result.length() < 200) { 3499 return result; 3500 } 3501 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026"; 3502 } 3503 3504 3505 /** 3506 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 
3507 */ 3508 @Test 3509 public void TestPatternWhitespace() { 3510 // Rules 3511 String r = "a > \u200E b;"; 3512 3513 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3514 3515 expect(t, "a", "b"); 3516 3517 // UnicodeSet 3518 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3519 3520 if (set.contains(0x200E)) { 3521 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3522 } 3523 } 3524 3525 @Test 3526 public void TestAlternateSyntax() { 3527 // U+2206 == & 3528 // U+2190 == < 3529 // U+2192 == > 3530 // U+2194 == <> 3531 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3532 "abc", 3533 "xbz"); 3534 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3535 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3536 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3537 } 3538 3539 @Test 3540 public void TestPositionAPI() { 3541 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3542 Transliterator.Position b = new Transliterator.Position(a); 3543 Transliterator.Position c = new Transliterator.Position(); 3544 c.set(a); 3545 // Call the toString() API: 3546 if (a.equals(b) && a.equals(c)) { 3547 logln("Ok: " + a + " == " + b + " == " + c); 3548 } else { 3549 errln("FAIL: " + a + " != " + b + " != " + c); 3550 } 3551 } 3552 3553 //====================================================================== 3554 // New tests for the ::BEGIN/::END syntax 3555 //====================================================================== 3556 3557 private static final String[] BEGIN_END_RULES = new String[] { 3558 // [0] 3559 "abc > xy;" 3560 + "aba > z;", 3561 3562 // [1] 3563 /* 3564 "::BEGIN;" 3565 + "abc > xy;" 3566 + "::END;" 3567 + "::BEGIN;" 3568 + "aba > z;" 3569 + "::END;", 3570 */ 3571 "", // test case commented out below, this is here to keep from messing up the indexes 3572 3573 // [2] 3574 /* 3575 "abc > xy;" 3576 + "::BEGIN;" 3577 + "aba > z;" 3578 + "::END;", 3579 */ 3580 "", // test case 
commented out below, this is here to keep from messing up the indexes 3581 3582 // [3] 3583 /* 3584 "::BEGIN;" 3585 + "abc > xy;" 3586 + "::END;" 3587 + "aba > z;", 3588 */ 3589 "", // test case commented out below, this is here to keep from messing up the indexes 3590 3591 // [4] 3592 "abc > xy;" 3593 + "::Null;" 3594 + "aba > z;", 3595 3596 // [5] 3597 "::Upper;" 3598 + "ABC > xy;" 3599 + "AB > x;" 3600 + "C > z;" 3601 + "::Upper;" 3602 + "XYZ > p;" 3603 + "XY > q;" 3604 + "Z > r;" 3605 + "::Upper;", 3606 3607 // [6] 3608 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3609 + "$delim = [\\-$ws];" 3610 + "$ws $delim* > ' ';" 3611 + "'-' $delim* > '-';", 3612 3613 // [7] 3614 "::Null;" 3615 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3616 + "$delim = [\\-$ws];" 3617 + "$ws $delim* > ' ';" 3618 + "'-' $delim* > '-';", 3619 3620 // [8] 3621 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3622 + "$delim = [\\-$ws];" 3623 + "$ws $delim* > ' ';" 3624 + "'-' $delim* > '-';" 3625 + "::Null;", 3626 3627 // [9] 3628 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3629 + "$delim = [\\-$ws];" 3630 + "::Null;" 3631 + "$ws $delim* > ' ';" 3632 + "'-' $delim* > '-';", 3633 3634 // [10] 3635 /* 3636 "::BEGIN;" 3637 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3638 + "$delim = [\\-$ws];" 3639 + "::END;" 3640 + "$ws $delim* > ' ';" 3641 + "'-' $delim* > '-';", 3642 */ 3643 "", // test case commented out below, this is here to keep from messing up the indexes 3644 3645 // [11] 3646 /* 3647 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3648 + "$delim = [\\-$ws];" 3649 + "::BEGIN;" 3650 + "$ws $delim* > ' ';" 3651 + "'-' $delim* > '-';" 3652 + "::END;", 3653 */ 3654 "", // test case commented out below, this is here to keep from messing up the indexes 3655 3656 // [12] 3657 /* 3658 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3659 + "$delim = [\\-$ws];" 3660 + "$ab = [ab];" 3661 + "::BEGIN;" 3662 + "$ws $delim* > ' ';" 3663 + "'-' $delim* > '-';" 3664 + "::END;" 3665 + "::BEGIN;" 3666 + "$ab { ' ' } $ab 
> '-';" 3667 + "c { ' ' > ;" 3668 + "::END;" 3669 + "::BEGIN;" 3670 + "'a-a' > a\\%|a;" 3671 + "::END;", 3672 */ 3673 "", // test case commented out below, this is here to keep from messing up the indexes 3674 3675 // [13] 3676 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3677 + "$delim = [\\-$ws];" 3678 + "$ab = [ab];" 3679 + "::Null;" 3680 + "$ws $delim* > ' ';" 3681 + "'-' $delim* > '-';" 3682 + "::Null;" 3683 + "$ab { ' ' } $ab > '-';" 3684 + "c { ' ' > ;" 3685 + "::Null;" 3686 + "'a-a' > a\\%|a;", 3687 3688 // [14] 3689 /* 3690 "::[abc];" 3691 + "::BEGIN;" 3692 + "abc > xy;" 3693 + "::END;" 3694 + "::BEGIN;" 3695 + "aba > yz;" 3696 + "::END;" 3697 + "::Upper;", 3698 */ 3699 "", // test case commented out below, this is here to keep from messing up the indexes 3700 3701 // [15] 3702 "::[abc];" 3703 + "abc > xy;" 3704 + "::Null;" 3705 + "aba > yz;" 3706 + "::Upper;", 3707 3708 // [16] 3709 /* 3710 "::[abc];" 3711 + "::BEGIN;" 3712 + "abc <> xy;" 3713 + "::END;" 3714 + "::BEGIN;" 3715 + "aba <> yz;" 3716 + "::END;" 3717 + "::Upper(Lower);" 3718 + "::([XYZ]);", 3719 */ 3720 "", // test case commented out below, this is here to keep from messing up the indexes 3721 3722 // [17] 3723 "::[abc];" 3724 + "abc <> xy;" 3725 + "::Null;" 3726 + "aba <> yz;" 3727 + "::Upper(Lower);" 3728 + "::([XYZ]);" 3729 }; 3730 3731 /* 3732 (This entire test is commented out below and will need some heavy revision when we re-add 3733 the ::BEGIN/::END stuff) 3734 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3735 // [7] 3736 "::BEGIN;" 3737 + "abc > xy;" 3738 + "::BEGIN;" 3739 + "aba > z;" 3740 + "::END;" 3741 + "::END;", 3742 3743 // [8] 3744 "abc > xy;" 3745 + " aba > z;" 3746 + "::END;", 3747 3748 // [9] 3749 "::BEGIN;" 3750 + "::Upper;" 3751 + "::END;" 3752 }; 3753 */ 3754 3755 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3756 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3757 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 3758 // 
BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3759 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3760 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3761 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3762 3763 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3764 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3765 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3766 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3767 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3768 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3769 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3770 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3771 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3772 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3773 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3774 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3775 3776 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3777 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3778 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3779 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3780 }; 3781 3782 @Test 3783 public void TestBeginEnd() { 3784 // run through the list of test cases above 3785 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3786 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3787 } 3788 3789 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3790 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3791 Transliterator.REVERSE); 3792 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3793 3794 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3795 // that all of them cause errors 3796 /* 3797 (commented out until we have the real ::BEGIN/::END stuff in place 3798 for 
(int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3799 try { 3800 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3801 Transliterator.FORWARD); 3802 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3803 } 3804 catch (IllegalArgumentException e) { 3805 // this is supposed to happen; do nothing here 3806 } 3807 } 3808 */ 3809 } 3810 3811 @Test 3812 public void TestBeginEndToRules() { 3813 // run through the same list of test cases we used above, but this time, instead of just 3814 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3815 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3816 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3817 // to (i.e., does the same thing as) the original rule set 3818 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3819 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3820 Transliterator.FORWARD); 3821 String rules = t.toRules(false); 3822 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3823 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3824 } 3825 3826 // do the same thing for the reversible test case 3827 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3828 Transliterator.REVERSE); 3829 String rules = reversed.toRules(false); 3830 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3831 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3832 } 3833 3834 @Test 3835 public void TestRegisterAlias() { 3836 String longID = "Lower;[aeiou]Upper"; 3837 String shortID = "Any-CapVowels"; 3838 String reallyShortID = "CapVowels"; 3839 3840 Transliterator.registerAlias(shortID, longID); 3841 3842 Transliterator t1 = 
Transliterator.getInstance(longID); 3843 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3844 3845 if (!t1.getID().equals(longID)) 3846 errln("Transliterator instantiated with long ID doesn't have long ID"); 3847 if (!t2.getID().equals(reallyShortID)) 3848 errln("Transliterator instantiated with short ID doesn't have short ID"); 3849 3850 if (!t1.toRules(true).equals(t2.toRules(true))) 3851 errln("Alias transliterators aren't the same"); 3852 3853 Transliterator.unregister(shortID); 3854 3855 try { 3856 t1 = Transliterator.getInstance(shortID); 3857 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3858 } 3859 catch (IllegalArgumentException e) { 3860 } 3861 3862 // try the same thing again, but this time with something other than 3863 // an instance of CompoundTransliterator 3864 String realID = "Latin-Greek"; 3865 String fakeID = "Latin-dlgkjdflkjdl"; 3866 Transliterator.registerAlias(fakeID, realID); 3867 3868 t1 = Transliterator.getInstance(realID); 3869 t2 = Transliterator.getInstance(fakeID); 3870 3871 if (!t1.toRules(true).equals(t2.toRules(true))) 3872 errln("Alias transliterators aren't the same"); 3873 3874 Transliterator.unregister(fakeID); 3875 } 3876 3877 /** 3878 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 
3879 */ 3880 @Test 3881 public void TestHalfwidthFullwidth() { 3882 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3883 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3884 3885 // Array of 3n items 3886 // Each item is 3887 // "hf"|"fh"|"both", 3888 // <Halfwidth>, 3889 // <Fullwidth> 3890 String[] DATA = { 3891 "both", 3892 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3893 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3894 }; 3895 3896 for (int i=0; i<DATA.length; i+=3) { 3897 switch (DATA[i].charAt(0)) { 3898 case 'h': // Halfwidth-Fullwidth only 3899 expect(hf, DATA[i+1], DATA[i+2]); 3900 break; 3901 case 'f': // Fullwidth-Halfwidth only 3902 expect(fh, DATA[i+2], DATA[i+1]); 3903 break; 3904 case 'b': // both directions 3905 expect(hf, DATA[i+1], DATA[i+2]); 3906 expect(fh, DATA[i+2], DATA[i+1]); 3907 break; 3908 } 3909 } 3910 3911 } 3912 3913 /** 3914 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3915 * TODO: confirm that the expected results are correct. 3916 * For now, test just confirms that C++ and Java give identical results. 3917 */ 3918 @Test 3919 public void TestThai() { 3920 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3921 String thaiText = 3922 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3923 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3924 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3925 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. 
\u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3926 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3927 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3928 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3929 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3930 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3931 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3932 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3933 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3934 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3935 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3936 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3937 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3938 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3939 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3940 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3941 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3942 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3943 "\u0e01\u0e25\u0e38\u0e48\u0e21. 
\u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3944 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3945 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3946 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3947 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3948 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3949 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3950 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3951 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3952 3953 String latinText = 3954 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3955 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3956 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3957 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3958 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3959 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3960 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3961 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. 
m\u1ecb\u0300m" + 3962 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3963 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3964 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3965 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3966 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3967 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3968 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3969 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3970 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3971 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3972 3973 expect(tr, thaiText, latinText); 3974 } 3975 3976 3977 //====================================================================== 3978 // These tests are not mirrored (yet) in icu4c at 3979 // source/test/intltest/transtst.cpp 3980 //====================================================================== 3981 3982 /** 3983 * Improve code coverage. 3984 */ 3985 @Test 3986 public void TestCoverage() { 3987 // NullTransliterator 3988 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3989 expect(t, "a", "a"); 3990 3991 // Source, target set 3992 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3993 t.setFilter(new UnicodeSet("[A-Z]")); 3994 logln("source = " + t.getSourceSet()); 3995 logln("target = " + t.getTargetSet()); 3996 3997 t = Transliterator.createFromRules("x", "(.) 
> &Any-Hex($1);", Transliterator.FORWARD); 3998 logln("source = " + t.getSourceSet()); 3999 logln("target = " + t.getTargetSet()); 4000 } 4001 /* 4002 * Test case for threading problem in NormalizationTransliterator 4003 * reported by ticket#5160 4004 */ 4005 @Test 4006 public void TestT5160() { 4007 final String[] testData = { 4008 "a", 4009 "b", 4010 "\u09BE", 4011 "A\u0301", 4012 }; 4013 final String[] expected = { 4014 "a", 4015 "b", 4016 "\u09BE", 4017 "\u00C1", 4018 }; 4019 Transliterator translit = Transliterator.getInstance("NFC"); 4020 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 4021 for (int i = 0; i < tasks.length; i++) { 4022 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 4023 } 4024 TestUtil.runUntilDone(tasks); 4025 4026 for (int i = 0; i < tasks.length; i++) { 4027 if (tasks[i].getErrorMessage() != null) { 4028 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 4029 break; 4030 } 4031 } 4032 } 4033 4034 static class NormTranslitTask implements Runnable { 4035 Transliterator translit; 4036 String testData; 4037 String expectedData; 4038 String errorMsg; 4039 4040 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 4041 this.translit = translit; 4042 this.testData = testData; 4043 this.expectedData = expectedData; 4044 } 4045 4046 public void run() { 4047 errorMsg = null; 4048 StringBuffer inBuf = new StringBuffer(testData); 4049 StringBuffer expectedBuf = new StringBuffer(expectedData); 4050 4051 for(int i = 0; i < 1000; i++) { 4052 String in = inBuf.toString(); 4053 String out = translit.transliterate(in); 4054 String expected = expectedBuf.toString(); 4055 if (!out.equals(expected)) { 4056 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 4057 break; 4058 } 4059 inBuf.append(testData); 4060 expectedBuf.append(expectedData); 4061 } 4062 } 4063 4064 public String getErrorMessage() { 4065 return errorMsg; 4066 } 4067 } 4068 
4069 //====================================================================== 4070 // Support methods 4071 //====================================================================== 4072 static void expect(String rules, 4073 String source, 4074 String expectedResult, 4075 Transliterator.Position pos) { 4076 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 4077 expect(t, source, expectedResult, pos); 4078 } 4079 4080 static void expect(String rules, String source, String expectedResult) { 4081 expect(rules, source, expectedResult, null); 4082 } 4083 4084 static void expect(Transliterator t, String source, String expectedResult, 4085 Transliterator reverseTransliterator) { 4086 expect(t, source, expectedResult); 4087 if (reverseTransliterator != null) { 4088 expect(reverseTransliterator, expectedResult, source); 4089 } 4090 } 4091 4092 static void expect(Transliterator t, String source, String expectedResult) { 4093 expect(t, source, expectedResult, (Transliterator.Position) null); 4094 } 4095 4096 static void expect(Transliterator t, String source, String expectedResult, 4097 Transliterator.Position pos) { 4098 if (pos == null) { 4099 String result = t.transliterate(source); 4100 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 4101 } 4102 4103 Transliterator.Position index = null; 4104 if (pos == null) { 4105 index = new Transliterator.Position(0, source.length(), 0, source.length()); 4106 } else { 4107 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4108 pos.start, pos.limit); 4109 } 4110 4111 ReplaceableString rsource = new ReplaceableString(source); 4112 4113 t.finishTransliteration(rsource, index); 4114 // Do it all at once -- below we do it incrementally 4115 4116 if (index.start != index.limit) { 4117 expectAux(t.getID() + ":UNFINISHED", source, 4118 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 4119 return; 4120 } 4121 String result = 
rsource.toString(); 4122 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 4123 4124 4125 if (pos == null) { 4126 index = new Transliterator.Position(); 4127 } else { 4128 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4129 pos.start, pos.limit); 4130 } 4131 4132 // Test incremental transliteration -- this result 4133 // must be the same after we finalize (see below). 4134 List<String> v = new ArrayList<String>(); 4135 v.add(source); 4136 rsource.replace(0, rsource.length(), ""); 4137 if (pos != null) { 4138 rsource.replace(0, 0, source); 4139 v.add(UtilityExtensions.formatInput(rsource, index)); 4140 t.transliterate(rsource, index); 4141 v.add(UtilityExtensions.formatInput(rsource, index)); 4142 } else { 4143 for (int i=0; i<source.length(); ++i) { 4144 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 4145 //log.append(source.charAt(i)).append(" -> ")); 4146 t.transliterate(rsource, index, source.charAt(i)); 4147 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 4148 v.add(UtilityExtensions.formatInput(rsource, index) + 4149 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 4150 } 4151 } 4152 4153 // As a final step in keyboard transliteration, we must call 4154 // transliterate to finish off any pending partial matches that 4155 // were waiting for more input. 
4156 t.finishTransliteration(rsource, index); 4157 result = rsource.toString(); 4158 //log.append(" => ").append(rsource.toString()); 4159 v.add(result); 4160 4161 String[] results = new String[v.size()]; 4162 v.toArray(results); 4163 expectAux(t.getID() + ":Incremental", results, 4164 result.equals(expectedResult), 4165 expectedResult); 4166 } 4167 4168 static boolean expectAux(String tag, String source, 4169 String result, String expectedResult) { 4170 return expectAux(tag, new String[] {source, result}, 4171 result.equals(expectedResult), 4172 expectedResult); 4173 } 4174 4175 static boolean expectAux(String tag, String source, 4176 String result, boolean pass, 4177 String expectedResult) { 4178 return expectAux(tag, new String[] {source, result}, 4179 pass, 4180 expectedResult); 4181 } 4182 4183 static boolean expectAux(String tag, String source, 4184 boolean pass, 4185 String expectedResult) { 4186 return expectAux(tag, new String[] {source}, 4187 pass, 4188 expectedResult); 4189 } 4190 4191 static boolean expectAux(String tag, String[] results, boolean pass, 4192 String expectedResult) { 4193 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 4194 4195 for (int i = 0; i < results.length; ++i) { 4196 String label; 4197 if (i == 0) { 4198 label = "source: "; 4199 } else if (i == results.length - 1) { 4200 label = "result: "; 4201 } else { 4202 if (!isVerbose() && pass) continue; 4203 label = "interm" + i + ": "; 4204 } 4205 msg(" " + label + results[i], pass ? 
LOG : ERR, false, true); 4206 } 4207 4208 if (!pass) { 4209 msg( " expected: " + expectedResult, ERR, false, true); 4210 } 4211 4212 return pass; 4213 } 4214 4215 static private void assertTransform(String message, String expected, StringTransform t, String source) { 4216 assertEquals(message + " " + source, expected, t.transform(source)); 4217 } 4218 4219 4220 static private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 4221 assertEquals(message + " " +source, expected, t.transform(source)); 4222 assertEquals(message + " " +source2, expected, t.transform(source2)); 4223 assertEquals(message + " " + expected, source, back.transform(expected)); 4224 } 4225 4226 /* 4227 * Tests the method public Enumeration<String> getAvailableTargets(String source) 4228 */ 4229 @Test 4230 public void TestGetAvailableTargets() { 4231 try { 4232 // Tests when if (targets == null) is true 4233 Transliterator.getAvailableTargets(""); 4234 } catch (Exception e) { 4235 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 4236 } 4237 } 4238 4239 /* 4240 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 4241 */ 4242 @Test 4243 public void TestGetAvailableVariants() { 4244 try { 4245 // Tests when if (targets == null) is true 4246 Transliterator.getAvailableVariants("", ""); 4247 } catch (Exception e) { 4248 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 4249 } 4250 } 4251 4252 /* 4253 * Tests the mehtod String nextLine() in RuleBody 4254 */ 4255 @Test 4256 public void TestNextLine() { 4257 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 4258 try{ 4259 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 4260 } catch(Exception e){ 4261 errln("TransliteratorParser.nextLine() was not suppose to 
return an " + 4262 "exception for a rule of '\\'"); 4263 } 4264 } 4265 } 4266