/* GENERATED SOURCE. DO NOT MODIFY. */
// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2002-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

/**
 * Port From:   ICU4C v2.1 : collate/CollationIteratorTest
 * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
 **/

package android.icu.dev.test.collator;

import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Arrays;
import java.util.Locale;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import android.icu.dev.test.TestFmwk;
import android.icu.lang.UCharacter;
import android.icu.text.CollationElementIterator;
import android.icu.text.Collator;
import android.icu.text.RuleBasedCollator;
import android.icu.text.UCharacterIterator;
import android.icu.util.ULocale;
import android.icu.testsharding.MainTestShard;

/**
 * Tests for {@link CollationElementIterator}: forward and backward iteration,
 * offsets, text replacement, maximum expansion sizes, and iteration over
 * contractions, expansions and normalized text.
 */
@MainTestShard
@RunWith(JUnit4.class)
public class CollationIteratorTest extends TestFmwk {

    /** Sample text shared by the offset, previous and setText tests. */
    String test1 = "What subset of all possible test cases?";
    /** Second sample text, used to pre-exercise an iterator in {@link #TestSetText()}. */
    String test2 = "has the highest probability of detecting";

    /**
     * Checks that resetting the offset to 0 after consuming only part of an
     * expansion yields the same first collation element as a fresh iteration.
     *
     * @bug 4157299
     */
    @Test
    public void TestClearBuffers(/* char* par */) {
        RuleBasedCollator c = null;
        try {
            c = new RuleBasedCollator("&a < b < c & ab = d");
        } catch (Exception e) {
            warnln("Couldn't create a RuleBasedCollator.");
            return;
        }

        String source = "abcd";
        CollationElementIterator i = c.getCollationElementIterator(source);
        int e0 = 0;
        try {
            e0 = i.next();    // save the first collation element
        } catch (Exception e) {
            errln("call to i.next() failed.");
            return;
        }

        try {
            i.setOffset(3);        // go to the expanding character
        } catch (Exception e) {
            errln("call to i.setOffset(3) failed.");
            return;
        }

        try {
            i.next();                // but only use up half of it
        } catch (Exception e) {
            errln("call to i.next() failed.");
            return;
        }

        try {
            i.setOffset(0);        // go back to the beginning
        } catch (Exception e) {
            errln("call to i.setOffset(0) failed. ");
            // Without a successful reset the comparison below is meaningless,
            // so stop here like every other failure path in this test.
            return;
        }

        int e = 0;
        try {
            e = i.next();    // and get this one again
        } catch (Exception ee) {
            errln("call to i.next() failed. ");
            return;
        }

        if (e != e0) {
            errln("got 0x" + Integer.toHexString(e) + ", expected 0x" + Integer.toHexString(e0));
        }
    }

    /** @bug 4108762
     * Test for getMaxExpansion()
     */
    @Test
    public void TestMaxExpansion(/* char* par */) {
        int unassigned = 0xEFFFD;
        String rule = "&a < ab < c/aba < d < z < ch";
        RuleBasedCollator coll = null;
        try {
            coll = new RuleBasedCollator(rule);
        } catch (Exception e) {
            warnln("Fail to create RuleBasedCollator");
            return;
        }
        char ch = 0;
        String str = String.valueOf(ch);

        CollationElementIterator iter = coll.getCollationElementIterator(str);

        // For every char 1..0xFFFF the number of elements produced must not
        // exceed the max expansion reported for the last element.
        while (ch < 0xFFFF) {
            int count = 1;
            ch ++;
            str = String.valueOf(ch);
            iter.setText(str);
            int order = iter.previous();

            // thai management
            if (order == 0) {
                order = iter.previous();
            }

            while (iter.previous() != CollationElementIterator.NULLORDER) {
                count ++;
            }

            if (iter.getMaxExpansion(order) < count) {
                // Report the code point in hex; concatenating the raw char
                // would print the (often unprintable) character itself.
                errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                      + ", maximum expansion count < " + count);
            }
        }

        // testing for exact max expansion
        ch = 0;
        while (ch < 0x61) {
            str = String.valueOf(ch);
            iter.setText(str);
            int order = iter.previous();

            if (iter.getMaxExpansion(order) != 1) {
                errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                      + " maximum expansion count == 1");
            }
            ch ++;
        }

        ch = 0x63;
        str = String.valueOf(ch);
        iter.setText(str);
        int temporder = iter.previous();

        if (iter.getMaxExpansion(temporder) != 3) {
            errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                  + " maximum expansion count == 3");
        }

        ch = 0x64;
        str = String.valueOf(ch);
        iter.setText(str);
        temporder = iter.previous();

        if (iter.getMaxExpansion(temporder) != 1) {
            errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                  + " maximum expansion count == 1");
        }

        str = UCharacter.toString(unassigned);
        iter.setText(str);
        temporder = iter.previous();

        if (iter.getMaxExpansion(temporder) != 2) {
            // The code point under test here is 'unassigned', not the stale 'ch'.
            errln("Failure at codepoint 0x" + Integer.toHexString(unassigned)
                  + " maximum expansion count == 2");
        }


        // testing jamo
        ch = 0x1165;
        str = String.valueOf(ch);
        iter.setText(str);
        temporder = iter.previous();

        if (iter.getMaxExpansion(temporder) > 3) {
            errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                  + " maximum expansion count < 3");
        }

        // testing special jamo &a<\u1165
        rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";

        try {
            coll = new RuleBasedCollator(rule);
        } catch (Exception e) {
            errln("Fail to create RuleBasedCollator");
            return;
        }
        // 'str' still holds the single jamo U+1165 set above.
        iter = coll.getCollationElementIterator(str);

        temporder = iter.previous();

        if (iter.getMaxExpansion(temporder) != 6) {
            errln("Failure at codepoint 0x" + Integer.toHexString(ch)
                  + " maximum expansion count == 6");
        }
    }

    /**
     * Test for getOffset() and setOffset()
     */
    @Test
    public void TestOffset(/* char* par */) {
        RuleBasedCollator en_us;
        try {
            en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
        } catch (Exception e) {
            warnln("ERROR: in creation of collator of ENGLISH locale");
            return;
        }

        CollationElementIterator iter = en_us.getCollationElementIterator(test1);
        // testing boundaries
        iter.setOffset(0);
        if (iter.previous() != CollationElementIterator.NULLORDER) {
            errln("Error: After setting offset to 0, we should be at the end "
                  + "of the backwards iteration");
        }
        iter.setOffset(test1.length());
        if (iter.next() != CollationElementIterator.NULLORDER) {
            errln("Error: After setting offset to the end of the string, we "
                  + "should be at the end of the forwards iteration");
        }

        // Run all the way through the iterator, then get the offset
        int[] orders = CollationTest.getOrders(iter);
        logln("orders.length = " + orders.length);

        int offset = iter.getOffset();

        if (offset != test1.length()) {
            String msg1 = "offset at end != length: ";
            String msg2 = " vs ";
            errln(msg1 + offset + msg2 + test1.length());
        }

        // Now set the offset back to the beginning and see if it works
        CollationElementIterator pristine = en_us.getCollationElementIterator(test1);

        try {
            iter.setOffset(0);
        } catch(Exception e) {
            errln("setOffset failed.");
        }
        assertEqual(iter, pristine);

        // setting offset in the middle of a contraction
        String contraction = "change";
        RuleBasedCollator tailored = null;
        try {
            tailored = new RuleBasedCollator("& a < ch");
        } catch (Exception e) {
            errln("Error: in creation of Spanish collator");
            return;
        }
        iter = tailored.getCollationElementIterator(contraction);
        int order[] = CollationTest.getOrders(iter);
        iter.setOffset(1); // sets offset in the middle of ch
        int order2[] = CollationTest.getOrders(iter);
        if (!Arrays.equals(order, order2)) {
            errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
        }
        contraction = "peache";
        iter = tailored.getCollationElementIterator(contraction);
        iter.setOffset(3);
        order = CollationTest.getOrders(iter);
        iter.setOffset(4); // sets offset in the middle of ch
        order2 = CollationTest.getOrders(iter);
        if (!Arrays.equals(order, order2)) {
            errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
        }
        // setting offset in the middle of a surrogate pair
        String surrogate = "\ud800\udc00str";
        iter = tailored.getCollationElementIterator(surrogate);
        order = CollationTest.getOrders(iter);
        iter.setOffset(1); // sets offset in the middle of surrogate
        order2 = CollationTest.getOrders(iter);
        if (!Arrays.equals(order, order2)) {
            errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
        }
        surrogate = "simple\ud800\udc00str";
        iter = tailored.getCollationElementIterator(surrogate);
        iter.setOffset(6);
        order = CollationTest.getOrders(iter);
        iter.setOffset(7); // sets offset in the middle of surrogate
        order2 = CollationTest.getOrders(iter);
        if (!Arrays.equals(order, order2)) {
            errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
        }
        // TODO: try iterating halfway through a messy string.
    }

    /**
     * Steps both iterators forward in lockstep and reports the first position
     * at which their elements differ, then runs
     * {@code CollationTest.backAndForth} on each iterator.
     *
     * @param i1 first iterator to compare
     * @param i2 second iterator to compare
     */
    void assertEqual(CollationElementIterator i1, CollationElementIterator i2) {
        int c1, c2, count = 0;
        do {
            c1 = i1.next();
            c2 = i2.next();
            if (c1 != c2) {
                errln("    " + count + ": strength(0x" +
                    Integer.toHexString(c1) + ") != strength(0x" + Integer.toHexString(c2) + ")");
                break;
            }
            count += 1;
        } while (c1 != CollationElementIterator.NULLORDER);
        CollationTest.backAndForth(this, i1);
        CollationTest.backAndForth(this, i2);
    }

    /**
     * Test for CollationElementIterator.previous()
     *
     * @bug 4108758 - Make sure it works with contracting characters
     *
     */
    @Test
    public void TestPrevious(/* char* par */) {
        RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
        CollationElementIterator iter = en_us.getCollationElementIterator(test1);

        // A basic test to see if it's working at all
        CollationTest.backAndForth(this, iter);

        // Test with a contracting character sequence
        String source;
        RuleBasedCollator c1 = null;
        try {
            c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
        } catch (Exception e) {
            errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
            return;
        }

        source = "abchdcba";
        iter = c1.getCollationElementIterator(source);
        CollationTest.backAndForth(this, iter);

        // Test with an expanding character sequence
        RuleBasedCollator c2 = null;
        try {
            c2 = new RuleBasedCollator("&a < b < c/abd < d");
        } catch (Exception e ) {
            errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
            return;
        }

        source = "abcd";
        iter = c2.getCollationElementIterator(source);
        CollationTest.backAndForth(this, iter);

        // Now try both
        RuleBasedCollator c3 = null;
        try {
            c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
        } catch (Exception e) {
            errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
            return;
        }

        source = "abcdbchdc";
        iter = c3.getCollationElementIterator(source);
        CollationTest.backAndForth(this, iter);

        source= "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
        Collator c4 = null;
        try {
            c4 = Collator.getInstance(new Locale("th", "TH", ""));
        } catch (Exception e) {
            errln("Couldn't create a collator");
            return;
        }

        iter = ((RuleBasedCollator)c4).getCollationElementIterator(source);
        CollationTest.backAndForth(this, iter);

        source= "\u0061\u30CF\u3099\u30FC";
        Collator c5 = null;
        try {
            c5 = Collator.getInstance(new Locale("ja", "JP", ""));
        } catch (Exception e) {
            errln("Couldn't create Japanese collator\n");
            return;
        }
        iter = ((RuleBasedCollator)c5).getCollationElementIterator(source);

        CollationTest.backAndForth(this, iter);
    }

    /**
     * Test for setText()
     */
    @Test
    public void TestSetText(/* char* par */) {
        RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
        CollationElementIterator iter1 = en_us.getCollationElementIterator(test1);
        CollationElementIterator iter2 = en_us.getCollationElementIterator(test2);

        // Run through the second iterator just to exercise it
        int c = iter2.next();
        int i = 0;

        while ( ++i < 10 && c != CollationElementIterator.NULLORDER) {
            try {
                c = iter2.next();
            } catch (Exception e) {
                errln("iter2.next() returned an error.");
                break;
            }
        }

        // Now set it to point to the same string as the first iterator
        try {
            iter2.setText(test1);
        } catch (Exception e) {
            errln("call to iter2->setText(test1) failed.");
            return;
        }
        assertEqual(iter1, iter2);

        iter1.reset();
        // now use the overloaded setText(CharacterIterator) function to set the text
        CharacterIterator chariter = new StringCharacterIterator(test1);
        try {
            iter2.setText(chariter);
        } catch (Exception e ) {
            errln("call to iter2->setText(chariter(test1)) failed.");
            return;
        }
        assertEqual(iter1, iter2);

        iter1.reset();
        // now use the overloaded setText(UCharacterIterator) function to set the text
        UCharacterIterator uchariter = UCharacterIterator.getInstance(test1);
        try {
            iter2.setText(uchariter);
        } catch (Exception e ) {
            errln("call to iter2->setText(uchariter(test1)) failed.");
            return;
        }
        assertEqual(iter1, iter2);
    }

    /**
     * Test for CollationElementIterator previous and next for the whole set of
     * unicode characters.
     */
    @Test
    public void TestUnicodeChar() {
        RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US);
        // A basic test to see if it's working at all
        CollationElementIterator iter = en_us.getCollationElementIterator("\u0e4d\u0e4e\u0e4f");
        CollationTest.backAndForth(this, iter);

        backAndForthOverBmp(en_us);
    }

    /**
     * Test for CollationElementIterator previous and next for the whole set of
     * unicode characters with normalization on.
     */
    @Test
    public void TestNormalizedUnicodeChar()
    {
        // thai should have normalization on
        RuleBasedCollator th_th = null;
        try {
            th_th = (RuleBasedCollator)Collator.getInstance(
                                                       new Locale("th", "TH"));
        } catch (Exception e) {
            warnln("Error creating Thai collator");
            return;
        }
        CollationElementIterator iter
            = th_th.getCollationElementIterator(String.valueOf('\uFDFA'));
        CollationTest.backAndForth(this, iter);

        backAndForthOverBmp(th_th);
    }

    /**
     * Runs {@code CollationTest.backAndForth} over char values 1 through 0xFFFF
     * in batches. Each batch ends at the next multiple of 0xFF and contains only
     * the chars for which {@code UCharacter.isDefined} returns true.
     *
     * @param collator collator whose element iterator is exercised
     */
    private void backAndForthOverBmp(RuleBasedCollator collator) {
        StringBuffer source = new StringBuffer();
        for (char codepoint = 1; codepoint < 0xFFFE;) {
            source.delete(0, source.length());
            // Collect defined chars up to (but not including) the next multiple of 0xFF.
            while (codepoint % 0xFF != 0) {
                if (UCharacter.isDefined(codepoint)) {
                    source.append(codepoint);
                }
                codepoint ++;
            }

            // Include the multiple of 0xFF itself.
            if (UCharacter.isDefined(codepoint)) {
                source.append(codepoint);
            }

            // Do not increment past 0xFFFF: the char would wrap to 0 and the
            // outer loop would never terminate.
            if (codepoint != 0xFFFF) {
                codepoint ++;
            }

            CollationElementIterator iter
                = collator.getCollationElementIterator(source.toString());
            CollationTest.backAndForth(this, iter);
        }
    }

    /**
     * Testing the discontiguous contractions
     */
    @Test
    public void TestDiscontiguous()
    {
        String rulestr ="&z < AB < X\u0300 < ABC < X\u0300\u0315";
        String src[] = {"ADB", "ADBC", "A\u0315B", "A\u0315BC",
                        // base character blocked
                        "XD\u0300", "XD\u0300\u0315",
                        // non blocking combining character
                        "X\u0319\u0300", "X\u0319\u0300\u0315",
                        // blocking combining character
                        "X\u0314\u0300", "X\u0314\u0300\u0315",
                        // contraction prefix
                        "ABDC", "AB\u0315C","X\u0300D\u0315",
                        "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
                        // ends not with a contraction character
                        "X\u0319\u0300D", "X\u0319\u0300\u0315D",
                        "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
                        "X\u0300\u031A\u0315D"
        };
        // Each target is the expected segmentation of the matching source:
        // space-separated pieces whose collation elements, concatenated,
        // must equal those of the source string.
        String tgt[] = {// non blocking combining character
                        "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
                        // base character blocked
                        "X D \u0300", "X D \u0300\u0315",
                        // non blocking combining character
                        "X\u0300 \u0319", "X\u0300\u0315 \u0319",
                        // blocking combining character
                        "X \u0314 \u0300", "X \u0314 \u0300\u0315",
                        // contraction prefix
                        "AB DC", "AB \u0315 C","X\u0300 D \u0315",
                        "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
                        // ends not with a contraction character
                        "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
                        "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
                        "X\u0300 \u031A\u0315D"
        };
        int count = 0;
        try {
            RuleBasedCollator coll = new RuleBasedCollator(rulestr);
            CollationElementIterator iter
                = coll.getCollationElementIterator("");
            CollationElementIterator resultiter
                = coll.getCollationElementIterator("");
            while (count < src.length) {
                iter.setText(src[count]);
                int s = 0;
                while (s < tgt[count].length()) {
                    int e = tgt[count].indexOf(' ', s);
                    if (e < 0) {
                        e = tgt[count].length();
                    }
                    String resultstr = tgt[count].substring(s, e);
                    resultiter.setText(resultstr);
                    int ce = resultiter.next();
                    while (ce != CollationElementIterator.NULLORDER) {
                        if (ce != iter.next()) {
                            // Separator added so the index does not run into "at".
                            errln("Discontiguos contraction test mismatch at "
                                  + count);
                            return;
                        }
                        ce = resultiter.next();
                    }
                    s = e + 1;
                }
                iter.reset();
                CollationTest.backAndForth(this, iter);
                count ++;
            }
        }
        catch (Exception e) {
            warnln("Error running discontiguous tests " + e.toString());
        }
    }

    /**
     * Test the incremental normalization
     */
    @Test
    public void TestNormalization()
    {
        String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
        String testdata[] = {"\u1ED9", "o\u0323\u0302",
                            "\u0300\u0315", "\u0315\u0300",
                            "A\u0300\u0315B", "A\u0315\u0300B",
                            "A\u0316\u0315B", "A\u0315\u0316B",
                            "\u0316\u0300\u0315", "\u0315\u0300\u0316",
                            "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
                            "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"};
        RuleBasedCollator coll = null;
        try {
            coll = new RuleBasedCollator(rules);
            coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        } catch (Exception e) {
            warnln("ERROR: in creation of collator using rules " + rules);
            return;
        }

        CollationElementIterator iter = coll.getCollationElementIterator("testing");
        for (int count = 0; count < testdata.length; count ++) {
            iter.setText(testdata[count]);
            CollationTest.backAndForth(this, iter);
        }
    }

    /**
     * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
     * normalization on AND jamo tailoring, among other things.
     *
     * Note: This test is sensitive to changes of the root collator,
     * for example whether the ae-ligature maps to three CEs (as in the DUCET)
     * or to two CEs (as in the CLDR 24 FractionalUCA.txt).
     * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
     * For example, the DUCET's artificial secondary CE in the ae-ligature
     * may map to two 32-bit iterator CEs (as it did until ICU 52).
     */
    @Test
    public void TestSearchCollatorElements()
    {
        String tsceText =
            " \uAC00" +              // simple LV Hangul
            " \uAC01" +              // simple LVT Hangul
            " \uAC0F" +              // LVTT, last jamo expands for search
            " \uAFFF" +              // LLVVVTT, every jamo expands for search
            " \u1100\u1161\u11A8" +  // 0xAC01 as conjoining jamo
            " \u3131\u314F\u3131" +  // 0xAC01 as compatibility jamo
            " \u1100\u1161\u11B6" +  // 0xAC0F as conjoining jamo; last expands for search
            " \u1101\u1170\u11B6" +  // 0xAFFF as conjoining jamo; all expand for search
            " \u00E6" +              // small letter ae, expands
            " \u1E4D" +              // small letter o with tilde and acute, decomposes
            " ";

        // Expected getOffset() value before each next() call (and, read in
        // reverse, before each previous() call), including the final call
        // that returns NULLORDER.
        int[] rootStandardOffsets = {
            0,  1,2,
            2,  3,4,4,
            4,  5,6,6,
            6,  7,8,8,
            8,  9,10,11,
            12, 13,14,15,
            16, 17,18,19,
            20, 21,22,23,
            24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
            26, 27,28,28,
            28,
            29
        };

        int[] rootSearchOffsets = {
            0,  1,2,
            2,  3,4,4,
            4,  5,6,6,6,
            6,  7,8,8,8,8,8,8,
            8,  9,10,11,
            12, 13,14,15,
            16, 17,18,19,20,
            20, 21,22,22,23,23,23,24,
            24, 25,26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
            26, 27,28,28,
            28,
            29
        };

        /** Pairs a locale ID with the offsets expected from its collator. */
        class TSCEItem {
            private final String localeString;
            private final int[] offsets;
            TSCEItem(String locStr, int[] offs) {
                localeString = locStr;
                offsets = offs;
            }
            public String getLocaleString() { return localeString; }
            public int[] getOffsets() { return offsets; }
        }
        final TSCEItem[] tsceItems = {
            new TSCEItem( "root",                  rootStandardOffsets ),
            new TSCEItem( "root@collation=search", rootSearchOffsets   ),
        };

        for (TSCEItem tsceItem: tsceItems) {
            String localeString = tsceItem.getLocaleString();
            ULocale uloc = new ULocale(localeString);
            RuleBasedCollator col = null;
            try {
                col = (RuleBasedCollator)Collator.getInstance(uloc);
            } catch (Exception e) {
                errln("Error: in locale " + localeString + ", err in Collator.getInstance");
                continue;
            }
            CollationElementIterator uce = col.getCollationElementIterator(tsceText);
            int[] offsets = tsceItem.getOffsets();
            int ioff, noff = offsets.length;
            int offset, element;

            // forwards test
            ioff = 0;
            do {
                offset = uce.getOffset();
                element = uce.next();
                logln(String.format("(%s) offset=%2d  ce=%08x\n", localeString, offset, element));
                if (element == 0) {
                    errln("Error: in locale " + localeString + ", CEIterator next() returned element 0");
                }
                if ( ioff < noff ) {
                    if ( offset != offsets[ioff] ) {
                        errln("Error: in locale " + localeString + ", expected CEIterator next()->getOffset " + offsets[ioff] + ", got " + offset);
                        //ioff = noff;
                        //break;
                    }
                    ioff++;
                } else {
                    errln("Error: in locale " + localeString + ", CEIterator next() returned more elements than expected");
                }
            } while (element != CollationElementIterator.NULLORDER);
            if ( ioff < noff ) {
                errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected");
            }

            // backwards test
            uce.setOffset(tsceText.length());
            ioff = noff;
            do {
                offset = uce.getOffset();
                element = uce.previous();
                if (element == 0) {
                    errln("Error: in locale " + localeString + ", CEIterator previous() returned element 0");
                }
                if ( ioff > 0 ) {
                    ioff--;
                    if ( offset != offsets[ioff] ) {
                        errln("Error: in locale " + localeString + ", expected CEIterator previous()->getOffset " + offsets[ioff] + ", got " + offset);
                        //ioff = 0;
                        //break;
                    }
                } else {
                    errln("Error: in locale " + localeString + ", CEIterator previous() returned more elements than expected");
                }
            } while (element != CollationElementIterator.NULLORDER);
            if ( ioff > 0 ) {
                errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected");
            }
        }
    }
}