1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2002-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 /** 11 * Port From: ICU4C v2.1 : collate/CollationIteratorTest 12 * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp 13 **/ 14 15 package com.ibm.icu.dev.test.collator; 16 17 import java.text.CharacterIterator; 18 import java.text.StringCharacterIterator; 19 import java.util.Arrays; 20 import java.util.Locale; 21 22 import org.junit.Test; 23 import org.junit.runner.RunWith; 24 import org.junit.runners.JUnit4; 25 26 import com.ibm.icu.dev.test.TestFmwk; 27 import com.ibm.icu.lang.UCharacter; 28 import com.ibm.icu.text.CollationElementIterator; 29 import com.ibm.icu.text.Collator; 30 import com.ibm.icu.text.RuleBasedCollator; 31 import com.ibm.icu.text.UCharacterIterator; 32 import com.ibm.icu.util.ULocale; 33 34 @RunWith(JUnit4.class) 35 public class CollationIteratorTest extends TestFmwk { 36 37 String test1 = "What subset of all possible test cases?"; 38 String test2 = "has the highest probability of detecting"; 39 40 /* 41 * @bug 4157299 42 */ 43 @Test 44 public void TestClearBuffers(/* char* par */) { 45 RuleBasedCollator c = null; 46 try { 47 c = new RuleBasedCollator("&a < b < c & ab = d"); 48 } catch (Exception e) { 49 warnln("Couldn't create a RuleBasedCollator."); 50 return; 51 } 52 53 String source = "abcd"; 54 CollationElementIterator i = c.getCollationElementIterator(source); 55 int e0 = 0; 56 try { 57 e0 = i.next(); // save the first collation element 58 } catch (Exception e) { 59 errln("call to i.next() failed."); 60 return; 61 } 62 63 try { 64 i.setOffset(3); // go to the expanding character 65 } catch (Exception e) { 66 errln("call to i.setOffset(3) failed."); 67 return; 68 } 69 70 try { 71 i.next(); // but only use up half of it 72 } catch (Exception e) { 73 errln("call to i.next() failed."); 74 return; 75 } 76 77 try { 78 i.setOffset(0); // go back to the beginning 79 } catch (Exception e) { 80 errln("call to i.setOffset(0) failed. "); 81 } 82 83 int e = 0; 84 try { 85 e = i.next(); // and get this one again 86 } catch (Exception ee) { 87 errln("call to i.next() failed. "); 88 return; 89 } 90 91 if (e != e0) { 92 errln("got 0x" + Integer.toHexString(e) + ", expected 0x" + Integer.toHexString(e0)); 93 } 94 } 95 96 /** @bug 4108762 97 * Test for getMaxExpansion() 98 */ 99 @Test 100 public void TestMaxExpansion(/* char* par */) { 101 int unassigned = 0xEFFFD; 102 String rule = "&a < ab < c/aba < d < z < ch"; 103 RuleBasedCollator coll = null; 104 try { 105 coll = new RuleBasedCollator(rule); 106 } catch (Exception e) { 107 warnln("Fail to create RuleBasedCollator"); 108 return; 109 } 110 char ch = 0; 111 String str = String.valueOf(ch); 112 113 CollationElementIterator iter = coll.getCollationElementIterator(str); 114 115 while (ch < 0xFFFF) { 116 int count = 1; 117 ch ++; 118 str = String.valueOf(ch); 119 iter.setText(str); 120 int order = iter.previous(); 121 122 // thai management 123 if (order == 0) { 124 order = iter.previous(); 125 } 126 127 while (iter.previous() != CollationElementIterator.NULLORDER) { 128 count ++; 129 } 130 131 if (iter.getMaxExpansion(order) < count) { 132 errln("Failure at codepoint " + ch + ", maximum expansion count < " + count); 133 } 134 } 135 136 // testing for exact max expansion 137 ch = 0; 138 while (ch < 0x61) { 139 str = String.valueOf(ch); 140 iter.setText(str); 141 int order = iter.previous(); 142 143 if (iter.getMaxExpansion(order) != 1) { 144 errln("Failure at codepoint 0x" + Integer.toHexString(ch) 145 + " maximum expansion count == 1"); 146 } 147 ch ++; 148 } 149 150 ch = 0x63; 151 str = String.valueOf(ch); 152 iter.setText(str); 153 int temporder = iter.previous(); 154 155 if (iter.getMaxExpansion(temporder) != 3) { 156 errln("Failure at codepoint 0x" + Integer.toHexString(ch) 157 + " maximum expansion count == 3"); 158 } 159 160 ch = 0x64; 161 str = String.valueOf(ch); 162 iter.setText(str); 163 temporder = iter.previous(); 164 165 if (iter.getMaxExpansion(temporder) != 1) { 166 errln("Failure at codepoint 0x" + Integer.toHexString(ch) 167 + " maximum expansion count == 1"); 168 } 169 170 str = UCharacter.toString(unassigned); 171 iter.setText(str); 172 temporder = iter.previous(); 173 174 if (iter.getMaxExpansion(temporder) != 2) { 175 errln("Failure at codepoint 0x" + Integer.toHexString(ch) 176 + " maximum expansion count == 2"); 177 } 178 179 180 // testing jamo 181 ch = 0x1165; 182 str = String.valueOf(ch); 183 iter.setText(str); 184 temporder = iter.previous(); 185 186 if (iter.getMaxExpansion(temporder) > 3) { 187 errln("Failure at codepoint 0x" + Integer.toHexString(ch) 188 + " maximum expansion count < 3"); 189 } 190 191 // testing special jamo &a<\u1165 192 rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071"; 193 194 try { 195 coll = new RuleBasedCollator(rule); 196 } catch (Exception e) { 197 errln("Fail to create RuleBasedCollator"); 198 return; 199 } 200 iter = coll.getCollationElementIterator(str); 201 202 temporder = iter.previous(); 203 204 if (iter.getMaxExpansion(temporder) != 6) { 205 errln("Failure at codepoint 0x" + Integer.toHexString(ch) 206 + " maximum expansion count == 6"); 207 } 208 } 209 210 /** 211 * Test for getOffset() and setOffset() 212 */ 213 @Test 214 public void TestOffset(/* char* par */) { 215 RuleBasedCollator en_us; 216 try { 217 en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 218 } catch (Exception e) { 219 warnln("ERROR: in creation of collator of ENGLISH locale"); 220 return; 221 } 222 223 CollationElementIterator iter = en_us.getCollationElementIterator(test1); 224 // testing boundaries 225 iter.setOffset(0); 226 if (iter.previous() != CollationElementIterator.NULLORDER) { 227 errln("Error: After setting offset to 0, we should be at the end " 228 + "of the backwards iteration"); 229 } 230 iter.setOffset(test1.length()); 231 if (iter.next() != CollationElementIterator.NULLORDER) { 232 errln("Error: After setting offset to the end of the string, we " 233 + "should be at the end of the forwards iteration"); 234 } 235 236 // Run all the way through the iterator, then get the offset 237 int[] orders = CollationTest.getOrders(iter); 238 logln("orders.length = " + orders.length); 239 240 int offset = iter.getOffset(); 241 242 if (offset != test1.length()) { 243 String msg1 = "offset at end != length: "; 244 String msg2 = " vs "; 245 errln(msg1 + offset + msg2 + test1.length()); 246 } 247 248 // Now set the offset back to the beginning and see if it works 249 CollationElementIterator pristine = en_us.getCollationElementIterator(test1); 250 251 try { 252 iter.setOffset(0); 253 } catch(Exception e) { 254 errln("setOffset failed."); 255 } 256 assertEqual(iter, pristine); 257 258 // setting offset in the middle of a contraction 259 String contraction = "change"; 260 RuleBasedCollator tailored = null; 261 try { 262 tailored = new RuleBasedCollator("& a < ch"); 263 } catch (Exception e) { 264 errln("Error: in creation of Spanish collator"); 265 return; 266 } 267 iter = tailored.getCollationElementIterator(contraction); 268 int order[] = CollationTest.getOrders(iter); 269 iter.setOffset(1); // sets offset in the middle of ch 270 int order2[] = CollationTest.getOrders(iter); 271 if (!Arrays.equals(order, order2)) { 272 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction"); 273 } 274 contraction = "peache"; 275 iter = tailored.getCollationElementIterator(contraction); 276 iter.setOffset(3); 277 order = CollationTest.getOrders(iter); 278 iter.setOffset(4); // sets offset in the middle of ch 279 order2 = CollationTest.getOrders(iter); 280 if (!Arrays.equals(order, order2)) { 281 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction"); 282 } 283 // setting offset in the middle of a surrogate pair 284 String surrogate = "\ud800\udc00str"; 285 iter = tailored.getCollationElementIterator(surrogate); 286 order = CollationTest.getOrders(iter); 287 iter.setOffset(1); // sets offset in the middle of surrogate 288 order2 = CollationTest.getOrders(iter); 289 if (!Arrays.equals(order, order2)) { 290 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair"); 291 } 292 surrogate = "simple\ud800\udc00str"; 293 iter = tailored.getCollationElementIterator(surrogate); 294 iter.setOffset(6); 295 order = CollationTest.getOrders(iter); 296 iter.setOffset(7); // sets offset in the middle of surrogate 297 order2 = CollationTest.getOrders(iter); 298 if (!Arrays.equals(order, order2)) { 299 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair"); 300 } 301 // TODO: try iterating halfway through a messy string. 302 } 303 304 305 306 void assertEqual(CollationElementIterator i1, CollationElementIterator i2) { 307 int c1, c2, count = 0; 308 do { 309 c1 = i1.next(); 310 c2 = i2.next(); 311 if (c1 != c2) { 312 errln(" " + count + ": strength(0x" + 313 Integer.toHexString(c1) + ") != strength(0x" + Integer.toHexString(c2) + ")"); 314 break; 315 } 316 count += 1; 317 } while (c1 != CollationElementIterator.NULLORDER); 318 CollationTest.backAndForth(this, i1); 319 CollationTest.backAndForth(this, i2); 320 } 321 322 /** 323 * Test for CollationElementIterator.previous() 324 * 325 * @bug 4108758 - Make sure it works with contracting characters 326 * 327 */ 328 @Test 329 public void TestPrevious(/* char* par */) { 330 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 331 CollationElementIterator iter = en_us.getCollationElementIterator(test1); 332 333 // A basic test to see if it's working at all 334 CollationTest.backAndForth(this, iter); 335 336 // Test with a contracting character sequence 337 String source; 338 RuleBasedCollator c1 = null; 339 try { 340 c1 = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"); 341 } catch (Exception e) { 342 errln("Couldn't create a RuleBasedCollator with a contracting sequence."); 343 return; 344 } 345 346 source = "abchdcba"; 347 iter = c1.getCollationElementIterator(source); 348 CollationTest.backAndForth(this, iter); 349 350 // Test with an expanding character sequence 351 RuleBasedCollator c2 = null; 352 try { 353 c2 = new RuleBasedCollator("&a < b < c/abd < d"); 354 } catch (Exception e ) { 355 errln("Couldn't create a RuleBasedCollator with an expanding sequence."); 356 return; 357 } 358 359 source = "abcd"; 360 iter = c2.getCollationElementIterator(source); 361 CollationTest.backAndForth(this, iter); 362 363 // Now try both 364 RuleBasedCollator c3 = null; 365 try { 366 c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch"); 367 } catch (Exception e) { 368 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence."); 369 return; 370 } 371 372 source = "abcdbchdc"; 373 iter = c3.getCollationElementIterator(source); 374 CollationTest.backAndForth(this, iter); 375 376 source= "\u0e41\u0e02\u0e41\u0e02\u0e27abc"; 377 Collator c4 = null; 378 try { 379 c4 = Collator.getInstance(new Locale("th", "TH", "")); 380 } catch (Exception e) { 381 errln("Couldn't create a collator"); 382 return; 383 } 384 385 iter = ((RuleBasedCollator)c4).getCollationElementIterator(source); 386 CollationTest.backAndForth(this, iter); 387 388 source= "\u0061\u30CF\u3099\u30FC"; 389 Collator c5 = null; 390 try { 391 c5 = Collator.getInstance(new Locale("ja", "JP", "")); 392 } catch (Exception e) { 393 errln("Couldn't create Japanese collator\n"); 394 return; 395 } 396 iter = ((RuleBasedCollator)c5).getCollationElementIterator(source); 397 398 CollationTest.backAndForth(this, iter); 399 } 400 401 402 403 /** 404 * Test for setText() 405 */ 406 @Test 407 public void TestSetText(/* char* par */) { 408 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 409 CollationElementIterator iter1 = en_us.getCollationElementIterator(test1); 410 CollationElementIterator iter2 = en_us.getCollationElementIterator(test2); 411 412 // Run through the second iterator just to exercise it 413 int c = iter2.next(); 414 int i = 0; 415 416 while ( ++i < 10 && c != CollationElementIterator.NULLORDER) { 417 try { 418 c = iter2.next(); 419 } catch (Exception e) { 420 errln("iter2.next() returned an error."); 421 break; 422 } 423 } 424 425 // Now set it to point to the same string as the first iterator 426 try { 427 iter2.setText(test1); 428 } catch (Exception e) { 429 errln("call to iter2->setText(test1) failed."); 430 return; 431 } 432 assertEqual(iter1, iter2); 433 434 iter1.reset(); 435 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text 436 CharacterIterator chariter = new StringCharacterIterator(test1); 437 try { 438 iter2.setText(chariter); 439 } catch (Exception e ) { 440 errln("call to iter2->setText(chariter(test1)) failed."); 441 return; 442 } 443 assertEqual(iter1, iter2); 444 445 iter1.reset(); 446 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text 447 UCharacterIterator uchariter = UCharacterIterator.getInstance(test1); 448 try { 449 iter2.setText(uchariter); 450 } catch (Exception e ) { 451 errln("call to iter2->setText(uchariter(test1)) failed."); 452 return; 453 } 454 assertEqual(iter1, iter2); 455 } 456 457 /** 458 * Test for CollationElementIterator previous and next for the whole set of 459 * unicode characters. 460 */ 461 @Test 462 public void TestUnicodeChar() { 463 RuleBasedCollator en_us = (RuleBasedCollator)Collator.getInstance(Locale.US); 464 CollationElementIterator iter; 465 char codepoint; 466 StringBuffer source = new StringBuffer(); 467 source.append("\u0e4d\u0e4e\u0e4f"); 468 // source.append("\u04e8\u04e9"); 469 iter = en_us.getCollationElementIterator(source.toString()); 470 // A basic test to see if it's working at all 471 CollationTest.backAndForth(this, iter); 472 for (codepoint = 1; codepoint < 0xFFFE;) { 473 source.delete(0, source.length()); 474 while (codepoint % 0xFF != 0) { 475 if (UCharacter.isDefined(codepoint)) { 476 source.append(codepoint); 477 } 478 codepoint ++; 479 } 480 481 if (UCharacter.isDefined(codepoint)) { 482 source.append(codepoint); 483 } 484 485 if (codepoint != 0xFFFF) { 486 codepoint ++; 487 } 488 /*if (codepoint >= 0x04fc) { 489 System.out.println("codepoint " + Integer.toHexString(codepoint)); 490 String str = source.substring(230, 232); 491 System.out.println(com.ibm.icu.impl.Utility.escape(str)); 492 System.out.println("codepoint " + Integer.toHexString(codepoint) 493 + "length " + str.length()); 494 iter = en_us.getCollationElementIterator(str); 495 CollationTest.backAndForth(this, iter); 496 } 497 */ 498 iter = en_us.getCollationElementIterator(source.toString()); 499 // A basic test to see if it's working at all 500 CollationTest.backAndForth(this, iter); 501 } 502 } 503 504 /** 505 * Test for CollationElementIterator previous and next for the whole set of 506 * unicode characters with normalization on. 507 */ 508 @Test 509 public void TestNormalizedUnicodeChar() 510 { 511 // thai should have normalization on 512 RuleBasedCollator th_th = null; 513 try { 514 th_th = (RuleBasedCollator)Collator.getInstance( 515 new Locale("th", "TH")); 516 } catch (Exception e) { 517 warnln("Error creating Thai collator"); 518 return; 519 } 520 StringBuffer source = new StringBuffer(); 521 source.append('\uFDFA'); 522 CollationElementIterator iter 523 = th_th.getCollationElementIterator(source.toString()); 524 CollationTest.backAndForth(this, iter); 525 for (char codepoint = 0x1; codepoint < 0xfffe;) { 526 source.delete(0, source.length()); 527 while (codepoint % 0xFF != 0) { 528 if (UCharacter.isDefined(codepoint)) { 529 source.append(codepoint); 530 } 531 codepoint ++; 532 } 533 534 if (UCharacter.isDefined(codepoint)) { 535 source.append(codepoint); 536 } 537 538 if (codepoint != 0xFFFF) { 539 codepoint ++; 540 } 541 542 /*if (((int)codepoint) >= 0xfe00) { 543 String str = source.substring(185, 190); 544 System.out.println(com.ibm.icu.impl.Utility.escape(str)); 545 System.out.println("codepoint " 546 + Integer.toHexString(codepoint) 547 + "length " + str.length()); 548 iter = th_th.getCollationElementIterator(str); 549 CollationTest.backAndForth(this, iter); 550 */ 551 iter = th_th.getCollationElementIterator(source.toString()); 552 // A basic test to see if it's working at all 553 CollationTest.backAndForth(this, iter); 554 } 555 } 556 557 /** 558 * Testing the discontiguous contractions 559 */ 560 @Test 561 public void TestDiscontiguous() 562 { 563 String rulestr ="&z < AB < X\u0300 < ABC < X\u0300\u0315"; 564 String src[] = {"ADB", "ADBC", "A\u0315B", "A\u0315BC", 565 // base character blocked 566 "XD\u0300", "XD\u0300\u0315", 567 // non blocking combining character 568 "X\u0319\u0300", "X\u0319\u0300\u0315", 569 // blocking combining character 570 "X\u0314\u0300", "X\u0314\u0300\u0315", 571 // contraction prefix 572 "ABDC", "AB\u0315C","X\u0300D\u0315", 573 "X\u0300\u0319\u0315", "X\u0300\u031A\u0315", 574 // ends not with a contraction character 575 "X\u0319\u0300D", "X\u0319\u0300\u0315D", 576 "X\u0300D\u0315D", "X\u0300\u0319\u0315D", 577 "X\u0300\u031A\u0315D" 578 }; 579 String tgt[] = {// non blocking combining character 580 "A D B", "A D BC", "A \u0315 B", "A \u0315 BC", 581 // base character blocked 582 "X D \u0300", "X D \u0300\u0315", 583 // non blocking combining character 584 "X\u0300 \u0319", "X\u0300\u0315 \u0319", 585 // blocking combining character 586 "X \u0314 \u0300", "X \u0314 \u0300\u0315", 587 // contraction prefix 588 "AB DC", "AB \u0315 C","X\u0300 D \u0315", 589 "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315", 590 // ends not with a contraction character 591 "X\u0300 \u0319D", "X\u0300\u0315 \u0319D", 592 "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D", 593 "X\u0300 \u031A\u0315D" 594 }; 595 int count = 0; 596 try { 597 RuleBasedCollator coll = new RuleBasedCollator(rulestr); 598 CollationElementIterator iter 599 = coll.getCollationElementIterator(""); 600 CollationElementIterator resultiter 601 = coll.getCollationElementIterator(""); 602 while (count < src.length) { 603 iter.setText(src[count]); 604 int s = 0; 605 while (s < tgt[count].length()) { 606 int e = tgt[count].indexOf(' ', s); 607 if (e < 0) { 608 e = tgt[count].length(); 609 } 610 String resultstr = tgt[count].substring(s, e); 611 resultiter.setText(resultstr); 612 int ce = resultiter.next(); 613 while (ce != CollationElementIterator.NULLORDER) { 614 if (ce != iter.next()) { 615 errln("Discontiguos contraction test mismatch at" 616 + count); 617 return; 618 } 619 ce = resultiter.next(); 620 } 621 s = e + 1; 622 } 623 iter.reset(); 624 CollationTest.backAndForth(this, iter); 625 count ++; 626 } 627 } 628 catch (Exception e) { 629 warnln("Error running discontiguous tests " + e.toString()); 630 } 631 } 632 633 /** 634 * Test the incremental normalization 635 */ 636 @Test 637 public void TestNormalization() 638 { 639 String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315"; 640 String testdata[] = {"\u1ED9", "o\u0323\u0302", 641 "\u0300\u0315", "\u0315\u0300", 642 "A\u0300\u0315B", "A\u0315\u0300B", 643 "A\u0316\u0315B", "A\u0315\u0316B", 644 "\u0316\u0300\u0315", "\u0315\u0300\u0316", 645 "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B", 646 "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"}; 647 RuleBasedCollator coll = null; 648 try { 649 coll = new RuleBasedCollator(rules); 650 coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 651 } catch (Exception e) { 652 warnln("ERROR: in creation of collator using rules " + rules); 653 return; 654 } 655 656 CollationElementIterator iter = coll.getCollationElementIterator("testing"); 657 for (int count = 0; count < testdata.length; count ++) { 658 iter.setText(testdata[count]); 659 CollationTest.backAndForth(this, iter); 660 } 661 } 662 663 /** 664 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with 665 * normalization on AND jamo tailoring, among other things. 666 * 667 * Note: This test is sensitive to changes of the root collator, 668 * for example whether the ae-ligature maps to three CEs (as in the DUCET) 669 * or to two CEs (as in the CLDR 24 FractionalUCA.txt). 670 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding. 671 * For example, the DUCET's artificial secondary CE in the ae-ligature 672 * may map to two 32-bit iterator CEs (as it did until ICU 52). 673 */ 674 @Test 675 public void TestSearchCollatorElements() 676 { 677 String tsceText = 678 " \uAC00" + // simple LV Hangul 679 " \uAC01" + // simple LVT Hangul 680 " \uAC0F" + // LVTT, last jamo expands for search 681 " \uAFFF" + // LLVVVTT, every jamo expands for search 682 " \u1100\u1161\u11A8" + // 0xAC01 as conjoining jamo 683 " \u3131\u314F\u3131" + // 0xAC01 as compatibility jamo 684 " \u1100\u1161\u11B6" + // 0xAC0F as conjoining jamo; last expands for search 685 " \u1101\u1170\u11B6" + // 0xAFFF as conjoining jamo; all expand for search 686 " \u00E6" + // small letter ae, expands 687 " \u1E4D" + // small letter o with tilde and acute, decomposes 688 " "; 689 690 int[] rootStandardOffsets = { 691 0, 1,2, 692 2, 3,4,4, 693 4, 5,6,6, 694 6, 7,8,8, 695 8, 9,10,11, 696 12, 13,14,15, 697 16, 17,18,19, 698 20, 21,22,23, 699 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */ 700 26, 27,28,28, 701 28, 702 29 703 }; 704 705 int[] rootSearchOffsets = { 706 0, 1,2, 707 2, 3,4,4, 708 4, 5,6,6,6, 709 6, 7,8,8,8,8,8,8, 710 8, 9,10,11, 711 12, 13,14,15, 712 16, 17,18,19,20, 713 20, 21,22,22,23,23,23,24, 714 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */ 715 26, 27,28,28, 716 28, 717 29 718 }; 719 720 class TSCEItem { 721 private String localeString; 722 private int[] offsets; 723 TSCEItem(String locStr, int[] offs) { 724 localeString = locStr; 725 offsets = offs; 726 } 727 public String getLocaleString() { return localeString; } 728 public int[] getOffsets() { return offsets; } 729 } 730 final TSCEItem[] tsceItems = { 731 new TSCEItem( "root", rootStandardOffsets ), 732 new TSCEItem( "root@collation=search", rootSearchOffsets ), 733 }; 734 735 for (TSCEItem tsceItem: tsceItems) { 736 String localeString = tsceItem.getLocaleString(); 737 ULocale uloc = new ULocale(localeString); 738 RuleBasedCollator col = null; 739 try { 740 col = (RuleBasedCollator)Collator.getInstance(uloc); 741 } catch (Exception e) { 742 errln("Error: in locale " + localeString + ", err in Collator.getInstance"); 743 continue; 744 } 745 CollationElementIterator uce = col.getCollationElementIterator(tsceText); 746 int[] offsets = tsceItem.getOffsets(); 747 int ioff, noff = offsets.length; 748 int offset, element; 749 750 ioff = 0; 751 do { 752 offset = uce.getOffset(); 753 element = uce.next(); 754 logln(String.format("(%s) offset=%2d ce=%08x\n", tsceItem.localeString, offset, element)); 755 if (element == 0) { 756 errln("Error: in locale " + localeString + ", CEIterator next() returned element 0"); 757 } 758 if ( ioff < noff ) { 759 if ( offset != offsets[ioff] ) { 760 errln("Error: in locale " + localeString + ", expected CEIterator next()->getOffset " + offsets[ioff] + ", got " + offset); 761 //ioff = noff; 762 //break; 763 } 764 ioff++; 765 } else { 766 errln("Error: in locale " + localeString + ", CEIterator next() returned more elements than expected"); 767 } 768 } while (element != CollationElementIterator.NULLORDER); 769 if ( ioff < noff ) { 770 errln("Error: in locale " + localeString + ", CEIterator next() returned fewer elements than expected"); 771 } 772 773 // backwards test 774 uce.setOffset(tsceText.length()); 775 ioff = noff; 776 do { 777 offset = uce.getOffset(); 778 element = uce.previous(); 779 if (element == 0) { 780 errln("Error: in locale " + localeString + ", CEIterator previous() returned element 0"); 781 } 782 if ( ioff > 0 ) { 783 ioff--; 784 if ( offset != offsets[ioff] ) { 785 errln("Error: in locale " + localeString + ", expected CEIterator previous()->getOffset " + offsets[ioff] + ", got " + offset); 786 //ioff = 0; 787 //break; 788 } 789 } else { 790 errln("Error: in locale " + localeString + ", CEIterator previous() returned more elements than expected"); 791 } 792 } while (element != CollationElementIterator.NULLORDER); 793 if ( ioff > 0 ) { 794 errln("Error: in locale " + localeString + ", CEIterator previous() returned fewer elements than expected"); 795 } 796 } 797 } 798 } 799