1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 package android.icu.dev.test.rbbi; 11 12 import java.text.StringCharacterIterator; 13 import java.util.ArrayList; 14 import java.util.List; 15 import java.util.Locale; 16 17 import org.junit.Before; 18 import org.junit.Test; 19 20 import android.icu.dev.test.TestFmwk; 21 import android.icu.text.BreakIterator; 22 import android.icu.text.FilteredBreakIteratorBuilder; 23 import android.icu.util.ULocale; 24 25 public class BreakIteratorTest extends TestFmwk 26 { 27 private BreakIterator characterBreak; 28 private BreakIterator wordBreak; 29 private BreakIterator lineBreak; 30 private BreakIterator sentenceBreak; 31 private BreakIterator titleBreak; 32 33 public BreakIteratorTest() 34 { 35 36 } 37 38 @Before 39 public void init(){ 40 characterBreak = BreakIterator.getCharacterInstance(); 41 wordBreak = BreakIterator.getWordInstance(); 42 lineBreak = BreakIterator.getLineInstance(); 43 //logln("Creating sentence iterator..."); 44 sentenceBreak = BreakIterator.getSentenceInstance(); 45 //logln("Finished creating sentence iterator..."); 46 titleBreak = BreakIterator.getTitleInstance(); 47 } 48 //========================================================================= 49 // general test subroutines 50 //========================================================================= 51 52 private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) { 53 StringBuffer buffer = new StringBuffer(); 54 String text; 55 for (int i = 0; i < expectedResult.size(); i++) { 56 text = expectedResult.get(i); 57 buffer.append(text); 58 } 59 text = buffer.toString(); 60 61 bi.setText(text); 62 63 List<String> nextResults = _testFirstAndNext(bi, text); 64 List<String> previousResults = _testLastAndPrevious(bi, text); 65 66 logln("comparing forward and backward..."); 67 //TODO(junit) - needs to be rewritten 68 //int errs = getErrorCount(); 69 compareFragmentLists("forward iteration", "backward iteration", nextResults, 70 previousResults); 71 //if (getErrorCount() == errs) { 72 logln("comparing expected and actual..."); 73 compareFragmentLists("expected result", "actual result", expectedResult, 74 nextResults); 75 logln("comparing expected and actual..."); 76 compareFragmentLists("expected result", "actual result", expectedResult, 77 nextResults); 78 //} 79 80 int[] boundaries = new int[expectedResult.size() + 3]; 81 boundaries[0] = BreakIterator.DONE; 82 boundaries[1] = 0; 83 for (int i = 0; i < expectedResult.size(); i++) 84 boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i)). 85 length(); 86 boundaries[boundaries.length - 1] = BreakIterator.DONE; 87 88 _testFollowing(bi, text, boundaries); 89 _testPreceding(bi, text, boundaries); 90 _testIsBoundary(bi, text, boundaries); 91 92 doMultipleSelectionTest(bi, text); 93 } 94 95 private List<String> _testFirstAndNext(BreakIterator bi, String text) { 96 int p = bi.first(); 97 int lastP = p; 98 List<String> result = new ArrayList<String>(); 99 100 if (p != 0) 101 errln("first() returned " + p + " instead of 0"); 102 while (p != BreakIterator.DONE) { 103 p = bi.next(); 104 if (p != BreakIterator.DONE) { 105 if (p <= lastP) 106 errln("next() failed to move forward: next() on position " 107 + lastP + " yielded " + p); 108 109 result.add(text.substring(lastP, p)); 110 } 111 else { 112 if (lastP != text.length()) 113 errln("next() returned DONE prematurely: offset was " 114 + lastP + " instead of " + text.length()); 115 } 116 lastP = p; 117 } 118 return result; 119 } 120 121 private List<String> _testLastAndPrevious(BreakIterator bi, String text) { 122 int p = bi.last(); 123 int lastP = p; 124 List<String> result = new ArrayList<String>(); 125 126 if (p != text.length()) 127 errln("last() returned " + p + " instead of " + text.length()); 128 while (p != BreakIterator.DONE) { 129 p = bi.previous(); 130 if (p != BreakIterator.DONE) { 131 if (p >= lastP) 132 errln("previous() failed to move backward: previous() on position " 133 + lastP + " yielded " + p); 134 135 result.add(0, text.substring(p, lastP)); 136 } 137 else { 138 if (lastP != 0) 139 errln("previous() returned DONE prematurely: offset was " 140 + lastP + " instead of 0"); 141 } 142 lastP = p; 143 } 144 return result; 145 } 146 147 private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) { 148 int p1 = 0; 149 int p2 = 0; 150 String s1; 151 String s2; 152 int t1 = 0; 153 int t2 = 0; 154 155 while (p1 < f1.size() && p2 < f2.size()) { 156 s1 = f1.get(p1); 157 s2 = f2.get(p2); 158 t1 += s1.length(); 159 t2 += s2.length(); 160 161 if (s1.equals(s2)) { 162 debugLogln(" >" + s1 + "<"); 163 ++p1; 164 ++p2; 165 } 166 else { 167 int tempT1 = t1; 168 int tempT2 = t2; 169 int tempP1 = p1; 170 int tempP2 = p2; 171 172 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) { 173 while (tempT1 < tempT2 && tempP1 < f1.size()) { 174 tempT1 += (f1.get(tempP1)).length(); 175 ++tempP1; 176 } 177 while (tempT2 < tempT1 && tempP2 < f2.size()) { 178 tempT2 += (f2.get(tempP2)).length(); 179 ++tempP2; 180 } 181 } 182 logln("*** " + f1Name + " has:"); 183 while (p1 <= tempP1 && p1 < f1.size()) { 184 s1 = f1.get(p1); 185 t1 += s1.length(); 186 debugLogln(" *** >" + s1 + "<"); 187 ++p1; 188 } 189 logln("***** " + f2Name + " has:"); 190 while (p2 <= tempP2 && p2 < f2.size()) { 191 s2 = f2.get(p2); 192 t2 += s2.length(); 193 debugLogln(" ***** >" + s2 + "<"); 194 ++p2; 195 } 196 errln("Discrepancy between " + f1Name + " and " + f2Name); 197 } 198 } 199 } 200 201 private void _testFollowing(BreakIterator bi, String text, int[] boundaries) { 202 logln("testFollowing():"); 203 int p = 2; 204 for (int i = 0; i <= text.length(); i++) { 205 if (i == boundaries[p]) 206 ++p; 207 208 int b = bi.following(i); 209 logln("bi.following(" + i + ") -> " + b); 210 if (b != boundaries[p]) 211 errln("Wrong result from following() for " + i + ": expected " + boundaries[p] 212 + ", got " + b); 213 } 214 } 215 216 private void _testPreceding(BreakIterator bi, String text, int[] boundaries) { 217 logln("testPreceding():"); 218 int p = 0; 219 for (int i = 0; i <= text.length(); i++) { 220 int b = bi.preceding(i); 221 logln("bi.preceding(" + i + ") -> " + b); 222 if (b != boundaries[p]) 223 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p] 224 + ", got " + b); 225 226 if (i == boundaries[p + 1]) 227 ++p; 228 } 229 } 230 231 private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) { 232 logln("testIsBoundary():"); 233 int p = 1; 234 boolean isB; 235 for (int i = 0; i <= text.length(); i++) { 236 isB = bi.isBoundary(i); 237 logln("bi.isBoundary(" + i + ") -> " + isB); 238 239 if (i == boundaries[p]) { 240 if (!isB) 241 errln("Wrong result from isBoundary() for " + i + ": expected true, got false"); 242 ++p; 243 } 244 else { 245 if (isB) 246 errln("Wrong result from isBoundary() for " + i + ": expected false, got true"); 247 } 248 } 249 } 250 251 private void doMultipleSelectionTest(BreakIterator iterator, String testText) 252 { 253 logln("Multiple selection test..."); 254 BreakIterator testIterator = (BreakIterator)iterator.clone(); 255 int offset = iterator.first(); 256 int testOffset; 257 int count = 0; 258 259 do { 260 testOffset = testIterator.first(); 261 testOffset = testIterator.next(count); 262 logln("next(" + count + ") -> " + testOffset); 263 if (offset != testOffset) 264 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset); 265 266 if (offset != BreakIterator.DONE) { 267 count++; 268 offset = iterator.next(); 269 } 270 } while (offset != BreakIterator.DONE); 271 272 // now do it backwards... 273 offset = iterator.last(); 274 count = 0; 275 276 do { 277 testOffset = testIterator.last(); 278 testOffset = testIterator.next(count); 279 logln("next(" + count + ") -> " + testOffset); 280 if (offset != testOffset) 281 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset); 282 283 if (offset != BreakIterator.DONE) { 284 count--; 285 offset = iterator.previous(); 286 } 287 } while (offset != BreakIterator.DONE); 288 } 289 290 291 private void doOtherInvariantTest(BreakIterator tb, String testChars) 292 { 293 StringBuffer work = new StringBuffer("a\r\na"); 294 int errorCount = 0; 295 296 // a break should never occur between CR and LF 297 for (int i = 0; i < testChars.length(); i++) { 298 work.setCharAt(0, testChars.charAt(i)); 299 for (int j = 0; j < testChars.length(); j++) { 300 work.setCharAt(3, testChars.charAt(j)); 301 tb.setText(work.toString()); 302 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next()) 303 if (k == 2) { 304 errln("Break between CR and LF in string U+" + Integer.toHexString( 305 (work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString( 306 (work.charAt(3)))); 307 errorCount++; 308 if (errorCount >= 75) 309 return; 310 } 311 } 312 } 313 314 // a break should never occur before a non-spacing mark, unless it's preceded 315 // by a line terminator 316 work.setLength(0); 317 work.append("aaaa"); 318 for (int i = 0; i < testChars.length(); i++) { 319 char c = testChars.charAt(i); 320 if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003') 321 continue; 322 work.setCharAt(1, c); 323 for (int j = 0; j < testChars.length(); j++) { 324 c = testChars.charAt(j); 325 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c) 326 != Character.ENCLOSING_MARK) 327 continue; 328 work.setCharAt(2, c); 329 tb.setText(work.toString()); 330 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next()) 331 if (k == 2) { 332 errln("Break between U+" + Integer.toHexString((work.charAt(1))) 333 + " and U+" + Integer.toHexString((work.charAt(2)))); 334 errorCount++; 335 if (errorCount >= 75) 336 return; 337 } 338 } 339 } 340 } 341 342 public void debugLogln(String s) { 343 final String zeros = "0000"; 344 String temp; 345 StringBuffer out = new StringBuffer(); 346 for (int i = 0; i < s.length(); i++) { 347 char c = s.charAt(i); 348 if (c >= ' ' && c < '\u007f') 349 out.append(c); 350 else { 351 out.append("\\u"); 352 temp = Integer.toHexString(c); 353 out.append(zeros.substring(0, 4 - temp.length())); 354 out.append(temp); 355 } 356 } 357 logln(out.toString()); 358 } 359 360 //========================================================================= 361 // tests 362 //========================================================================= 363 364 365 /** 366 * @bug 4097779 367 */ 368 @Test 369 public void TestBug4097779() { 370 List<String> wordSelectionData = new ArrayList<String>(2); 371 372 wordSelectionData.add("aa\u0300a"); 373 wordSelectionData.add(" "); 374 375 generalIteratorTest(wordBreak, wordSelectionData); 376 } 377 378 /** 379 * @bug 4098467 380 */ 381 @Test 382 public void TestBug4098467Words() { 383 List<String> wordSelectionData = new ArrayList<String>(); 384 385 // What follows is a string of Korean characters (I found it in the Yellow Pages 386 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed 387 // it correctly), first as precomposed syllables, and then as conjoining jamo. 388 // Both sequences should be semantically identical and break the same way. 389 // precomposed syllables... 390 wordSelectionData.add("\uc0c1\ud56d"); 391 wordSelectionData.add(" "); 392 wordSelectionData.add("\ud55c\uc778"); 393 wordSelectionData.add(" "); 394 wordSelectionData.add("\uc5f0\ud569"); 395 wordSelectionData.add(" "); 396 wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c"); 397 wordSelectionData.add(" "); 398 // conjoining jamo... 399 wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc"); 400 wordSelectionData.add(" "); 401 wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab"); 402 wordSelectionData.add(" "); 403 wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8"); 404 wordSelectionData.add(" "); 405 wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c"); 406 wordSelectionData.add(" "); 407 408 generalIteratorTest(wordBreak, wordSelectionData); 409 } 410 411 412 /** 413 * @bug 4111338 414 */ 415 @Test 416 public void TestBug4111338() { 417 List<String> sentenceSelectionData = new ArrayList<String>(); 418 419 // test for bug #4111338: Don't break sentences at the boundary between CJK 420 // and other letters 421 sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c" 422 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba" 423 + "\u611d\u57b6\u2510\u5d46\".\u2029"); 424 sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8" 425 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0" 426 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029"); 427 sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4" 428 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8" 429 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029"); 430 sentenceSelectionData.add("He said, \"I can go there.\"\u2029"); 431 432 generalIteratorTest(sentenceBreak, sentenceSelectionData); 433 } 434 435 436 /** 437 * @bug 4143071 438 */ 439 @Test 440 public void TestBug4143071() { 441 List<String> sentenceSelectionData = new ArrayList<String>(3); 442 443 // Make sure sentences that end with digits work right 444 sentenceSelectionData.add("Today is the 27th of May, 1998. "); 445 sentenceSelectionData.add("Tomorrow will be 28 May 1998. "); 446 sentenceSelectionData.add("The day after will be the 30th.\u2029"); 447 448 generalIteratorTest(sentenceBreak, sentenceSelectionData); 449 } 450 451 /** 452 * @bug 4152416 453 */ 454 @Test 455 public void TestBug4152416() { 456 List<String> sentenceSelectionData = new ArrayList<String>(2); 457 458 // Make sure sentences ending with a capital letter are treated correctly 459 sentenceSelectionData.add("The type of all primitive " 460 + "<code>boolean</code> values accessed in the target VM. "); 461 sentenceSelectionData.add("Calls to xxx will return an " 462 + "implementor of this interface.\u2029"); 463 464 generalIteratorTest(sentenceBreak, sentenceSelectionData); 465 } 466 467 /** 468 * @bug 4152117 469 */ 470 @Test 471 public void TestBug4152117() { 472 List<String> sentenceSelectionData = new ArrayList<String>(3); 473 474 // Make sure sentence breaking is handling punctuation correctly 475 // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE 476 // IT DOESN'T CROP UP] 477 sentenceSelectionData.add("Constructs a randomly generated " 478 + "BigInteger, uniformly distributed over the range <tt>0</tt> " 479 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. "); 480 sentenceSelectionData.add("The uniformity of the distribution " 481 + "assumes that a fair source of random bits is provided in " 482 + "<tt>rnd</tt>. "); 483 sentenceSelectionData.add("Note that this constructor always " 484 + "constructs a non-negative BigInteger.\u2029"); 485 486 generalIteratorTest(sentenceBreak, sentenceSelectionData); 487 } 488 489 @Test 490 public void TestLineBreak() { 491 List<String> lineSelectionData = new ArrayList<String>(); 492 493 lineSelectionData.add("Multi-"); 494 lineSelectionData.add("Level "); 495 lineSelectionData.add("example "); 496 lineSelectionData.add("of "); 497 lineSelectionData.add("a "); 498 lineSelectionData.add("semi-"); 499 lineSelectionData.add("idiotic "); 500 lineSelectionData.add("non-"); 501 lineSelectionData.add("sensical "); 502 lineSelectionData.add("(non-"); 503 lineSelectionData.add("important) "); 504 lineSelectionData.add("sentence. "); 505 506 lineSelectionData.add("Hi "); 507 lineSelectionData.add("Hello "); 508 lineSelectionData.add("How\n"); 509 lineSelectionData.add("are\r"); 510 lineSelectionData.add("you\u2028"); 511 lineSelectionData.add("fine.\t"); 512 lineSelectionData.add("good. "); 513 514 lineSelectionData.add("Now\r"); 515 lineSelectionData.add("is\n"); 516 lineSelectionData.add("the\r\n"); 517 lineSelectionData.add("time\n"); 518 lineSelectionData.add("\r"); 519 lineSelectionData.add("for\r"); 520 lineSelectionData.add("\r"); 521 lineSelectionData.add("all"); 522 523 generalIteratorTest(lineBreak, lineSelectionData); 524 } 525 526 /** 527 * @bug 4068133 528 */ 529 @Test 530 public void TestBug4068133() { 531 List<String> lineSelectionData = new ArrayList<String>(9); 532 533 lineSelectionData.add("\u96f6"); 534 lineSelectionData.add("\u4e00\u3002"); 535 lineSelectionData.add("\u4e8c\u3001"); 536 lineSelectionData.add("\u4e09\u3002\u3001"); 537 lineSelectionData.add("\u56db\u3001\u3002\u3001"); 538 lineSelectionData.add("\u4e94,"); 539 lineSelectionData.add("\u516d."); 540 lineSelectionData.add("\u4e03.\u3001,\u3002"); 541 lineSelectionData.add("\u516b"); 542 543 generalIteratorTest(lineBreak, lineSelectionData); 544 } 545 546 /** 547 * @bug 4086052 548 */ 549 @Test 550 public void TestBug4086052() { 551 List<String> lineSelectionData = new ArrayList<String>(1); 552 553 lineSelectionData.add("foo\u00a0bar "); 554 // lineSelectionData.addElement("foo\ufeffbar"); 555 556 generalIteratorTest(lineBreak, lineSelectionData); 557 } 558 559 /** 560 * @bug 4097920 561 */ 562 @Test 563 public void TestBug4097920() { 564 List<String> lineSelectionData = new ArrayList<String>(3); 565 566 lineSelectionData.add("dog,cat,mouse "); 567 lineSelectionData.add("(one)"); 568 lineSelectionData.add("(two)\n"); 569 generalIteratorTest(lineBreak, lineSelectionData); 570 } 571 572 573 574 /** 575 * @bug 4117554 576 */ 577 @Test 578 public void TestBug4117554Lines() { 579 List<String> lineSelectionData = new ArrayList<String>(3); 580 581 // Fullwidth .!? should be treated as postJwrd 582 lineSelectionData.add("\u4e01\uff0e"); 583 lineSelectionData.add("\u4e02\uff01"); 584 lineSelectionData.add("\u4e03\uff1f"); 585 586 generalIteratorTest(lineBreak, lineSelectionData); 587 } 588 589 @Test 590 public void TestLettersAndDigits() { 591 // a character sequence such as "X11" or "30F3" or "native2ascii" should 592 // be kept together as a single word 593 List<String> lineSelectionData = new ArrayList<String>(3); 594 595 lineSelectionData.add("X11 "); 596 lineSelectionData.add("30F3 "); 597 lineSelectionData.add("native2ascii"); 598 599 generalIteratorTest(lineBreak, lineSelectionData); 600 } 601 602 603 private static final String graveS = "S\u0300"; 604 private static final String acuteBelowI = "i\u0317"; 605 private static final String acuteE = "e\u0301"; 606 private static final String circumflexA = "a\u0302"; 607 private static final String tildeE = "e\u0303"; 608 609 @Test 610 public void TestCharacterBreak() { 611 List<String> characterSelectionData = new ArrayList<String>(); 612 613 characterSelectionData.add(graveS); 614 characterSelectionData.add(acuteBelowI); 615 characterSelectionData.add("m"); 616 characterSelectionData.add("p"); 617 characterSelectionData.add("l"); 618 characterSelectionData.add(acuteE); 619 characterSelectionData.add(" "); 620 characterSelectionData.add("s"); 621 characterSelectionData.add(circumflexA); 622 characterSelectionData.add("m"); 623 characterSelectionData.add("p"); 624 characterSelectionData.add("l"); 625 characterSelectionData.add(tildeE); 626 characterSelectionData.add("."); 627 characterSelectionData.add("w"); 628 characterSelectionData.add(circumflexA); 629 characterSelectionData.add("w"); 630 characterSelectionData.add("a"); 631 characterSelectionData.add("f"); 632 characterSelectionData.add("q"); 633 characterSelectionData.add("\n"); 634 characterSelectionData.add("\r"); 635 characterSelectionData.add("\r\n"); 636 characterSelectionData.add("\n"); 637 638 generalIteratorTest(characterBreak, characterSelectionData); 639 } 640 641 /** 642 * @bug 4098467 643 */ 644 @Test 645 public void TestBug4098467Characters() { 646 List<String> characterSelectionData = new ArrayList<String>(); 647 648 // What follows is a string of Korean characters (I found it in the Yellow Pages 649 // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed 650 // it correctly), first as precomposed syllables, and then as conjoining jamo. 651 // Both sequences should be semantically identical and break the same way. 652 // precomposed syllables... 653 characterSelectionData.add("\uc0c1"); 654 characterSelectionData.add("\ud56d"); 655 characterSelectionData.add(" "); 656 characterSelectionData.add("\ud55c"); 657 characterSelectionData.add("\uc778"); 658 characterSelectionData.add(" "); 659 characterSelectionData.add("\uc5f0"); 660 characterSelectionData.add("\ud569"); 661 characterSelectionData.add(" "); 662 characterSelectionData.add("\uc7a5"); 663 characterSelectionData.add("\ub85c"); 664 characterSelectionData.add("\uad50"); 665 characterSelectionData.add("\ud68c"); 666 characterSelectionData.add(" "); 667 // conjoining jamo... 668 characterSelectionData.add("\u1109\u1161\u11bc"); 669 characterSelectionData.add("\u1112\u1161\u11bc"); 670 characterSelectionData.add(" "); 671 characterSelectionData.add("\u1112\u1161\u11ab"); 672 characterSelectionData.add("\u110b\u1175\u11ab"); 673 characterSelectionData.add(" "); 674 characterSelectionData.add("\u110b\u1167\u11ab"); 675 characterSelectionData.add("\u1112\u1161\u11b8"); 676 characterSelectionData.add(" "); 677 characterSelectionData.add("\u110c\u1161\u11bc"); 678 characterSelectionData.add("\u1105\u1169"); 679 characterSelectionData.add("\u1100\u116d"); 680 characterSelectionData.add("\u1112\u116c"); 681 682 generalIteratorTest(characterBreak, characterSelectionData); 683 } 684 685 @Test 686 public void TestTitleBreak() 687 { 688 List<String> titleData = new ArrayList<String>(); 689 titleData.add(" "); 690 titleData.add("This "); 691 titleData.add("is "); 692 titleData.add("a "); 693 titleData.add("simple "); 694 titleData.add("sample "); 695 titleData.add("sentence. "); 696 titleData.add("This "); 697 698 generalIteratorTest(titleBreak, titleData); 699 } 700 701 702 703 /* 704 * @bug 4153072 705 */ 706 @Test 707 public void TestBug4153072() { 708 BreakIterator iter = BreakIterator.getWordInstance(); 709 String str = "...Hello, World!..."; 710 int begin = 3; 711 int end = str.length() - 3; 712 // not used boolean gotException = false; 713 714 715 iter.setText(new StringCharacterIterator(str, begin, end, begin)); 716 for (int index = -1; index < begin + 1; ++index) { 717 try { 718 iter.isBoundary(index); 719 if (index < begin) 720 errln("Didn't get exception with offset = " + index + 721 " and begin index = " + begin); 722 } 723 catch (IllegalArgumentException e) { 724 if (index >= begin) 725 errln("Got exception with offset = " + index + 726 " and begin index = " + begin); 727 } 728 } 729 } 730 731 732 @Test 733 public void TestBug4146175Lines() { 734 List<String> lineSelectionData = new ArrayList<String>(2); 735 736 // the fullwidth comma should stick to the preceding Japanese character 737 lineSelectionData.add("\u7d42\uff0c"); 738 lineSelectionData.add("\u308f"); 739 740 generalIteratorTest(lineBreak, lineSelectionData); 741 } 742 743 private static final String cannedTestChars 744 = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2" 745 + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3" 746 + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303" 747 + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000" 748 + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f" 749 + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164"; 750 751 @Test 752 public void TestSentenceInvariants() 753 { 754 BreakIterator e = BreakIterator.getSentenceInstance(); 755 doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff"); 756 } 757 758 @Test 759 public void TestEmptyString() 760 { 761 String text = ""; 762 List<String> x = new ArrayList<String>(1); 763 x.add(text); 764 765 generalIteratorTest(lineBreak, x); 766 } 767 768 @Test 769 public void TestGetAvailableLocales() 770 { 771 Locale[] locList = BreakIterator.getAvailableLocales(); 772 773 if (locList.length == 0) 774 errln("getAvailableLocales() returned an empty list!"); 775 // I have no idea how to test this function... 776 777 android.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales(); 778 if (ulocList.length == 0) { 779 errln("getAvailableULocales() returned an empty list!"); 780 } else { 781 logln("getAvailableULocales() returned " + ulocList.length + " locales"); 782 } 783 } 784 785 786 /** 787 * @bug 4068137 788 */ 789 @Test 790 public void TestEndBehavior() 791 { 792 String testString = "boo."; 793 BreakIterator wb = BreakIterator.getWordInstance(); 794 wb.setText(testString); 795 796 if (wb.first() != 0) 797 errln("Didn't get break at beginning of string."); 798 if (wb.next() != 3) 799 errln("Didn't get break before period in \"boo.\""); 800 if (wb.current() != 4 && wb.next() != 4) 801 errln("Didn't get break at end of string."); 802 } 803 804 // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL] 805 /** 806 * Port From: ICU4C v1.8.1 : textbounds : IntlTestTextBoundary 807 * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp 808 **/ 809 /** 810 * test methods preceding, following and isBoundary 811 **/ 812 @Test 813 public void TestPreceding() { 814 String words3 = "aaa bbb ccc"; 815 BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault()); 816 e.setText( words3 ); 817 e.first(); 818 int p1 = e.next(); 819 int p2 = e.next(); 820 int p3 = e.next(); 821 int p4 = e.next(); 822 823 int f = e.following(p2+1); 824 int p = e.preceding(p2+1); 825 if (f!=p3) 826 errln("IntlTestTextBoundary::TestPreceding: f!=p3"); 827 if (p!=p2) 828 errln("IntlTestTextBoundary::TestPreceding: p!=p2"); 829 830 if (p1+1!=p2) 831 errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2"); 832 833 if (p3+1!=p4) 834 errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4"); 835 836 if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3)) 837 { 838 errln("IntlTestTextBoundary::TestPreceding: isBoundary err"); 839 } 840 } 841 842 843 /** 844 * Bug 4450804 845 */ 846 @Test 847 public void TestLineBreakContractions() { 848 List<String> expected = new ArrayList<String>(7); 849 expected.add("These "); 850 expected.add("are "); 851 expected.add("'foobles'. "); 852 expected.add("Don't "); 853 expected.add("you "); 854 expected.add("like "); 855 expected.add("them?"); 856 generalIteratorTest(lineBreak, expected); 857 } 858 859 /** 860 * Ticket#5615 861 */ 862 @Test 863 public void TestT5615() { 864 android.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales(); 865 int type = 0; 866 android.icu.util.ULocale loc = null; 867 try { 868 for (int i = 0; i < ulocales.length; i++) { 869 loc = ulocales[i]; 870 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) { 871 BreakIterator brk = BreakIterator.getBreakInstance(loc, type); 872 if (brk == null) { 873 errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc); 874 } 875 } 876 } 877 } catch (Exception e) { 878 errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage()); 879 } 880 } 881 882 /** 883 * At present, Japanese doesn't have exceptions. 884 * However, this still should not fail. 885 */ 886 @Test 887 public void TestFilteredJapanese() { 888 ULocale loc = ULocale.JAPANESE; 889 BreakIterator brk = FilteredBreakIteratorBuilder 890 .createInstance(loc) 891 .build(BreakIterator.getSentenceInstance(loc)); 892 brk.setText(""); 893 assertEquals("Starting point", 0, brk.current()); 894 assertEquals("Next point", 5, brk.next()); 895 assertEquals("Last point", BreakIterator.DONE, brk.next()); 896 } 897 898 /* 899 * Test case for Ticket#10721. BreakIterator factory method should throw NPE 900 * when specified locale is null. 901 */ 902 @Test 903 public void TestNullLocale() { 904 Locale loc = null; 905 ULocale uloc = null; 906 907 @SuppressWarnings("unused") 908 BreakIterator brk; 909 910 // Character 911 try { 912 brk = BreakIterator.getCharacterInstance(loc); 913 errln("getCharacterInstance((Locale)null) did not throw NPE."); 914 } catch (NullPointerException e) { /* OK */ } 915 try { 916 brk = BreakIterator.getCharacterInstance(uloc); 917 errln("getCharacterInstance((ULocale)null) did not throw NPE."); 918 } catch (NullPointerException e) { /* OK */ } 919 920 // Line 921 try { 922 brk = BreakIterator.getLineInstance(loc); 923 errln("getLineInstance((Locale)null) did not throw NPE."); 924 } catch (NullPointerException e) { /* OK */ } 925 try { 926 brk = BreakIterator.getLineInstance(uloc); 927 errln("getLineInstance((ULocale)null) did not throw NPE."); 928 } catch (NullPointerException e) { /* OK */ } 929 930 // Sentence 931 try { 932 brk = BreakIterator.getSentenceInstance(loc); 933 errln("getSentenceInstance((Locale)null) did not throw NPE."); 934 } catch (NullPointerException e) { /* OK */ } 935 try { 936 brk = BreakIterator.getSentenceInstance(uloc); 937 errln("getSentenceInstance((ULocale)null) did not throw NPE."); 938 } catch (NullPointerException e) { /* OK */ } 939 940 // Title 941 try { 942 brk = BreakIterator.getTitleInstance(loc); 943 errln("getTitleInstance((Locale)null) did not throw NPE."); 944 } catch (NullPointerException e) { /* OK */ } 945 try { 946 brk = BreakIterator.getTitleInstance(uloc); 947 errln("getTitleInstance((ULocale)null) did not throw NPE."); 948 } catch (NullPointerException e) { /* OK */ } 949 950 // Word 951 try { 952 brk = BreakIterator.getWordInstance(loc); 953 errln("getWordInstance((Locale)null) did not throw NPE."); 954 } catch (NullPointerException e) { /* OK */ } 955 try { 956 brk = BreakIterator.getWordInstance(uloc); 957 errln("getWordInstance((ULocale)null) did not throw NPE."); 958 } catch (NullPointerException e) { /* OK */ } 959 } 960 961 /** 962 * Test FilteredBreakIteratorBuilder newly introduced 963 */ 964 @Test 965 public void TestFilteredBreakIteratorBuilder() { 966 FilteredBreakIteratorBuilder builder; 967 BreakIterator baseBI; 968 BreakIterator filteredBI; 969 970 String text = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."; // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. 971 String ABBR_MR = "Mr."; 972 String ABBR_CAPT = "Capt."; 973 974 { 975 logln("Constructing empty builder\n"); 976 builder = FilteredBreakIteratorBuilder.createInstance(); 977 978 logln("Constructing base BI\n"); 979 baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH); 980 981 logln("Building new BI\n"); 982 filteredBI = builder.build(baseBI); 983 984 assertDefaultBreakBehavior(filteredBI, text); 985 } 986 987 { 988 logln("Constructing empty builder\n"); 989 builder = FilteredBreakIteratorBuilder.createInstance(); 990 991 logln("Adding Mr. as an exception\n"); 992 993 assertEquals("2.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR)); 994 assertEquals("2.2 suppressBreakAfter", false, builder.suppressBreakAfter(ABBR_MR)); 995 assertEquals("2.3 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_MR)); 996 assertEquals("2.4 unsuppressBreakAfter", false, builder.unsuppressBreakAfter(ABBR_MR)); 997 assertEquals("2.5 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR)); 998 999 logln("Constructing base BI\n"); 1000 baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH); 1001 1002 logln("Building new BI\n"); 1003 filteredBI = builder.build(baseBI); 1004 1005 logln("Testing:"); 1006 filteredBI.setText(text); 1007 assertEquals("2nd next", 84, filteredBI.next()); 1008 assertEquals("2nd next", 90, filteredBI.next()); 1009 assertEquals("2nd next", 278, filteredBI.next()); 1010 filteredBI.first(); 1011 } 1012 1013 1014 { 1015 logln("Constructing empty builder\n"); 1016 builder = FilteredBreakIteratorBuilder.createInstance(); 1017 1018 logln("Adding Mr. and Capt as an exception\n"); 1019 assertEquals("3.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR)); 1020 assertEquals("3.2 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_CAPT)); 1021 1022 logln("Constructing base BI\n"); 1023 baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH); 1024 1025 logln("Building new BI\n"); 1026 filteredBI = builder.build(baseBI); 1027 1028 logln("Testing:"); 1029 filteredBI.setText(text); 1030 assertEquals("3rd next", 84, filteredBI.next()); 1031 assertEquals("3rd next", 278, filteredBI.next()); 1032 filteredBI.first(); 1033 } 1034 1035 { 1036 logln("Constructing English builder\n"); 1037 builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH); 1038 1039 logln("Constructing base BI\n"); 1040 baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH); 1041 1042 logln("unsuppressing 'Capt'"); 1043 assertEquals("1st suppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_CAPT)); 1044 1045 logln("Building new BI\n"); 1046 filteredBI = builder.build(baseBI); 1047 1048 if(filteredBI != null) { 1049 logln("Testing:"); 1050 filteredBI.setText(text); 1051 assertEquals("4th next", 84, filteredBI.next()); 1052 assertEquals("4th next", 90, filteredBI.next()); 1053 assertEquals("4th next", 278, filteredBI.next()); 1054 filteredBI.first(); 1055 } 1056 } 1057 1058 { 1059 logln("Constructing English builder\n"); 1060 builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH); 1061 1062 logln("Constructing base BI\n"); 1063 baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH); 1064 1065 logln("Building new BI\n"); 1066 filteredBI = builder.build(baseBI); 1067 1068 if(filteredBI != null) { 1069 assertEnglishBreakBehavior(filteredBI, text); 1070 } 1071 } 1072 1073 { 1074 logln("Constructing English @ss=standard\n"); 1075 filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("en-US-u-ss-standard")); 1076 1077 if(filteredBI != null) { 1078 assertEnglishBreakBehavior(filteredBI, text); 1079 } 1080 } 1081 1082 { 1083 logln("Constructing Afrikaans @ss=standard - should be == default\n"); 1084 filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("af-u-ss-standard")); 1085 1086 assertDefaultBreakBehavior(filteredBI, text); 1087 } 1088 1089 { 1090 logln("Constructing Japanese @ss=standard - should be == default\n"); 1091 filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("ja-u-ss-standard")); 1092 1093 assertDefaultBreakBehavior(filteredBI, text); 1094 } 1095 { 1096 logln("Constructing tfg @ss=standard - should be == default\n"); 1097 filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("tfg-u-ss-standard")); 1098 1099 assertDefaultBreakBehavior(filteredBI, text); 1100 } 1101 1102 { 1103 logln("Constructing French builder"); 1104 builder = FilteredBreakIteratorBuilder.createInstance(ULocale.FRENCH); 1105 1106 logln("Constructing base BI\n"); 1107 baseBI = BreakIterator.getSentenceInstance(Locale.FRENCH); 1108 1109 logln("Building new BI\n"); 1110 filteredBI = builder.build(baseBI); 1111 1112 if(filteredBI != null) { 1113 assertFrenchBreakBehavior(filteredBI, text); 1114 } 1115 } 1116 } 1117 1118 /** 1119 * @param filteredBI 1120 * @param text 1121 */ 1122 private void assertFrenchBreakBehavior(BreakIterator filteredBI, String text) { 1123 logln("Testing French behavior:"); 1124 filteredBI.setText(text); 1125 assertEquals("6th next", 20, filteredBI.next()); 1126 assertEquals("6th next", 84, filteredBI.next()); 1127 filteredBI.first(); 1128 } 1129 1130 /** 1131 * @param filteredBI 1132 * @param text 1133 */ 1134 private void assertEnglishBreakBehavior(BreakIterator filteredBI, String text) { 1135 logln("Testing English filtered behavior:"); 1136 filteredBI.setText(text); 1137 1138 assertEquals("5th next", 84, filteredBI.next()); 1139 assertEquals("5th next", 278, filteredBI.next()); 1140 filteredBI.first(); 1141 } 1142 1143 /** 1144 * @param filteredBI 1145 * @param text 1146 */ 1147 private void assertDefaultBreakBehavior(BreakIterator filteredBI, String text) { 1148 logln("Testing Default Behavior:"); 1149 filteredBI.setText(text); 1150 assertEquals("1st next", 20, filteredBI.next()); 1151 assertEquals("1st next", 84, filteredBI.next()); 1152 assertEquals("1st next", 90, filteredBI.next()); 1153 assertEquals("1st next", 181, filteredBI.next()); 1154 assertEquals("1st next", 278, filteredBI.next()); 1155 filteredBI.first(); 1156 } 1157 } 1158