Home | History | Annotate | Download | only in rbbi
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2016, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 package android.icu.dev.test.rbbi;
     11 
     12 import java.text.StringCharacterIterator;
     13 import java.util.ArrayList;
     14 import java.util.List;
     15 import java.util.Locale;
     16 
     17 import org.junit.Before;
     18 import org.junit.Test;
     19 
     20 import android.icu.dev.test.TestFmwk;
     21 import android.icu.text.BreakIterator;
     22 import android.icu.text.FilteredBreakIteratorBuilder;
     23 import android.icu.util.ULocale;
     24 
     25 public class BreakIteratorTest extends TestFmwk
     26 {
     27     private BreakIterator characterBreak;
     28     private BreakIterator wordBreak;
     29     private BreakIterator lineBreak;
     30     private BreakIterator sentenceBreak;
     31     private BreakIterator titleBreak;
     32 
     33     public BreakIteratorTest()
     34     {
     35 
     36     }
     37 
     38     @Before
     39     public void init(){
     40         characterBreak = BreakIterator.getCharacterInstance();
     41         wordBreak = BreakIterator.getWordInstance();
     42         lineBreak = BreakIterator.getLineInstance();
     43         //logln("Creating sentence iterator...");
     44         sentenceBreak = BreakIterator.getSentenceInstance();
     45         //logln("Finished creating sentence iterator...");
     46         titleBreak = BreakIterator.getTitleInstance();
     47     }
     48     //=========================================================================
     49     // general test subroutines
     50     //=========================================================================
     51 
     52     private void generalIteratorTest(BreakIterator bi, List<String> expectedResult) {
     53         StringBuffer buffer = new StringBuffer();
     54         String text;
     55         for (int i = 0; i < expectedResult.size(); i++) {
     56             text = expectedResult.get(i);
     57             buffer.append(text);
     58         }
     59         text = buffer.toString();
     60 
     61         bi.setText(text);
     62 
     63         List<String> nextResults = _testFirstAndNext(bi, text);
     64         List<String> previousResults = _testLastAndPrevious(bi, text);
     65 
     66         logln("comparing forward and backward...");
     67         //TODO(junit) - needs to be rewritten
     68         //int errs = getErrorCount();
     69         compareFragmentLists("forward iteration", "backward iteration", nextResults,
     70                         previousResults);
     71         //if (getErrorCount() == errs) {
     72         logln("comparing expected and actual...");
     73         compareFragmentLists("expected result", "actual result", expectedResult,
     74                         nextResults);
     75         logln("comparing expected and actual...");
     76         compareFragmentLists("expected result", "actual result", expectedResult,
     77                             nextResults);
     78         //}
     79 
     80         int[] boundaries = new int[expectedResult.size() + 3];
     81         boundaries[0] = BreakIterator.DONE;
     82         boundaries[1] = 0;
     83         for (int i = 0; i < expectedResult.size(); i++)
     84             boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i)).
     85                             length();
     86         boundaries[boundaries.length - 1] = BreakIterator.DONE;
     87 
     88         _testFollowing(bi, text, boundaries);
     89         _testPreceding(bi, text, boundaries);
     90         _testIsBoundary(bi, text, boundaries);
     91 
     92         doMultipleSelectionTest(bi, text);
     93     }
     94 
     95     private List<String> _testFirstAndNext(BreakIterator bi, String text) {
     96         int p = bi.first();
     97         int lastP = p;
     98         List<String> result = new ArrayList<String>();
     99 
    100         if (p != 0)
    101             errln("first() returned " + p + " instead of 0");
    102         while (p != BreakIterator.DONE) {
    103             p = bi.next();
    104             if (p != BreakIterator.DONE) {
    105                 if (p <= lastP)
    106                     errln("next() failed to move forward: next() on position "
    107                                     + lastP + " yielded " + p);
    108 
    109                 result.add(text.substring(lastP, p));
    110             }
    111             else {
    112                 if (lastP != text.length())
    113                     errln("next() returned DONE prematurely: offset was "
    114                                     + lastP + " instead of " + text.length());
    115             }
    116             lastP = p;
    117         }
    118         return result;
    119     }
    120 
    121     private List<String> _testLastAndPrevious(BreakIterator bi, String text) {
    122         int p = bi.last();
    123         int lastP = p;
    124         List<String> result = new ArrayList<String>();
    125 
    126         if (p != text.length())
    127             errln("last() returned " + p + " instead of " + text.length());
    128         while (p != BreakIterator.DONE) {
    129             p = bi.previous();
    130             if (p != BreakIterator.DONE) {
    131                 if (p >= lastP)
    132                     errln("previous() failed to move backward: previous() on position "
    133                                     + lastP + " yielded " + p);
    134 
    135                 result.add(0, text.substring(p, lastP));
    136             }
    137             else {
    138                 if (lastP != 0)
    139                     errln("previous() returned DONE prematurely: offset was "
    140                                     + lastP + " instead of 0");
    141             }
    142             lastP = p;
    143         }
    144         return result;
    145     }
    146 
    147     private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
    148         int p1 = 0;
    149         int p2 = 0;
    150         String s1;
    151         String s2;
    152         int t1 = 0;
    153         int t2 = 0;
    154 
    155         while (p1 < f1.size() && p2 < f2.size()) {
    156             s1 = f1.get(p1);
    157             s2 = f2.get(p2);
    158             t1 += s1.length();
    159             t2 += s2.length();
    160 
    161             if (s1.equals(s2)) {
    162                 debugLogln("   >" + s1 + "<");
    163                 ++p1;
    164                 ++p2;
    165             }
    166             else {
    167                 int tempT1 = t1;
    168                 int tempT2 = t2;
    169                 int tempP1 = p1;
    170                 int tempP2 = p2;
    171 
    172                 while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
    173                     while (tempT1 < tempT2 && tempP1 < f1.size()) {
    174                         tempT1 += (f1.get(tempP1)).length();
    175                         ++tempP1;
    176                     }
    177                     while (tempT2 < tempT1 && tempP2 < f2.size()) {
    178                         tempT2 += (f2.get(tempP2)).length();
    179                         ++tempP2;
    180                     }
    181                 }
    182                 logln("*** " + f1Name + " has:");
    183                 while (p1 <= tempP1 && p1 < f1.size()) {
    184                     s1 = f1.get(p1);
    185                     t1 += s1.length();
    186                     debugLogln(" *** >" + s1 + "<");
    187                     ++p1;
    188                 }
    189                 logln("***** " + f2Name + " has:");
    190                 while (p2 <= tempP2 && p2 < f2.size()) {
    191                     s2 = f2.get(p2);
    192                     t2 += s2.length();
    193                     debugLogln(" ***** >" + s2 + "<");
    194                     ++p2;
    195                 }
    196                 errln("Discrepancy between " + f1Name + " and " + f2Name);
    197             }
    198         }
    199     }
    200 
    201     private void _testFollowing(BreakIterator bi, String text, int[] boundaries) {
    202         logln("testFollowing():");
    203         int p = 2;
    204         for (int i = 0; i <= text.length(); i++) {
    205             if (i == boundaries[p])
    206                 ++p;
    207 
    208             int b = bi.following(i);
    209             logln("bi.following(" + i + ") -> " + b);
    210             if (b != boundaries[p])
    211                 errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
    212                                 + ", got " + b);
    213         }
    214     }
    215 
    216     private void _testPreceding(BreakIterator bi, String text, int[] boundaries) {
    217         logln("testPreceding():");
    218         int p = 0;
    219         for (int i = 0; i <= text.length(); i++) {
    220             int b = bi.preceding(i);
    221             logln("bi.preceding(" + i + ") -> " + b);
    222             if (b != boundaries[p])
    223                 errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
    224                                 + ", got " + b);
    225 
    226             if (i == boundaries[p + 1])
    227                 ++p;
    228         }
    229     }
    230 
    231     private void _testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    232         logln("testIsBoundary():");
    233         int p = 1;
    234         boolean isB;
    235         for (int i = 0; i <= text.length(); i++) {
    236             isB = bi.isBoundary(i);
    237             logln("bi.isBoundary(" + i + ") -> " + isB);
    238 
    239             if (i == boundaries[p]) {
    240                 if (!isB)
    241                     errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
    242                 ++p;
    243             }
    244             else {
    245                 if (isB)
    246                     errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
    247             }
    248         }
    249     }
    250 
    251     private void doMultipleSelectionTest(BreakIterator iterator, String testText)
    252     {
    253         logln("Multiple selection test...");
    254         BreakIterator testIterator = (BreakIterator)iterator.clone();
    255         int offset = iterator.first();
    256         int testOffset;
    257         int count = 0;
    258 
    259         do {
    260             testOffset = testIterator.first();
    261             testOffset = testIterator.next(count);
    262             logln("next(" + count + ") -> " + testOffset);
    263             if (offset != testOffset)
    264                 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
    265 
    266             if (offset != BreakIterator.DONE) {
    267                 count++;
    268                 offset = iterator.next();
    269             }
    270         } while (offset != BreakIterator.DONE);
    271 
    272         // now do it backwards...
    273         offset = iterator.last();
    274         count = 0;
    275 
    276         do {
    277             testOffset = testIterator.last();
    278             testOffset = testIterator.next(count);
    279             logln("next(" + count + ") -> " + testOffset);
    280             if (offset != testOffset)
    281                 errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
    282 
    283             if (offset != BreakIterator.DONE) {
    284                 count--;
    285                 offset = iterator.previous();
    286             }
    287         } while (offset != BreakIterator.DONE);
    288     }
    289 
    290 
    291     private void doOtherInvariantTest(BreakIterator tb, String testChars)
    292     {
    293         StringBuffer work = new StringBuffer("a\r\na");
    294         int errorCount = 0;
    295 
    296         // a break should never occur between CR and LF
    297         for (int i = 0; i < testChars.length(); i++) {
    298             work.setCharAt(0, testChars.charAt(i));
    299             for (int j = 0; j < testChars.length(); j++) {
    300                 work.setCharAt(3, testChars.charAt(j));
    301                 tb.setText(work.toString());
    302                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
    303                     if (k == 2) {
    304                         errln("Break between CR and LF in string U+" + Integer.toHexString(
    305                                 (work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
    306                                 (work.charAt(3))));
    307                         errorCount++;
    308                         if (errorCount >= 75)
    309                             return;
    310                     }
    311             }
    312         }
    313 
    314         // a break should never occur before a non-spacing mark, unless it's preceded
    315         // by a line terminator
    316         work.setLength(0);
    317         work.append("aaaa");
    318         for (int i = 0; i < testChars.length(); i++) {
    319             char c = testChars.charAt(i);
    320             if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
    321                 continue;
    322             work.setCharAt(1, c);
    323             for (int j = 0; j < testChars.length(); j++) {
    324                 c = testChars.charAt(j);
    325                 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
    326                         != Character.ENCLOSING_MARK)
    327                     continue;
    328                 work.setCharAt(2, c);
    329                 tb.setText(work.toString());
    330                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
    331                     if (k == 2) {
    332                         errln("Break between U+" + Integer.toHexString((work.charAt(1)))
    333                                 + " and U+" + Integer.toHexString((work.charAt(2))));
    334                         errorCount++;
    335                         if (errorCount >= 75)
    336                             return;
    337                     }
    338             }
    339         }
    340     }
    341 
    342     public void debugLogln(String s) {
    343         final String zeros = "0000";
    344         String temp;
    345         StringBuffer out = new StringBuffer();
    346         for (int i = 0; i < s.length(); i++) {
    347             char c = s.charAt(i);
    348             if (c >= ' ' && c < '\u007f')
    349                 out.append(c);
    350             else {
    351                 out.append("\\u");
    352                 temp = Integer.toHexString(c);
    353                 out.append(zeros.substring(0, 4 - temp.length()));
    354                 out.append(temp);
    355             }
    356         }
    357         logln(out.toString());
    358     }
    359 
    360     //=========================================================================
    361     // tests
    362     //=========================================================================
    363 
    364 
    365     /**
    366      * @bug 4097779
    367      */
    368     @Test
    369     public void TestBug4097779() {
    370         List<String> wordSelectionData = new ArrayList<String>(2);
    371 
    372         wordSelectionData.add("aa\u0300a");
    373         wordSelectionData.add(" ");
    374 
    375         generalIteratorTest(wordBreak, wordSelectionData);
    376     }
    377 
    378     /**
    379      * @bug 4098467
    380      */
    381     @Test
    382     public void TestBug4098467Words() {
    383         List<String> wordSelectionData = new ArrayList<String>();
    384 
    385         // What follows is a string of Korean characters (I found it in the Yellow Pages
    386         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
    387         // it correctly), first as precomposed syllables, and then as conjoining jamo.
    388         // Both sequences should be semantically identical and break the same way.
    389         // precomposed syllables...
    390         wordSelectionData.add("\uc0c1\ud56d");
    391         wordSelectionData.add(" ");
    392         wordSelectionData.add("\ud55c\uc778");
    393         wordSelectionData.add(" ");
    394         wordSelectionData.add("\uc5f0\ud569");
    395         wordSelectionData.add(" ");
    396         wordSelectionData.add("\uc7a5\ub85c\uad50\ud68c");
    397         wordSelectionData.add(" ");
    398         // conjoining jamo...
    399         wordSelectionData.add("\u1109\u1161\u11bc\u1112\u1161\u11bc");
    400         wordSelectionData.add(" ");
    401         wordSelectionData.add("\u1112\u1161\u11ab\u110b\u1175\u11ab");
    402         wordSelectionData.add(" ");
    403         wordSelectionData.add("\u110b\u1167\u11ab\u1112\u1161\u11b8");
    404         wordSelectionData.add(" ");
    405         wordSelectionData.add("\u110c\u1161\u11bc\u1105\u1169\u1100\u116d\u1112\u116c");
    406         wordSelectionData.add(" ");
    407 
    408         generalIteratorTest(wordBreak, wordSelectionData);
    409     }
    410 
    411 
    412     /**
    413      * @bug 4111338
    414      */
    415     @Test
    416     public void TestBug4111338() {
    417         List<String> sentenceSelectionData = new ArrayList<String>();
    418 
    419         // test for bug #4111338: Don't break sentences at the boundary between CJK
    420         // and other letters
    421         sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165:\"JAVA\u821c"
    422                 + "\u8165\u7fc8\u51ce\u306d,\u2494\u56d8\u4ec0\u60b1\u8560\u51ba"
    423                 + "\u611d\u57b6\u2510\u5d46\".\u2029");
    424         sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8"
    425                 + "\u97e4JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0"
    426                 + "\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
    427         sentenceSelectionData.add("\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e4"
    428                 + "\u6470\u8790JAVA\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8"
    429                 + "\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u2029");
    430         sentenceSelectionData.add("He said, \"I can go there.\"\u2029");
    431 
    432         generalIteratorTest(sentenceBreak, sentenceSelectionData);
    433     }
    434 
    435 
    436     /**
    437      * @bug 4143071
    438      */
    439     @Test
    440     public void TestBug4143071() {
    441         List<String> sentenceSelectionData = new ArrayList<String>(3);
    442 
    443         // Make sure sentences that end with digits work right
    444         sentenceSelectionData.add("Today is the 27th of May, 1998.  ");
    445         sentenceSelectionData.add("Tomorrow will be 28 May 1998.  ");
    446         sentenceSelectionData.add("The day after will be the 30th.\u2029");
    447 
    448         generalIteratorTest(sentenceBreak, sentenceSelectionData);
    449     }
    450 
    451     /**
    452      * @bug 4152416
    453      */
    454     @Test
    455     public void TestBug4152416() {
    456         List<String> sentenceSelectionData = new ArrayList<String>(2);
    457 
    458         // Make sure sentences ending with a capital letter are treated correctly
    459         sentenceSelectionData.add("The type of all primitive "
    460                 + "<code>boolean</code> values accessed in the target VM.  ");
    461         sentenceSelectionData.add("Calls to xxx will return an "
    462                 + "implementor of this interface.\u2029");
    463 
    464         generalIteratorTest(sentenceBreak, sentenceSelectionData);
    465     }
    466 
    467     /**
    468      * @bug 4152117
    469      */
    470     @Test
    471     public void TestBug4152117() {
    472         List<String> sentenceSelectionData = new ArrayList<String>(3);
    473 
    474         // Make sure sentence breaking is handling punctuation correctly
    475         // [COULD NOT REPRODUCE THIS BUG, BUT TEST IS HERE TO MAKE SURE
    476         // IT DOESN'T CROP UP]
    477         sentenceSelectionData.add("Constructs a randomly generated "
    478                 + "BigInteger, uniformly distributed over the range <tt>0</tt> "
    479                 + "to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive.  ");
    480         sentenceSelectionData.add("The uniformity of the distribution "
    481                 + "assumes that a fair source of random bits is provided in "
    482                 + "<tt>rnd</tt>.  ");
    483         sentenceSelectionData.add("Note that this constructor always "
    484                 + "constructs a non-negative BigInteger.\u2029");
    485 
    486         generalIteratorTest(sentenceBreak, sentenceSelectionData);
    487     }
    488 
    489     @Test
    490     public void TestLineBreak() {
    491         List<String> lineSelectionData = new ArrayList<String>();
    492 
    493         lineSelectionData.add("Multi-");
    494         lineSelectionData.add("Level ");
    495         lineSelectionData.add("example ");
    496         lineSelectionData.add("of ");
    497         lineSelectionData.add("a ");
    498         lineSelectionData.add("semi-");
    499         lineSelectionData.add("idiotic ");
    500         lineSelectionData.add("non-");
    501         lineSelectionData.add("sensical ");
    502         lineSelectionData.add("(non-");
    503         lineSelectionData.add("important) ");
    504         lineSelectionData.add("sentence. ");
    505 
    506         lineSelectionData.add("Hi  ");
    507         lineSelectionData.add("Hello ");
    508         lineSelectionData.add("How\n");
    509         lineSelectionData.add("are\r");
    510         lineSelectionData.add("you\u2028");
    511         lineSelectionData.add("fine.\t");
    512         lineSelectionData.add("good.  ");
    513 
    514         lineSelectionData.add("Now\r");
    515         lineSelectionData.add("is\n");
    516         lineSelectionData.add("the\r\n");
    517         lineSelectionData.add("time\n");
    518         lineSelectionData.add("\r");
    519         lineSelectionData.add("for\r");
    520         lineSelectionData.add("\r");
    521         lineSelectionData.add("all");
    522 
    523         generalIteratorTest(lineBreak, lineSelectionData);
    524     }
    525 
    526     /**
    527      * @bug 4068133
    528      */
    529     @Test
    530     public void TestBug4068133() {
    531         List<String> lineSelectionData = new ArrayList<String>(9);
    532 
    533         lineSelectionData.add("\u96f6");
    534         lineSelectionData.add("\u4e00\u3002");
    535         lineSelectionData.add("\u4e8c\u3001");
    536         lineSelectionData.add("\u4e09\u3002\u3001");
    537         lineSelectionData.add("\u56db\u3001\u3002\u3001");
    538         lineSelectionData.add("\u4e94,");
    539         lineSelectionData.add("\u516d.");
    540         lineSelectionData.add("\u4e03.\u3001,\u3002");
    541         lineSelectionData.add("\u516b");
    542 
    543         generalIteratorTest(lineBreak, lineSelectionData);
    544     }
    545 
    546     /**
    547      * @bug 4086052
    548      */
    549     @Test
    550     public void TestBug4086052() {
    551         List<String> lineSelectionData = new ArrayList<String>(1);
    552 
    553         lineSelectionData.add("foo\u00a0bar ");
    554 //        lineSelectionData.addElement("foo\ufeffbar");
    555 
    556         generalIteratorTest(lineBreak, lineSelectionData);
    557     }
    558 
    559     /**
    560      * @bug 4097920
    561      */
    562     @Test
    563     public void TestBug4097920() {
    564         List<String> lineSelectionData = new ArrayList<String>(3);
    565 
    566         lineSelectionData.add("dog,cat,mouse ");
    567         lineSelectionData.add("(one)");
    568         lineSelectionData.add("(two)\n");
    569         generalIteratorTest(lineBreak, lineSelectionData);
    570     }
    571 
    572 
    573 
    574     /**
    575      * @bug 4117554
    576      */
    577     @Test
    578     public void TestBug4117554Lines() {
    579         List<String> lineSelectionData = new ArrayList<String>(3);
    580 
    581         // Fullwidth .!? should be treated as postJwrd
    582         lineSelectionData.add("\u4e01\uff0e");
    583         lineSelectionData.add("\u4e02\uff01");
    584         lineSelectionData.add("\u4e03\uff1f");
    585 
    586         generalIteratorTest(lineBreak, lineSelectionData);
    587     }
    588 
    589     @Test
    590     public void TestLettersAndDigits() {
    591         // a character sequence such as "X11" or "30F3" or "native2ascii" should
    592         // be kept together as a single word
    593         List<String> lineSelectionData = new ArrayList<String>(3);
    594 
    595         lineSelectionData.add("X11 ");
    596         lineSelectionData.add("30F3 ");
    597         lineSelectionData.add("native2ascii");
    598 
    599         generalIteratorTest(lineBreak, lineSelectionData);
    600     }
    601 
    602 
    603     private static final String graveS = "S\u0300";
    604     private static final String acuteBelowI = "i\u0317";
    605     private static final String acuteE = "e\u0301";
    606     private static final String circumflexA = "a\u0302";
    607     private static final String tildeE = "e\u0303";
    608 
    609     @Test
    610     public void TestCharacterBreak() {
    611         List<String> characterSelectionData = new ArrayList<String>();
    612 
    613         characterSelectionData.add(graveS);
    614         characterSelectionData.add(acuteBelowI);
    615         characterSelectionData.add("m");
    616         characterSelectionData.add("p");
    617         characterSelectionData.add("l");
    618         characterSelectionData.add(acuteE);
    619         characterSelectionData.add(" ");
    620         characterSelectionData.add("s");
    621         characterSelectionData.add(circumflexA);
    622         characterSelectionData.add("m");
    623         characterSelectionData.add("p");
    624         characterSelectionData.add("l");
    625         characterSelectionData.add(tildeE);
    626         characterSelectionData.add(".");
    627         characterSelectionData.add("w");
    628         characterSelectionData.add(circumflexA);
    629         characterSelectionData.add("w");
    630         characterSelectionData.add("a");
    631         characterSelectionData.add("f");
    632         characterSelectionData.add("q");
    633         characterSelectionData.add("\n");
    634         characterSelectionData.add("\r");
    635         characterSelectionData.add("\r\n");
    636         characterSelectionData.add("\n");
    637 
    638         generalIteratorTest(characterBreak, characterSelectionData);
    639     }
    640 
    641     /**
    642      * @bug 4098467
    643      */
    644     @Test
    645     public void TestBug4098467Characters() {
    646         List<String> characterSelectionData = new ArrayList<String>();
    647 
    648         // What follows is a string of Korean characters (I found it in the Yellow Pages
    649         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
    650         // it correctly), first as precomposed syllables, and then as conjoining jamo.
    651         // Both sequences should be semantically identical and break the same way.
    652         // precomposed syllables...
    653         characterSelectionData.add("\uc0c1");
    654         characterSelectionData.add("\ud56d");
    655         characterSelectionData.add(" ");
    656         characterSelectionData.add("\ud55c");
    657         characterSelectionData.add("\uc778");
    658         characterSelectionData.add(" ");
    659         characterSelectionData.add("\uc5f0");
    660         characterSelectionData.add("\ud569");
    661         characterSelectionData.add(" ");
    662         characterSelectionData.add("\uc7a5");
    663         characterSelectionData.add("\ub85c");
    664         characterSelectionData.add("\uad50");
    665         characterSelectionData.add("\ud68c");
    666         characterSelectionData.add(" ");
    667         // conjoining jamo...
    668         characterSelectionData.add("\u1109\u1161\u11bc");
    669         characterSelectionData.add("\u1112\u1161\u11bc");
    670         characterSelectionData.add(" ");
    671         characterSelectionData.add("\u1112\u1161\u11ab");
    672         characterSelectionData.add("\u110b\u1175\u11ab");
    673         characterSelectionData.add(" ");
    674         characterSelectionData.add("\u110b\u1167\u11ab");
    675         characterSelectionData.add("\u1112\u1161\u11b8");
    676         characterSelectionData.add(" ");
    677         characterSelectionData.add("\u110c\u1161\u11bc");
    678         characterSelectionData.add("\u1105\u1169");
    679         characterSelectionData.add("\u1100\u116d");
    680         characterSelectionData.add("\u1112\u116c");
    681 
    682         generalIteratorTest(characterBreak, characterSelectionData);
    683     }
    684 
    685     @Test
    686     public void TestTitleBreak()
    687     {
    688         List<String> titleData = new ArrayList<String>();
    689         titleData.add("   ");
    690         titleData.add("This ");
    691         titleData.add("is ");
    692         titleData.add("a ");
    693         titleData.add("simple ");
    694         titleData.add("sample ");
    695         titleData.add("sentence. ");
    696         titleData.add("This ");
    697 
    698         generalIteratorTest(titleBreak, titleData);
    699     }
    700 
    701 
    702 
    703     /*
    704      * @bug 4153072
    705      */
    706     @Test
    707     public void TestBug4153072() {
    708         BreakIterator iter = BreakIterator.getWordInstance();
    709         String str = "...Hello, World!...";
    710         int begin = 3;
    711         int end = str.length() - 3;
    712         // not used boolean gotException = false;
    713 
    714 
    715         iter.setText(new StringCharacterIterator(str, begin, end, begin));
    716         for (int index = -1; index < begin + 1; ++index) {
    717             try {
    718                 iter.isBoundary(index);
    719                 if (index < begin)
    720                     errln("Didn't get exception with offset = " + index +
    721                                     " and begin index = " + begin);
    722             }
    723             catch (IllegalArgumentException e) {
    724                 if (index >= begin)
    725                     errln("Got exception with offset = " + index +
    726                                     " and begin index = " + begin);
    727             }
    728         }
    729     }
    730 
    731 
    732     @Test
    733     public void TestBug4146175Lines() {
    734         List<String> lineSelectionData = new ArrayList<String>(2);
    735 
    736         // the fullwidth comma should stick to the preceding Japanese character
    737         lineSelectionData.add("\u7d42\uff0c");
    738         lineSelectionData.add("\u308f");
    739 
    740         generalIteratorTest(lineBreak, lineSelectionData);
    741     }
    742 
    743     private static final String cannedTestChars
    744         = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
    745         + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
    746         + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
    747         + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
    748         + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
    749         + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
    750 
    751     @Test
    752     public void TestSentenceInvariants()
    753     {
    754         BreakIterator e = BreakIterator.getSentenceInstance();
    755         doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
    756     }
    757 
    758     @Test
    759     public void TestEmptyString()
    760     {
    761         String text = "";
    762         List<String> x = new ArrayList<String>(1);
    763         x.add(text);
    764 
    765         generalIteratorTest(lineBreak, x);
    766     }
    767 
    768     @Test
    769     public void TestGetAvailableLocales()
    770     {
    771         Locale[] locList = BreakIterator.getAvailableLocales();
    772 
    773         if (locList.length == 0)
    774             errln("getAvailableLocales() returned an empty list!");
    775         // I have no idea how to test this function...
    776 
    777         android.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
    778         if (ulocList.length == 0) {
    779             errln("getAvailableULocales() returned an empty list!");
    780         } else {
    781             logln("getAvailableULocales() returned " + ulocList.length + " locales");
    782         }
    783     }
    784 
    785 
    786     /**
    787      * @bug 4068137
    788      */
    789     @Test
    790     public void TestEndBehavior()
    791     {
    792         String testString = "boo.";
    793         BreakIterator wb = BreakIterator.getWordInstance();
    794         wb.setText(testString);
    795 
    796         if (wb.first() != 0)
    797             errln("Didn't get break at beginning of string.");
    798         if (wb.next() != 3)
    799             errln("Didn't get break before period in \"boo.\"");
    800         if (wb.current() != 4 && wb.next() != 4)
    801             errln("Didn't get break at end of string.");
    802     }
    803 
    804     // The Following two tests are ported from ICU4C 1.8.1 [Richard/GCL]
    805     /**
    806      * Port From:   ICU4C v1.8.1 : textbounds : IntlTestTextBoundary
    807      * Source File: $ICU4CRoot/source/test/intltest/ittxtbd.cpp
    808      **/
    809     /**
    810      * test methods preceding, following and isBoundary
    811      **/
    812     @Test
    813     public void TestPreceding() {
    814         String words3 = "aaa bbb ccc";
    815         BreakIterator e = BreakIterator.getWordInstance(Locale.getDefault());
    816         e.setText( words3 );
    817         e.first();
    818         int p1 = e.next();
    819         int p2 = e.next();
    820         int p3 = e.next();
    821         int p4 = e.next();
    822 
    823         int f = e.following(p2+1);
    824         int p = e.preceding(p2+1);
    825         if (f!=p3)
    826             errln("IntlTestTextBoundary::TestPreceding: f!=p3");
    827         if (p!=p2)
    828             errln("IntlTestTextBoundary::TestPreceding: p!=p2");
    829 
    830         if (p1+1!=p2)
    831             errln("IntlTestTextBoundary::TestPreceding: p1+1!=p2");
    832 
    833         if (p3+1!=p4)
    834             errln("IntlTestTextBoundary::TestPreceding: p3+1!=p4");
    835 
    836         if (!e.isBoundary(p2) || e.isBoundary(p2+1) || !e.isBoundary(p3))
    837         {
    838             errln("IntlTestTextBoundary::TestPreceding: isBoundary err");
    839         }
    840     }
    841 
    842 
    843     /**
    844      * Bug 4450804
    845      */
    846     @Test
    847     public void TestLineBreakContractions() {
    848         List<String> expected = new ArrayList<String>(7);
    849         expected.add("These ");
    850         expected.add("are ");
    851         expected.add("'foobles'. ");
    852         expected.add("Don't ");
    853         expected.add("you ");
    854         expected.add("like ");
    855         expected.add("them?");
    856         generalIteratorTest(lineBreak, expected);
    857     }
    858 
    859     /**
    860      * Ticket#5615
    861      */
    862     @Test
    863     public void TestT5615() {
    864         android.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
    865         int type = 0;
    866         android.icu.util.ULocale loc = null;
    867         try {
    868             for (int i = 0; i < ulocales.length; i++) {
    869                 loc = ulocales[i];
    870                 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
    871                     BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
    872                     if (brk == null) {
    873                         errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
    874                     }
    875                 }
    876             }
    877         } catch (Exception e) {
    878             errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
    879         }
    880     }
    881 
    882     /**
    883      * At present, Japanese doesn't have exceptions.
    884      * However, this still should not fail.
    885      */
    886     @Test
    887     public void TestFilteredJapanese() {
    888         ULocale loc = ULocale.JAPANESE;
    889         BreakIterator brk = FilteredBreakIteratorBuilder
    890                 .createInstance(loc)
    891                 .build(BreakIterator.getSentenceInstance(loc));
    892         brk.setText("");
    893         assertEquals("Starting point", 0, brk.current());
    894         assertEquals("Next point", 5, brk.next());
    895         assertEquals("Last point", BreakIterator.DONE, brk.next());
    896     }
    897 
    898     /*
    899      * Test case for Ticket#10721. BreakIterator factory method should throw NPE
    900      * when specified locale is null.
    901      */
    902     @Test
    903     public void TestNullLocale() {
    904         Locale loc = null;
    905         ULocale uloc = null;
    906 
    907         @SuppressWarnings("unused")
    908         BreakIterator brk;
    909 
    910         // Character
    911         try {
    912             brk = BreakIterator.getCharacterInstance(loc);
    913             errln("getCharacterInstance((Locale)null) did not throw NPE.");
    914         } catch (NullPointerException e) { /* OK */ }
    915         try {
    916             brk = BreakIterator.getCharacterInstance(uloc);
    917             errln("getCharacterInstance((ULocale)null) did not throw NPE.");
    918         } catch (NullPointerException e) { /* OK */ }
    919 
    920         // Line
    921         try {
    922             brk = BreakIterator.getLineInstance(loc);
    923             errln("getLineInstance((Locale)null) did not throw NPE.");
    924         } catch (NullPointerException e) { /* OK */ }
    925         try {
    926             brk = BreakIterator.getLineInstance(uloc);
    927             errln("getLineInstance((ULocale)null) did not throw NPE.");
    928         } catch (NullPointerException e) { /* OK */ }
    929 
    930         // Sentence
    931         try {
    932             brk = BreakIterator.getSentenceInstance(loc);
    933             errln("getSentenceInstance((Locale)null) did not throw NPE.");
    934         } catch (NullPointerException e) { /* OK */ }
    935         try {
    936             brk = BreakIterator.getSentenceInstance(uloc);
    937             errln("getSentenceInstance((ULocale)null) did not throw NPE.");
    938         } catch (NullPointerException e) { /* OK */ }
    939 
    940         // Title
    941         try {
    942             brk = BreakIterator.getTitleInstance(loc);
    943             errln("getTitleInstance((Locale)null) did not throw NPE.");
    944         } catch (NullPointerException e) { /* OK */ }
    945         try {
    946             brk = BreakIterator.getTitleInstance(uloc);
    947             errln("getTitleInstance((ULocale)null) did not throw NPE.");
    948         } catch (NullPointerException e) { /* OK */ }
    949 
    950         // Word
    951         try {
    952             brk = BreakIterator.getWordInstance(loc);
    953             errln("getWordInstance((Locale)null) did not throw NPE.");
    954         } catch (NullPointerException e) { /* OK */ }
    955         try {
    956             brk = BreakIterator.getWordInstance(uloc);
    957             errln("getWordInstance((ULocale)null) did not throw NPE.");
    958         } catch (NullPointerException e) { /* OK */ }
    959     }
    960 
    961     /**
    962      * Test FilteredBreakIteratorBuilder newly introduced
    963      */
    964     @Test
    965     public void TestFilteredBreakIteratorBuilder() {
    966         FilteredBreakIteratorBuilder builder;
    967         BreakIterator baseBI;
    968         BreakIterator filteredBI;
    969 
    970         String text = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."; // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
    971         String ABBR_MR = "Mr.";
    972         String ABBR_CAPT = "Capt.";
    973 
    974         {
    975             logln("Constructing empty builder\n");
    976             builder = FilteredBreakIteratorBuilder.createInstance();
    977 
    978             logln("Constructing base BI\n");
    979             baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    980 
    981             logln("Building new BI\n");
    982             filteredBI = builder.build(baseBI);
    983 
    984             assertDefaultBreakBehavior(filteredBI, text);
    985         }
    986 
    987         {
    988             logln("Constructing empty builder\n");
    989             builder = FilteredBreakIteratorBuilder.createInstance();
    990 
    991             logln("Adding Mr. as an exception\n");
    992 
    993             assertEquals("2.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
    994             assertEquals("2.2 suppressBreakAfter", false, builder.suppressBreakAfter(ABBR_MR));
    995             assertEquals("2.3 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_MR));
    996             assertEquals("2.4 unsuppressBreakAfter", false, builder.unsuppressBreakAfter(ABBR_MR));
    997             assertEquals("2.5 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
    998 
    999             logln("Constructing base BI\n");
   1000             baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
   1001 
   1002             logln("Building new BI\n");
   1003             filteredBI = builder.build(baseBI);
   1004 
   1005             logln("Testing:");
   1006             filteredBI.setText(text);
   1007             assertEquals("2nd next", 84, filteredBI.next());
   1008             assertEquals("2nd next", 90, filteredBI.next());
   1009             assertEquals("2nd next", 278, filteredBI.next());
   1010             filteredBI.first();
   1011         }
   1012 
   1013 
   1014         {
   1015           logln("Constructing empty builder\n");
   1016           builder = FilteredBreakIteratorBuilder.createInstance();
   1017 
   1018           logln("Adding Mr. and Capt as an exception\n");
   1019           assertEquals("3.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
   1020           assertEquals("3.2 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_CAPT));
   1021 
   1022           logln("Constructing base BI\n");
   1023           baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
   1024 
   1025           logln("Building new BI\n");
   1026           filteredBI = builder.build(baseBI);
   1027 
   1028           logln("Testing:");
   1029           filteredBI.setText(text);
   1030           assertEquals("3rd next", 84, filteredBI.next());
   1031           assertEquals("3rd next", 278, filteredBI.next());
   1032           filteredBI.first();
   1033         }
   1034 
   1035         {
   1036           logln("Constructing English builder\n");
   1037           builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);
   1038 
   1039           logln("Constructing base BI\n");
   1040           baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
   1041 
   1042           logln("unsuppressing 'Capt'");
   1043           assertEquals("1st suppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_CAPT));
   1044 
   1045           logln("Building new BI\n");
   1046           filteredBI = builder.build(baseBI);
   1047 
   1048           if(filteredBI != null) {
   1049             logln("Testing:");
   1050             filteredBI.setText(text);
   1051             assertEquals("4th next", 84, filteredBI.next());
   1052             assertEquals("4th next", 90, filteredBI.next());
   1053             assertEquals("4th next", 278, filteredBI.next());
   1054             filteredBI.first();
   1055           }
   1056         }
   1057 
   1058         {
   1059           logln("Constructing English builder\n");
   1060           builder = FilteredBreakIteratorBuilder.createInstance(ULocale.ENGLISH);
   1061 
   1062           logln("Constructing base BI\n");
   1063           baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
   1064 
   1065           logln("Building new BI\n");
   1066           filteredBI = builder.build(baseBI);
   1067 
   1068           if(filteredBI != null) {
   1069             assertEnglishBreakBehavior(filteredBI, text);
   1070           }
   1071         }
   1072 
   1073         {
   1074             logln("Constructing English @ss=standard\n");
   1075             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("en-US-u-ss-standard"));
   1076 
   1077             if(filteredBI != null) {
   1078               assertEnglishBreakBehavior(filteredBI, text);
   1079             }
   1080         }
   1081 
   1082         {
   1083             logln("Constructing Afrikaans @ss=standard - should be == default\n");
   1084             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("af-u-ss-standard"));
   1085 
   1086             assertDefaultBreakBehavior(filteredBI, text);
   1087         }
   1088 
   1089         {
   1090             logln("Constructing Japanese @ss=standard - should be == default\n");
   1091             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("ja-u-ss-standard"));
   1092 
   1093             assertDefaultBreakBehavior(filteredBI, text);
   1094         }
   1095         {
   1096             logln("Constructing tfg @ss=standard - should be == default\n");
   1097             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("tfg-u-ss-standard"));
   1098 
   1099             assertDefaultBreakBehavior(filteredBI, text);
   1100         }
   1101 
   1102         {
   1103           logln("Constructing French builder");
   1104           builder = FilteredBreakIteratorBuilder.createInstance(ULocale.FRENCH);
   1105 
   1106           logln("Constructing base BI\n");
   1107           baseBI = BreakIterator.getSentenceInstance(Locale.FRENCH);
   1108 
   1109           logln("Building new BI\n");
   1110           filteredBI = builder.build(baseBI);
   1111 
   1112           if(filteredBI != null) {
   1113             assertFrenchBreakBehavior(filteredBI, text);
   1114           }
   1115         }
   1116     }
   1117 
   1118     /**
   1119      * @param filteredBI
   1120      * @param text
   1121      */
   1122     private void assertFrenchBreakBehavior(BreakIterator filteredBI, String text) {
   1123         logln("Testing French behavior:");
   1124         filteredBI.setText(text);
   1125         assertEquals("6th next", 20, filteredBI.next());
   1126         assertEquals("6th next", 84, filteredBI.next());
   1127         filteredBI.first();
   1128     }
   1129 
   1130     /**
   1131      * @param filteredBI
   1132      * @param text
   1133      */
   1134     private void assertEnglishBreakBehavior(BreakIterator filteredBI, String text) {
   1135         logln("Testing English filtered behavior:");
   1136           filteredBI.setText(text);
   1137 
   1138           assertEquals("5th next", 84, filteredBI.next());
   1139           assertEquals("5th next", 278, filteredBI.next());
   1140           filteredBI.first();
   1141     }
   1142 
   1143     /**
   1144      * @param filteredBI
   1145      * @param text
   1146      */
   1147     private void assertDefaultBreakBehavior(BreakIterator filteredBI, String text) {
   1148         logln("Testing Default Behavior:");
   1149         filteredBI.setText(text);
   1150         assertEquals("1st next", 20, filteredBI.next());
   1151         assertEquals("1st next", 84, filteredBI.next());
   1152         assertEquals("1st next", 90, filteredBI.next());
   1153         assertEquals("1st next", 181, filteredBI.next());
   1154         assertEquals("1st next", 278, filteredBI.next());
   1155         filteredBI.first();
   1156     }
   1157 }
   1158