Home | History | Annotate | Download | only in rbbi
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2016, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 package android.icu.dev.test.rbbi;
     11 
     12 import java.text.StringCharacterIterator;
     13 import java.util.Locale;
     14 
     15 import org.junit.Test;
     16 import org.junit.runner.RunWith;
     17 import org.junit.runners.JUnit4;
     18 
     19 import android.icu.dev.test.TestFmwk;
     20 import android.icu.text.BreakIterator;
     21 import android.icu.text.FilteredBreakIteratorBuilder;
     22 import android.icu.util.ULocale;
     23 import android.icu.testsharding.MainTestShard;
     24 
     25 @MainTestShard
     26 @RunWith(JUnit4.class)
     27 public class BreakIteratorTest extends TestFmwk
     28 {
     29     public BreakIteratorTest()
     30     {
     31 
     32     }
     33 
     34 
     35     //=========================================================================
     36     // general test subroutines
     37     //=========================================================================
     38 
     39 
     40     private void doOtherInvariantTest(BreakIterator tb, String testChars)
     41     {
     42         StringBuffer work = new StringBuffer("a\r\na");
     43         int errorCount = 0;
     44 
     45         // a break should never occur between CR and LF
     46         for (int i = 0; i < testChars.length(); i++) {
     47             work.setCharAt(0, testChars.charAt(i));
     48             for (int j = 0; j < testChars.length(); j++) {
     49                 work.setCharAt(3, testChars.charAt(j));
     50                 tb.setText(work.toString());
     51                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
     52                     if (k == 2) {
     53                         errln("Break between CR and LF in string U+" + Integer.toHexString(
     54                                 (work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
     55                                 (work.charAt(3))));
     56                         errorCount++;
     57                         if (errorCount >= 75)
     58                             return;
     59                     }
     60             }
     61         }
     62 
     63         // a break should never occur before a non-spacing mark, unless it's preceded
     64         // by a line terminator
     65         work.setLength(0);
     66         work.append("aaaa");
     67         for (int i = 0; i < testChars.length(); i++) {
     68             char c = testChars.charAt(i);
     69             if (c == '\n' || c == '\r' || c == '\u2029' || c == '\u2028' || c == '\u0003')
     70                 continue;
     71             work.setCharAt(1, c);
     72             for (int j = 0; j < testChars.length(); j++) {
     73                 c = testChars.charAt(j);
     74                 if (Character.getType(c) != Character.NON_SPACING_MARK && Character.getType(c)
     75                         != Character.ENCLOSING_MARK)
     76                     continue;
     77                 work.setCharAt(2, c);
     78                 tb.setText(work.toString());
     79                 for (int k = tb.first(); k != BreakIterator.DONE; k = tb.next())
     80                     if (k == 2) {
     81                         errln("Break between U+" + Integer.toHexString((work.charAt(1)))
     82                                 + " and U+" + Integer.toHexString((work.charAt(2))));
     83                         errorCount++;
     84                         if (errorCount >= 75)
     85                             return;
     86                     }
     87             }
     88         }
     89     }
     90 
     91     public void debugLogln(String s) {
     92         final String zeros = "0000";
     93         String temp;
     94         StringBuffer out = new StringBuffer();
     95         for (int i = 0; i < s.length(); i++) {
     96             char c = s.charAt(i);
     97             if (c >= ' ' && c < '\u007f')
     98                 out.append(c);
     99             else {
    100                 out.append("\\u");
    101                 temp = Integer.toHexString(c);
    102                 out.append(zeros.substring(0, 4 - temp.length()));
    103                 out.append(temp);
    104             }
    105         }
    106         logln(out.toString());
    107     }
    108 
    109     //=========================================================================
    110     // tests
    111     //=========================================================================
    112 
    113 
    114     /*
    115      * @bug 4153072
    116      */
    117     @Test
    118     public void TestBug4153072() {
    119         BreakIterator iter = BreakIterator.getWordInstance();
    120         String str = "...Hello, World!...";
    121         int begin = 3;
    122         int end = str.length() - 3;
    123         // not used boolean gotException = false;
    124 
    125 
    126         iter.setText(new StringCharacterIterator(str, begin, end, begin));
    127         for (int index = -1; index < begin + 1; ++index) {
    128             try {
    129                 iter.isBoundary(index);
    130                 if (index < begin)
    131                     errln("Didn't get exception with offset = " + index +
    132                                     " and begin index = " + begin);
    133             }
    134             catch (IllegalArgumentException e) {
    135                 if (index >= begin)
    136                     errln("Got exception with offset = " + index +
    137                                     " and begin index = " + begin);
    138             }
    139         }
    140     }
    141 
    142 
    143     private static final String cannedTestChars
    144         = "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
    145         + "\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00ab\u00ad\u00ae\u00af\u00b0\u00b2\u00b3"
    146         + "\u00b4\u00b9\u00bb\u00bc\u00bd\u02b0\u02b1\u02b2\u02b3\u02b4\u0300\u0301\u0302\u0303"
    147         + "\u0304\u05d0\u05d1\u05d2\u05d3\u05d4\u0903\u093e\u093f\u0940\u0949\u0f3a\u0f3b\u2000"
    148         + "\u2001\u2002\u200c\u200d\u200e\u200f\u2010\u2011\u2012\u2028\u2029\u202a\u203e\u203f"
    149         + "\u2040\u20dd\u20de\u20df\u20e0\u2160\u2161\u2162\u2163\u2164";
    150 
    151     @Test
    152     public void TestSentenceInvariants()
    153     {
    154         BreakIterator e = BreakIterator.getSentenceInstance();
    155         doOtherInvariantTest(e, cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff");
    156     }
    157 
    158     @Test
    159     public void TestGetAvailableLocales()
    160     {
    161         Locale[] locList = BreakIterator.getAvailableLocales();
    162 
    163         if (locList.length == 0)
    164             errln("getAvailableLocales() returned an empty list!");
    165         // I have no idea how to test this function...
    166 
    167         android.icu.util.ULocale[] ulocList = BreakIterator.getAvailableULocales();
    168         if (ulocList.length == 0) {
    169             errln("getAvailableULocales() returned an empty list!");
    170         } else {
    171             logln("getAvailableULocales() returned " + ulocList.length + " locales");
    172         }
    173     }
    174 
    175 
    176     /**
    177      * @bug 4068137
    178      */
    179     @Test
    180     public void TestEndBehavior()
    181     {
    182         String testString = "boo.";
    183         BreakIterator wb = BreakIterator.getWordInstance();
    184         wb.setText(testString);
    185 
    186         if (wb.first() != 0)
    187             errln("Didn't get break at beginning of string.");
    188         if (wb.next() != 3)
    189             errln("Didn't get break before period in \"boo.\"");
    190         if (wb.current() != 4 && wb.next() != 4)
    191             errln("Didn't get break at end of string.");
    192     }
    193 
    194     // The Following test is ported from ICU4C 1.8.1 [Richard/GCL]
    195 
    196     /**
    197      * Ticket#5615
    198      */
    199     @Test
    200     public void TestT5615() {
    201         android.icu.util.ULocale[] ulocales = BreakIterator.getAvailableULocales();
    202         int type = 0;
    203         android.icu.util.ULocale loc = null;
    204         try {
    205             for (int i = 0; i < ulocales.length; i++) {
    206                 loc = ulocales[i];
    207                 for (type = 0; type < 5 /* 5 = BreakIterator.KIND_COUNT */; ++type) {
    208                     BreakIterator brk = BreakIterator.getBreakInstance(loc, type);
    209                     if (brk == null) {
    210                         errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc);
    211                     }
    212                 }
    213             }
    214         } catch (Exception e) {
    215             errln("ERR: Failed to create an instance type: " + type + " / locale: " + loc + " / exception: " + e.getMessage());
    216         }
    217     }
    218 
    219     /**
    220      * At present, Japanese doesn't have exceptions.
    221      * However, this still should not fail.
    222      */
    223     @Test
    224     public void TestFilteredJapanese() {
    225         ULocale loc = ULocale.JAPANESE;
    226         BreakIterator brk = FilteredBreakIteratorBuilder
    227                 .getInstance(loc)
    228                 .wrapIteratorWithFilter(BreakIterator.getSentenceInstance(loc));
    229         brk.setText("");
    230         assertEquals("Starting point", 0, brk.current());
    231         assertEquals("Next point", 5, brk.next());
    232         assertEquals("Last point", BreakIterator.DONE, brk.next());
    233     }
    234 
    235     /*
    236      * Test case for Ticket#10721. BreakIterator factory method should throw NPE
    237      * when specified locale is null.
    238      */
    239     @Test
    240     public void TestNullLocale() {
    241         Locale loc = null;
    242         ULocale uloc = null;
    243 
    244         @SuppressWarnings("unused")
    245         BreakIterator brk;
    246 
    247         // Character
    248         try {
    249             brk = BreakIterator.getCharacterInstance(loc);
    250             errln("getCharacterInstance((Locale)null) did not throw NPE.");
    251         } catch (NullPointerException e) { /* OK */ }
    252         try {
    253             brk = BreakIterator.getCharacterInstance(uloc);
    254             errln("getCharacterInstance((ULocale)null) did not throw NPE.");
    255         } catch (NullPointerException e) { /* OK */ }
    256 
    257         // Line
    258         try {
    259             brk = BreakIterator.getLineInstance(loc);
    260             errln("getLineInstance((Locale)null) did not throw NPE.");
    261         } catch (NullPointerException e) { /* OK */ }
    262         try {
    263             brk = BreakIterator.getLineInstance(uloc);
    264             errln("getLineInstance((ULocale)null) did not throw NPE.");
    265         } catch (NullPointerException e) { /* OK */ }
    266 
    267         // Sentence
    268         try {
    269             brk = BreakIterator.getSentenceInstance(loc);
    270             errln("getSentenceInstance((Locale)null) did not throw NPE.");
    271         } catch (NullPointerException e) { /* OK */ }
    272         try {
    273             brk = BreakIterator.getSentenceInstance(uloc);
    274             errln("getSentenceInstance((ULocale)null) did not throw NPE.");
    275         } catch (NullPointerException e) { /* OK */ }
    276 
    277         // Title
    278         try {
    279             brk = BreakIterator.getTitleInstance(loc);
    280             errln("getTitleInstance((Locale)null) did not throw NPE.");
    281         } catch (NullPointerException e) { /* OK */ }
    282         try {
    283             brk = BreakIterator.getTitleInstance(uloc);
    284             errln("getTitleInstance((ULocale)null) did not throw NPE.");
    285         } catch (NullPointerException e) { /* OK */ }
    286 
    287         // Word
    288         try {
    289             brk = BreakIterator.getWordInstance(loc);
    290             errln("getWordInstance((Locale)null) did not throw NPE.");
    291         } catch (NullPointerException e) { /* OK */ }
    292         try {
    293             brk = BreakIterator.getWordInstance(uloc);
    294             errln("getWordInstance((ULocale)null) did not throw NPE.");
    295         } catch (NullPointerException e) { /* OK */ }
    296     }
    297 
    298     /**
    299      * Test FilteredBreakIteratorBuilder newly introduced
    300      */
    301     @Test
    302     public void TestFilteredBreakIteratorBuilder() {
    303         FilteredBreakIteratorBuilder builder;
    304         BreakIterator baseBI;
    305         BreakIterator filteredBI;
    306 
    307         String text = "In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."; // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
    308         String ABBR_MR = "Mr.";
    309         String ABBR_CAPT = "Capt.";
    310 
    311         {
    312             logln("Constructing empty builder\n");
    313             builder = FilteredBreakIteratorBuilder.getEmptyInstance();
    314 
    315             logln("Constructing base BI\n");
    316             baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    317 
    318             logln("Building new BI\n");
    319             filteredBI = builder.wrapIteratorWithFilter(baseBI);
    320 
    321             assertDefaultBreakBehavior(filteredBI, text);
    322         }
    323 
    324         {
    325             logln("Constructing empty builder\n");
    326             builder = FilteredBreakIteratorBuilder.getEmptyInstance();
    327 
    328             logln("Adding Mr. as an exception\n");
    329 
    330             assertEquals("2.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
    331             assertEquals("2.2 suppressBreakAfter", false, builder.suppressBreakAfter(ABBR_MR));
    332             assertEquals("2.3 unsuppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_MR));
    333             assertEquals("2.4 unsuppressBreakAfter", false, builder.unsuppressBreakAfter(ABBR_MR));
    334             assertEquals("2.5 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
    335 
    336             logln("Constructing base BI\n");
    337             baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    338 
    339             logln("Building new BI\n");
    340             filteredBI = builder.wrapIteratorWithFilter(baseBI);
    341 
    342             logln("Testing:");
    343             filteredBI.setText(text);
    344             assertEquals("2nd next", 84, filteredBI.next());
    345             assertEquals("2nd next", 90, filteredBI.next());
    346             assertEquals("2nd next", 278, filteredBI.next());
    347             filteredBI.first();
    348         }
    349 
    350 
    351         {
    352           logln("Constructing empty builder\n");
    353           builder = FilteredBreakIteratorBuilder.getEmptyInstance();
    354 
    355           logln("Adding Mr. and Capt as an exception\n");
    356           assertEquals("3.1 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_MR));
    357           assertEquals("3.2 suppressBreakAfter", true, builder.suppressBreakAfter(ABBR_CAPT));
    358 
    359           logln("Constructing base BI\n");
    360           baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    361 
    362           logln("Building new BI\n");
    363           filteredBI = builder.wrapIteratorWithFilter(baseBI);
    364 
    365           logln("Testing:");
    366           filteredBI.setText(text);
    367           assertEquals("3rd next", 84, filteredBI.next());
    368           assertEquals("3rd next", 278, filteredBI.next());
    369           filteredBI.first();
    370         }
    371 
    372         {
    373           logln("Constructing English builder\n");
    374           builder = FilteredBreakIteratorBuilder.getInstance(ULocale.ENGLISH);
    375 
    376           logln("Constructing base BI\n");
    377           baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    378 
    379           logln("unsuppressing 'Capt'");
    380           assertEquals("1st suppressBreakAfter", true, builder.unsuppressBreakAfter(ABBR_CAPT));
    381 
    382           logln("Building new BI\n");
    383           filteredBI = builder.wrapIteratorWithFilter(baseBI);
    384 
    385           if(filteredBI != null) {
    386             logln("Testing:");
    387             filteredBI.setText(text);
    388             assertEquals("4th next", 84, filteredBI.next());
    389             assertEquals("4th next", 90, filteredBI.next());
    390             assertEquals("4th next", 278, filteredBI.next());
    391             filteredBI.first();
    392           }
    393         }
    394 
    395         {
    396           logln("Constructing English builder\n");
    397           builder = FilteredBreakIteratorBuilder.getInstance(ULocale.ENGLISH);
    398 
    399           logln("Constructing base BI\n");
    400           baseBI = BreakIterator.getSentenceInstance(Locale.ENGLISH);
    401 
    402           logln("Building new BI\n");
    403           filteredBI = builder.wrapIteratorWithFilter(baseBI);
    404 
    405           if(filteredBI != null) {
    406             assertEnglishBreakBehavior(filteredBI, text);
    407           }
    408         }
    409 
    410         {
    411             logln("Constructing English @ss=standard\n");
    412             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("en-US-u-ss-standard"));
    413 
    414             if(filteredBI != null) {
    415               assertEnglishBreakBehavior(filteredBI, text);
    416             }
    417         }
    418 
    419         {
    420             logln("Constructing Afrikaans @ss=standard - should be == default\n");
    421             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("af-u-ss-standard"));
    422 
    423             assertDefaultBreakBehavior(filteredBI, text);
    424         }
    425 
    426         {
    427             logln("Constructing Japanese @ss=standard - should be == default\n");
    428             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("ja-u-ss-standard"));
    429 
    430             assertDefaultBreakBehavior(filteredBI, text);
    431         }
    432         {
    433             logln("Constructing tfg @ss=standard - should be == default\n");
    434             filteredBI = BreakIterator.getSentenceInstance(ULocale.forLanguageTag("tfg-u-ss-standard"));
    435 
    436             assertDefaultBreakBehavior(filteredBI, text);
    437         }
    438 
    439         {
    440           logln("Constructing French builder");
    441           builder = FilteredBreakIteratorBuilder.getInstance(ULocale.FRENCH);
    442 
    443           logln("Constructing base BI\n");
    444           baseBI = BreakIterator.getSentenceInstance(Locale.FRENCH);
    445 
    446           logln("Building new BI\n");
    447           filteredBI = builder.wrapIteratorWithFilter(baseBI);
    448 
    449           if(filteredBI != null) {
    450             assertFrenchBreakBehavior(filteredBI, text);
    451           }
    452         }
    453     }
    454 
    455     /**
    456      * @param filteredBI
    457      * @param text
    458      */
    459     private void assertFrenchBreakBehavior(BreakIterator filteredBI, String text) {
    460         logln("Testing French behavior:");
    461         filteredBI.setText(text);
    462         assertEquals("6th next", 20, filteredBI.next());
    463         assertEquals("6th next", 84, filteredBI.next());
    464         filteredBI.first();
    465     }
    466 
    467     /**
    468      * @param filteredBI
    469      * @param text
    470      */
    471     private void assertEnglishBreakBehavior(BreakIterator filteredBI, String text) {
    472         logln("Testing English filtered behavior:");
    473           filteredBI.setText(text);
    474 
    475           assertEquals("5th next", 84, filteredBI.next());
    476           assertEquals("5th next", 278, filteredBI.next());
    477           filteredBI.first();
    478     }
    479 
    480     /**
    481      * @param filteredBI
    482      * @param text
    483      */
    484     private void assertDefaultBreakBehavior(BreakIterator filteredBI, String text) {
    485         logln("Testing Default Behavior:");
    486         filteredBI.setText(text);
    487         assertEquals("1st next", 20, filteredBI.next());
    488         assertEquals("1st next", 84, filteredBI.next());
    489         assertEquals("1st next", 90, filteredBI.next());
    490         assertEquals("1st next", 181, filteredBI.next());
    491         assertEquals("1st next", 278, filteredBI.next());
    492         filteredBI.first();
    493     }
    494 }
    495