Home | History | Annotate | Download | only in rbbi
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 1996-2013, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.dev.test.rbbi;
      8 
      9 //Regression testing of RuleBasedBreakIterator
     10 //
     11 //  TODO:  These tests should be mostly retired.
     12 //          Much of the test data that was originally here was removed when the RBBI rules
     13 //            were updated to match the Unicode boundary TRs, and the data was found to be invalid.
     14 //          Much of the remaining data has been moved into the rbbitst.txt test data file,
     15 //            which is common between ICU4C and ICU4J.  The remaining test data should also be moved,
     16 //            or simply retired if it is no longer interesting.
     17 import java.text.CharacterIterator;
     18 import java.util.ArrayList;
     19 import java.util.List;
     20 
     21 import com.ibm.icu.dev.test.TestFmwk;
     22 import com.ibm.icu.text.BreakIterator;
     23 import com.ibm.icu.text.RuleBasedBreakIterator;
     24 import com.ibm.icu.util.ULocale;
     25 
     26 public class RBBITest extends TestFmwk {
     27 
     28     public static void main(String[] args) throws Exception {
     29         new RBBITest().run(args);
     30     }
     31 
     /** Creates the suite; this constructor performs no initialization of its own. */
     public RBBITest() {
         super();
     }
     34 
     35     private static final String halfNA = "\u0928\u094d\u200d"; /*
     36                                                                 * halfform NA = devanigiri NA + virama(supresses
     37                                                                 * inherent vowel)+ zero width joiner
     38                                                                 */
     39 
     40     // tests default rules based character iteration.
     41     // Builds a new iterator from the source rules in the default (prebuilt) iterator.
     42     //
     43     public void TestDefaultRuleBasedCharacterIteration() {
     44         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance();
     45         logln("Testing the RBBI for character iteration by using default rules");
     46 
     47         // fetch the rules used to create the above RuleBasedBreakIterator
     48         String defaultRules = rbbi.toString();
     49 
     50         RuleBasedBreakIterator charIterDefault = null;
     51         try {
     52             charIterDefault = new RuleBasedBreakIterator(defaultRules);
     53         } catch (IllegalArgumentException iae) {
     54             errln("ERROR: failed construction in TestDefaultRuleBasedCharacterIteration()" + iae.toString());
     55         }
     56 
     57         List<String> chardata = new ArrayList<String>();
     58         chardata.add("H");
     59         chardata.add("e");
     60         chardata.add("l");
     61         chardata.add("l");
     62         chardata.add("o");
     63         chardata.add("e\u0301"); // acuteE
     64         chardata.add("&");
     65         chardata.add("e\u0303"); // tildaE
     66         // devanagiri characters for Hindi support
     67         chardata.add("\u0906"); // devanagiri AA
     68         // chardata.add("\u093e\u0901"); //devanagiri vowelsign AA+ chandrabindhu
     69         chardata.add("\u0916\u0947"); // devanagiri KHA+vowelsign E
     70         chardata.add("\u0938\u0941\u0902"); // devanagiri SA+vowelsign U + anusvara(bindu)
     71         chardata.add("\u0926"); // devanagiri consonant DA
     72         chardata.add("\u0930"); // devanagiri consonant RA
     73         // chardata.add("\u0939\u094c"); //devanagiri HA+vowel sign AI
     74         chardata.add("\u0964"); // devanagiri danda
     75         // end hindi characters
     76         chardata.add("A\u0302"); // circumflexA
     77         chardata.add("i\u0301"); // acuteBelowI
     78         // conjoining jamo...
     79         chardata.add("\u1109\u1161\u11bc");
     80         chardata.add("\u1112\u1161\u11bc");
     81         chardata.add("\n");
     82         chardata.add("\r\n"); // keep CRLF sequences together
     83         chardata.add("S\u0300"); // graveS
     84         chardata.add("i\u0301"); // acuteBelowI
     85         chardata.add("!");
     86 
     87         // What follows is a string of Korean characters (I found it in the Yellow Pages
     88         // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
     89         // it correctly), first as precomposed syllables, and then as conjoining jamo.
     90         // Both sequences should be semantically identical and break the same way.
     91         // precomposed syllables...
     92         chardata.add("\uc0c1");
     93         chardata.add("\ud56d");
     94         chardata.add(" ");
     95         chardata.add("\ud55c");
     96         chardata.add("\uc778");
     97         chardata.add(" ");
     98         chardata.add("\uc5f0");
     99         chardata.add("\ud569");
    100         chardata.add(" ");
    101         chardata.add("\uc7a5");
    102         chardata.add("\ub85c");
    103         chardata.add("\uad50");
    104         chardata.add("\ud68c");
    105         chardata.add(" ");
    106         // conjoining jamo...
    107         chardata.add("\u1109\u1161\u11bc");
    108         chardata.add("\u1112\u1161\u11bc");
    109         chardata.add(" ");
    110         chardata.add("\u1112\u1161\u11ab");
    111         chardata.add("\u110b\u1175\u11ab");
    112         chardata.add(" ");
    113         chardata.add("\u110b\u1167\u11ab");
    114         chardata.add("\u1112\u1161\u11b8");
    115         chardata.add(" ");
    116         chardata.add("\u110c\u1161\u11bc");
    117         chardata.add("\u1105\u1169");
    118         chardata.add("\u1100\u116d");
    119         chardata.add("\u1112\u116c");
    120 
    121         generalIteratorTest(charIterDefault, chardata);
    122 
    123     }
    124 
    125     public void TestDefaultRuleBasedWordIteration() {
    126         logln("Testing the RBBI for word iteration using default rules");
    127         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getWordInstance();
    128         // fetch the rules used to create the above RuleBasedBreakIterator
    129         String defaultRules = rbbi.toString();
    130 
    131         RuleBasedBreakIterator wordIterDefault = null;
    132         try {
    133             wordIterDefault = new RuleBasedBreakIterator(defaultRules);
    134         } catch (IllegalArgumentException iae) {
    135             errln("ERROR: failed construction in TestDefaultRuleBasedWordIteration() -- custom rules" + iae.toString());
    136         }
    137 
    138         List<String> worddata = new ArrayList<String>();
    139         worddata.add("Write");
    140         worddata.add(" ");
    141         worddata.add("wordrules");
    142         worddata.add(".");
    143         worddata.add(" ");
    144         // worddata.add("alpha-beta-gamma");
    145         worddata.add(" ");
    146         worddata.add("\u092f\u0939");
    147         worddata.add(" ");
    148         worddata.add("\u0939\u093f" + halfNA + "\u0926\u0940");
    149         worddata.add(" ");
    150         worddata.add("\u0939\u0948");
    151         // worddata.add("\u0964"); //danda followed by a space
    152         worddata.add(" ");
    153         worddata.add("\u0905\u093e\u092a");
    154         worddata.add(" ");
    155         worddata.add("\u0938\u093f\u0916\u094b\u0917\u0947");
    156         worddata.add("?");
    157         worddata.add(" ");
    158         worddata.add("\r");
    159         worddata.add("It's");
    160         worddata.add(" ");
    161         // worddata.add("$30.10");
    162         worddata.add(" ");
    163         worddata.add(" ");
    164         worddata.add("Badges");
    165         worddata.add("?");
    166         worddata.add(" ");
    167         worddata.add("BADGES");
    168         worddata.add("!");
    169         worddata.add("1000,233,456.000");
    170         worddata.add(" ");
    171 
    172         generalIteratorTest(wordIterDefault, worddata);
    173     }
    174 
    175 //    private static final String kParagraphSeparator = "\u2029";
    176     private static final String kLineSeparator      = "\u2028";
    177 
    178     public void TestDefaultRuleBasedSentenceIteration() {
    179         logln("Testing the RBBI for sentence iteration using default rules");
    180         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance();
    181 
    182         // fetch the rules used to create the above RuleBasedBreakIterator
    183         String defaultRules = rbbi.toString();
    184         RuleBasedBreakIterator sentIterDefault = null;
    185         try {
    186             sentIterDefault = new RuleBasedBreakIterator(defaultRules);
    187         } catch (IllegalArgumentException iae) {
    188             errln("ERROR: failed construction in TestDefaultRuleBasedSentenceIteration()" + iae.toString());
    189         }
    190 
    191         List<String> sentdata = new ArrayList<String>();
    192         sentdata.add("(This is it.) ");
    193         sentdata.add("Testing the sentence iterator. ");
    194         sentdata.add("\"This isn\'t it.\" ");
    195         sentdata.add("Hi! ");
    196         sentdata.add("This is a simple sample sentence. ");
    197         sentdata.add("(This is it.) ");
    198         sentdata.add("This is a simple sample sentence. ");
    199         sentdata.add("\"This isn\'t it.\" ");
    200         sentdata.add("Hi! ");
    201         sentdata.add("This is a simple sample sentence. ");
    202         sentdata.add("It does not have to make any sense as you can see. ");
    203         sentdata.add("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
    204         sentdata.add("Che la dritta via aveo smarrita. ");
    205 
    206         generalIteratorTest(sentIterDefault, sentdata);
    207     }
    208 
    209     public void TestDefaultRuleBasedLineIteration() {
    210         logln("Testing the RBBI for line iteration using default rules");
    211         RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) RuleBasedBreakIterator.getLineInstance();
    212         // fetch the rules used to create the above RuleBasedBreakIterator
    213         String defaultRules = rbbi.toString();
    214         RuleBasedBreakIterator lineIterDefault = null;
    215         try {
    216             lineIterDefault = new RuleBasedBreakIterator(defaultRules);
    217         } catch (IllegalArgumentException iae) {
    218             errln("ERROR: failed construction in TestDefaultRuleBasedLineIteration()" + iae.toString());
    219         }
    220 
    221         List<String> linedata = new ArrayList<String>();
    222         linedata.add("Multi-");
    223         linedata.add("Level ");
    224         linedata.add("example ");
    225         linedata.add("of ");
    226         linedata.add("a ");
    227         linedata.add("semi-");
    228         linedata.add("idiotic ");
    229         linedata.add("non-");
    230         linedata.add("sensical ");
    231         linedata.add("(non-");
    232         linedata.add("important) ");
    233         linedata.add("sentence. ");
    234 
    235         linedata.add("Hi  ");
    236         linedata.add("Hello ");
    237         linedata.add("How\n");
    238         linedata.add("are\r");
    239         linedata.add("you" + kLineSeparator);
    240         linedata.add("fine.\t");
    241         linedata.add("good.  ");
    242 
    243         linedata.add("Now\r");
    244         linedata.add("is\n");
    245         linedata.add("the\r\n");
    246         linedata.add("time\n");
    247         linedata.add("\r");
    248         linedata.add("for\r");
    249         linedata.add("\r");
    250         linedata.add("all");
    251 
    252         generalIteratorTest(lineIterDefault, linedata);
    253 
    254     }
    255 
    256     // =========================================================================
    257     // general test subroutines
    258     // =========================================================================
    259 
    260     private void generalIteratorTest(RuleBasedBreakIterator rbbi, List<String> expectedResult) {
    261         StringBuffer buffer = new StringBuffer();
    262         String text;
    263         for (int i = 0; i < expectedResult.size(); i++) {
    264             text = expectedResult.get(i);
    265             buffer.append(text);
    266         }
    267         text = buffer.toString();
    268         if (rbbi == null) {
    269             errln("null iterator, test skipped.");
    270             return;
    271         }
    272 
    273         rbbi.setText(text);
    274 
    275         List<String> nextResults = _testFirstAndNext(rbbi, text);
    276         List<String> previousResults = _testLastAndPrevious(rbbi, text);
    277 
    278         logln("comparing forward and backward...");
    279         int errs = getErrorCount();
    280         compareFragmentLists("forward iteration", "backward iteration", nextResults, previousResults);
    281         if (getErrorCount() == errs) {
    282             logln("comparing expected and actual...");
    283             compareFragmentLists("expected result", "actual result", expectedResult, nextResults);
    284         }
    285 
    286         int[] boundaries = new int[expectedResult.size() + 3];
    287         boundaries[0] = RuleBasedBreakIterator.DONE;
    288         boundaries[1] = 0;
    289         for (int i = 0; i < expectedResult.size(); i++) {
    290             boundaries[i + 2] = boundaries[i + 1] + (expectedResult.get(i).length());
    291         }
    292 
    293         boundaries[boundaries.length - 1] = RuleBasedBreakIterator.DONE;
    294 
    295         _testFollowing(rbbi, text, boundaries);
    296         _testPreceding(rbbi, text, boundaries);
    297         _testIsBoundary(rbbi, text, boundaries);
    298 
    299         doMultipleSelectionTest(rbbi, text);
    300     }
    301 
    302      private List<String> _testFirstAndNext(RuleBasedBreakIterator rbbi, String text) {
    303          int p = rbbi.first();
    304          int lastP = p;
    305          List<String> result = new ArrayList<String>();
    306 
    307          if (p != 0) {
    308              errln("first() returned " + p + " instead of 0");
    309          }
    310 
    311          while (p != RuleBasedBreakIterator.DONE) {
    312              p = rbbi.next();
    313              if (p != RuleBasedBreakIterator.DONE) {
    314                  if (p <= lastP) {
    315                      errln("next() failed to move forward: next() on position "
    316                                      + lastP + " yielded " + p);
    317                  }
    318                  result.add(text.substring(lastP, p));
    319              }
    320              else {
    321                  if (lastP != text.length()) {
    322                      errln("next() returned DONE prematurely: offset was "
    323                                      + lastP + " instead of " + text.length());
    324                  }
    325              }
    326              lastP = p;
    327          }
    328          return result;
    329      }
    330 
    331      private List<String> _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text) {
    332          int p = rbbi.last();
    333          int lastP = p;
    334          List<String> result = new ArrayList<String>();
    335 
    336          if (p != text.length()) {
    337              errln("last() returned " + p + " instead of " + text.length());
    338          }
    339 
    340          while (p != RuleBasedBreakIterator.DONE) {
    341              p = rbbi.previous();
    342              if (p != RuleBasedBreakIterator.DONE) {
    343                  if (p >= lastP) {
    344                      errln("previous() failed to move backward: previous() on position "
    345                                      + lastP + " yielded " + p);
    346                  }
    347 
    348                  result.add(0, text.substring(p, lastP));
    349              }
    350              else {
    351                  if (lastP != 0) {
    352                      errln("previous() returned DONE prematurely: offset was "
    353                                      + lastP + " instead of 0");
    354                  }
    355              }
    356              lastP = p;
    357          }
    358          return result;
    359      }
    360 
    361      private void compareFragmentLists(String f1Name, String f2Name, List<String> f1, List<String> f2) {
    362          int p1 = 0;
    363          int p2 = 0;
    364          String s1;
    365          String s2;
    366          int t1 = 0;
    367          int t2 = 0;
    368 
    369          while (p1 < f1.size() && p2 < f2.size()) {
    370              s1 = f1.get(p1);
    371              s2 = f2.get(p2);
    372              t1 += s1.length();
    373              t2 += s2.length();
    374 
    375              if (s1.equals(s2)) {
    376                  debugLogln("   >" + s1 + "<");
    377                  ++p1;
    378                  ++p2;
    379              }
    380              else {
    381                  int tempT1 = t1;
    382                  int tempT2 = t2;
    383                  int tempP1 = p1;
    384                  int tempP2 = p2;
    385 
    386                  while (tempT1 != tempT2 && tempP1 < f1.size() && tempP2 < f2.size()) {
    387                      while (tempT1 < tempT2 && tempP1 < f1.size()) {
    388                          tempT1 += (f1.get(tempP1)).length();
    389                          ++tempP1;
    390                      }
    391                      while (tempT2 < tempT1 && tempP2 < f2.size()) {
    392                          tempT2 += (f2.get(tempP2)).length();
    393                          ++tempP2;
    394                      }
    395                  }
    396                  logln("*** " + f1Name + " has:");
    397                  while (p1 <= tempP1 && p1 < f1.size()) {
    398                      s1 = f1.get(p1);
    399                      t1 += s1.length();
    400                      debugLogln(" *** >" + s1 + "<");
    401                      ++p1;
    402                  }
    403                  logln("***** " + f2Name + " has:");
    404                  while (p2 <= tempP2 && p2 < f2.size()) {
    405                      s2 = f2.get(p2);
    406                      t2 += s2.length();
    407                      debugLogln(" ***** >" + s2 + "<");
    408                      ++p2;
    409                  }
    410                  errln("Discrepancy between " + f1Name + " and " + f2Name);
    411              }
    412          }
    413      }
    414 
    415     private void _testFollowing(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
    416        logln("testFollowing():");
    417        int p = 2;
    418        for(int i = 0; i <= text.length(); i++) {
    419            if (i == boundaries[p])
    420                ++p;
    421            int b = rbbi.following(i);
    422            logln("rbbi.following(" + i + ") -> " + b);
    423            if (b != boundaries[p])
    424                errln("Wrong result from following() for " + i + ": expected " + boundaries[p]
    425                                + ", got " + b);
    426        }
    427    }
    428 
    429    private void _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
    430        logln("testPreceding():");
    431        int p = 0;
    432        for(int i = 0; i <= text.length(); i++) {
    433            int b = rbbi.preceding(i);
    434            logln("rbbi.preceding(" + i + ") -> " + b);
    435            if (b != boundaries[p])
    436                errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p]
    437                               + ", got " + b);
    438            if (i == boundaries[p + 1])
    439                ++p;
    440        }
    441    }
    442 
    443    private void _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
    444        logln("testIsBoundary():");
    445        int p = 1;
    446        boolean isB;
    447        for(int i = 0; i <= text.length(); i++) {
    448            isB = rbbi.isBoundary(i);
    449            logln("rbbi.isBoundary(" + i + ") -> " + isB);
    450            if(i == boundaries[p]) {
    451                if (!isB)
    452                    errln("Wrong result from isBoundary() for " + i + ": expected true, got false");
    453                ++p;
    454            }
    455            else {
    456                if(isB)
    457                    errln("Wrong result from isBoundary() for " + i + ": expected false, got true");
    458            }
    459        }
    460    }
    461    private void doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText)
    462    {
    463        logln("Multiple selection test...");
    464        RuleBasedBreakIterator testIterator = (RuleBasedBreakIterator)iterator.clone();
    465        int offset = iterator.first();
    466        int testOffset;
    467        int count = 0;
    468 
    469        do {
    470            testOffset = testIterator.first();
    471            testOffset = testIterator.next(count);
    472            logln("next(" + count + ") -> " + testOffset);
    473            if (offset != testOffset)
    474                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
    475 
    476            if (offset != RuleBasedBreakIterator.DONE) {
    477                count++;
    478                offset = iterator.next();
    479            }
    480        } while (offset != RuleBasedBreakIterator.DONE);
    481 
    482        // now do it backwards...
    483        offset = iterator.last();
    484        count = 0;
    485 
    486        do {
    487            testOffset = testIterator.last();
    488            testOffset = testIterator.next(count);
    489            logln("next(" + count + ") -> " + testOffset);
    490            if (offset != testOffset)
    491                errln("next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset);
    492 
    493            if (offset != RuleBasedBreakIterator.DONE) {
    494                count--;
    495                offset = iterator.previous();
    496            }
    497        } while (offset != RuleBasedBreakIterator.DONE);
    498    }
    499 
    500    private void debugLogln(String s) {
    501         final String zeros = "0000";
    502         String temp;
    503         StringBuffer out = new StringBuffer();
    504         for (int i = 0; i < s.length(); i++) {
    505             char c = s.charAt(i);
    506             if (c >= ' ' && c < '\u007f')
    507                 out.append(c);
    508             else {
    509                 out.append("\\u");
    510                 temp = Integer.toHexString((int)c);
    511                 out.append(zeros.substring(0, 4 - temp.length()));
    512                 out.append(temp);
    513             }
    514         }
    515          logln(out.toString());
    516     }
    517 
    518    public void TestThaiDictionaryBreakIterator() {
    519        int position;
    520        int index;
    521        int result[] = { 1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0 };
    522        char ctext[] = {
    523                0x0041, 0x0020,
    524                0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07,
    525                0x0020, 0x0041
    526                };
    527        String text = new String(ctext);
    528 
    529        ULocale locale = ULocale.createCanonical("th");
    530        BreakIterator b = BreakIterator.getWordInstance(locale);
    531 
    532        b.setText(text);
    533 
    534        index = 0;
    535        // Test forward iteration
    536        while ((position = b.next())!= BreakIterator.DONE) {
    537            if (position != result[index++]) {
    538                errln("Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + result[index-1]);
    539            }
    540        }
    541 
    542        // Test backward iteration
    543        while ((position = b.previous())!= BreakIterator.DONE) {
    544            if (position != result[index++]) {
    545                errln("Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + result[index-1]);
    546            }
    547        }
    548 
    549        //Test invalid sequence and spaces
    550        char text2[] = {
    551                0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
    552                0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
    553                0x0E16, 0x0E49, 0x0E33
    554        };
    555        int expectedWordResult[] = {
    556                2, 3, 6, 10, 11, 15, 17, 20, 22
    557        };
    558        int expectedLineResult[] = {
    559                3, 6, 11, 15, 17, 20, 22
    560        };
    561        BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th"));
    562        brk.setText(new String(text2));
    563        position = index = 0;
    564        while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
    565            if (position != expectedWordResult[index++]) {
    566                errln("Incorrect break given by thai word break iterator. Expected: " + expectedWordResult[index-1] + " Got: " + position);
    567            }
    568        }
    569 
    570        brk = BreakIterator.getLineInstance(new ULocale("th"));
    571        brk.setText(new String(text2));
    572        position = index = 0;
    573        while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) {
    574            if (position != expectedLineResult[index++]) {
    575                errln("Incorrect break given by thai line break iterator. Expected: " + expectedLineResult[index-1] + " Got: " + position);
    576            }
    577        }
    578        // Improve code coverage
    579        if (brk.preceding(expectedLineResult[1]) != expectedLineResult[0]) {
    580            errln("Incorrect preceding position.");
    581        }
    582        if (brk.following(expectedLineResult[1]) != expectedLineResult[2]) {
    583            errln("Incorrect following position.");
    584        }
    585        int []fillInArray = new int[2];
    586        if (((RuleBasedBreakIterator)brk).getRuleStatusVec(fillInArray) != 1 || fillInArray[0] != 0) {
    587            errln("Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
    588        }
    589    }
    590 
    591 
    592    // TODO: Move these test cases to rbbitst.txt if they aren't there already, then remove this test. It is redundant.
    593     public void TestTailoredBreaks() {
    594         class TBItem {
    595             private int     type;
    596             private ULocale locale;
    597             private String  text;
    598             private int[]   expectOffsets;
    599             TBItem(int typ, ULocale loc, String txt, int[] eOffs) {
    600                 type          = typ;
    601                 locale        = loc;
    602                 text          = txt;
    603                 expectOffsets = eOffs;
    604             }
    605             private static final int maxOffsetCount = 128;
    606             private boolean offsetsMatchExpected(int[] foundOffsets, int foundOffsetsLength) {
    607                 if ( foundOffsetsLength != expectOffsets.length ) {
    608                     return false;
    609                 }
    610                 for (int i = 0; i < foundOffsetsLength; i++) {
    611                     if ( foundOffsets[i] != expectOffsets[i] ) {
    612                         return false;
    613                     }
    614                 }
    615                 return true;
    616             }
    617             private String formatOffsets(int[] offsets, int length) {
    618                 StringBuffer buildString = new StringBuffer(4*maxOffsetCount);
    619                 for (int i = 0; i < length; i++) {
    620                     buildString.append(" " + offsets[i]);
    621                 }
    622                 return buildString.toString();
    623             }
    624             public void doTest() {
    625                 BreakIterator brkIter;
    626                 switch( type ) {
    627                     case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break;
    628                     case BreakIterator.KIND_WORD:      brkIter = BreakIterator.getWordInstance(locale); break;
    629                     case BreakIterator.KIND_LINE:      brkIter = BreakIterator.getLineInstance(locale); break;
    630                     case BreakIterator.KIND_SENTENCE:  brkIter = BreakIterator.getSentenceInstance(locale); break;
    631                     default: errln("Unsupported break iterator type " + type); return;
    632                 }
    633                 brkIter.setText(text);
    634                 int[] foundOffsets = new int[maxOffsetCount];
    635                 int offset, foundOffsetsCount = 0;
    636                 // do forwards iteration test
    637                 while ( foundOffsetsCount < maxOffsetCount && (offset = brkIter.next()) != BreakIterator.DONE ) {
    638                     foundOffsets[foundOffsetsCount++] = offset;
    639                 }
    640                 if ( !offsetsMatchExpected(foundOffsets, foundOffsetsCount) ) {
    641                     // log error for forwards test
    642                     String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
    643                     errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
    644                             "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
    645                             "; found " + foundOffsetsCount + " offsets fwd:" + formatOffsets(foundOffsets, foundOffsetsCount) );
    646                 } else {
    647                     // do backwards iteration test
    648                     --foundOffsetsCount; // back off one from the end offset
    649                     while ( foundOffsetsCount > 0 ) {
    650                         offset = brkIter.previous();
    651                         if ( offset != foundOffsets[--foundOffsetsCount] ) {
    652                             // log error for backwards test
    653                             String textToDisplay = (text.length() <= 16)? text: text.substring(0,16);
    654                             errln("For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" +
    655                                     "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) +
    656                                     "; found rev offset " + offset + " where expect " + foundOffsets[foundOffsetsCount] );
    657                             break;
    658                         }
    659                     }
    660                 }
    661             }
    662         }
    663         // KIND_SENTENCE "el"
    664         final String elSentText     = "\u0391\u03B2, \u03B3\u03B4; \u0395 \u03B6\u03B7\u037E \u0398 \u03B9\u03BA. " +
    665                                       "\u039B\u03BC \u03BD\u03BE! \u039F\u03C0, \u03A1\u03C2? \u03A3";
    666         final int[]  elSentTOffsets = { 8, 14, 20, 27, 35, 36 };
    667         final int[]  elSentROffsets = {        20, 27, 35, 36 };
    668         // KIND_CHARACTER "th"
    669         final String thCharText     = "\u0E01\u0E23\u0E30\u0E17\u0E48\u0E2D\u0E21\u0E23\u0E08\u0E19\u0E32 " +
    670                                       "(\u0E2A\u0E38\u0E0A\u0E32\u0E15\u0E34-\u0E08\u0E38\u0E11\u0E32\u0E21\u0E32\u0E28) " +
    671                                       "\u0E40\u0E14\u0E47\u0E01\u0E21\u0E35\u0E1B\u0E31\u0E0D\u0E2B\u0E32 ";
    672         final int[]  thCharTOffsets = { 1, 2, 3, 5, 6, 7, 8, 9, 10, 11,
    673                                         12, 13, 15, 16, 17, 19, 20, 22, 23, 24, 25, 26, 27, 28,
    674                                         29, 30, 32, 33, 35, 37, 38, 39, 40, 41 };
    675         //starting in Unicode 6.1, root behavior should be the same as Thai above
    676         //final int[]  thCharROffsets = { 1,    3, 5, 6, 7, 8, 9,     11,
    677         //                                12, 13, 15,     17, 19, 20, 22,     24,     26, 27, 28,
    678         //                                29,     32, 33, 35, 37, 38,     40, 41 };
    679 
    680         final TBItem[] tests = {
    681             new TBItem( BreakIterator.KIND_SENTENCE,  new ULocale("el"),          elSentText,   elSentTOffsets   ),
    682             new TBItem( BreakIterator.KIND_SENTENCE,  ULocale.ROOT,               elSentText,   elSentROffsets   ),
    683             new TBItem( BreakIterator.KIND_CHARACTER, new ULocale("th"),          thCharText,   thCharTOffsets   ),
    684             new TBItem( BreakIterator.KIND_CHARACTER, ULocale.ROOT,               thCharText,   thCharTOffsets   ),
    685         };
    686         for (int iTest = 0; iTest < tests.length; iTest++) {
    687             tests[iTest].doTest();
    688         }
    689     }
    690 
    691     /* Tests the method public Object clone() */
    692     public void TestClone() {
    693         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    694         try {
    695             rbbi.setText((CharacterIterator) null);
    696             if (((RuleBasedBreakIterator) rbbi.clone()).getText() != null)
    697                 errln("RuleBasedBreakIterator.clone() was suppose to return "
    698                         + "the same object because fText is set to null.");
    699         } catch (Exception e) {
    700             errln("RuleBasedBreakIterator.clone() was not suppose to return " + "an exception.");
    701         }
    702     }
    703 
    704     /*
    705      * Tests the method public boolean equals(Object that)
    706      */
    707     public void TestEquals() {
    708         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    709         RuleBasedBreakIterator rbbi1 = new RuleBasedBreakIterator("");
    710 
    711         // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is true
    712 
    713         // Tests when "if (fText == null || other.fText == null)" is true
    714         rbbi.setText((CharacterIterator) null);
    715         if (rbbi.equals(rbbi1)) {
    716             errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
    717                     + "true when the other object has a null fText.");
    718         }
    719 
    720         // Tests when "if (fText == null && other.fText == null)" is true
    721         rbbi1.setText((CharacterIterator) null);
    722         if (!rbbi.equals(rbbi1)) {
    723             errln("RuleBasedBreakIterator.equals(Object) was not suppose to return "
    724                     + "false when both objects has a null fText.");
    725         }
    726 
    727         // Tests when an exception occurs
    728         if (rbbi.equals(0)) {
    729             errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0.");
    730         }
    731         if (rbbi.equals(0.0)) {
    732             errln("RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0.");
    733         }
    734         if (rbbi.equals("0")) {
    735             errln("RuleBasedBreakIterator.equals(Object) was suppose to return "
    736                     + "false when comparing to string '0'.");
    737         }
    738     }
    739 
    740     /*
    741      * Tests the method public void dump()
    742      */
    743     public void TestDump() {
    744         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    745         try {
    746             rbbi.dump();
    747             errln("RuleBasedBreakIterator.dump() was suppose to return "
    748                     + "an exception for a blank RuleBasedBreakIterator object.");
    749         } catch (Exception e) {
    750         }
    751     }
    752 
    753     /*
    754      * Tests the method public int first()
    755      */
    756     public void TestFirst() {
    757         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    758         // Tests when "if (fText == null)" is true
    759         rbbi.setText((CharacterIterator) null);
    760         if (rbbi.first() != BreakIterator.DONE) {
    761             errln("RuleBasedBreakIterator.first() was suppose to return "
    762                     + "BreakIterator.DONE when the object has a null fText.");
    763         }
    764     }
    765 
    766     /*
    767      * Tests the method public int last()
    768      */
    769     public void TestLast() {
    770         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    771         // Tests when "if (fText == null)" is true
    772         rbbi.setText((CharacterIterator) null);
    773         if (rbbi.last() != BreakIterator.DONE) {
    774             errln("RuleBasedBreakIterator.last() was suppose to return "
    775                     + "BreakIterator.DONE when the object has a null fText.");
    776         }
    777     }
    778 
    779     /*
    780      * Tests the method public int following(int offset)
    781      */
    782     public void TestFollowing() {
    783         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    784         // Tests when "else if (offset < fText.getBeginIndex())" is true
    785         rbbi.setText("dummy");
    786         if (rbbi.following(-1) != 0) {
    787             errln("RuleBasedBreakIterator.following(-1) was suppose to return "
    788                     + "0 when the object has a fText of dummy.");
    789         }
    790     }
    791 
    792     /*
    793      * Tests the method public int preceding(int offset)
    794      */
    795     public void TestPreceding() {
    796         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    797         // Tests when "if (fText == null || offset > fText.getEndIndex())" is true
    798         rbbi.setText((CharacterIterator)null);
    799         if (rbbi.preceding(-1) != BreakIterator.DONE) {
    800             errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
    801                     + "0 when the object has a fText of null.");
    802         }
    803 
    804         // Tests when "else if (offset < fText.getBeginIndex())" is true
    805         rbbi.setText("dummy");
    806         if (rbbi.preceding(-1) != 0) {
    807             errln("RuleBasedBreakIterator.preceding(-1) was suppose to return "
    808                     + "0 when the object has a fText of dummy.");
    809         }
    810     }
    811 
    812     /* Tests the method public int current() */
    813     public void TestCurrent(){
    814         RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator("");
    815         // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.DONE" is true and false
    816         rbbi.setText((CharacterIterator)null);
    817         if(rbbi.current() != BreakIterator.DONE){
    818             errln("RuleBasedBreakIterator.current() was suppose to return "
    819                     + "BreakIterator.DONE when the object has a fText of null.");
    820         }
    821         rbbi.setText("dummy");
    822         if(rbbi.current() != 0){
    823             errln("RuleBasedBreakIterator.current() was suppose to return "
    824                     + "0 when the object has a fText of dummy.");
    825         }
    826     }
    827 }
    828