Home | History | Annotate | Download | only in rbbi
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2003-2016 International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 package android.icu.dev.test.rbbi;
     11 
     12 
     13 // Monkey testing of RuleBasedBreakIterator.
     14 //    The old, original monkey test. TODO: remove
     15 //    The new monkey test is class RBBIMonkeyTest.
     16 
     17 import java.util.ArrayList;
     18 import java.util.Arrays;
     19 import java.util.List;
     20 import java.util.Locale;
     21 
     22 import org.junit.Test;
     23 import org.junit.runner.RunWith;
     24 import org.junit.runners.JUnit4;
     25 
     26 import android.icu.dev.test.TestFmwk;
     27 import android.icu.lang.UCharacter;
     28 import android.icu.lang.UProperty;
     29 import android.icu.text.BreakIterator;
     30 import android.icu.text.RuleBasedBreakIterator;
     31 import android.icu.text.UTF16;
     32 import android.icu.text.UnicodeSet;
     33 import android.icu.testsharding.MainTestShard;
     34 
     35 
     36 /**
     37  * Monkey tests for RBBI.  These tests have independent implementations of
     38  * the Unicode TR boundary rules, and compare results between these and ICU's
     39  * implementation, using random data.
     40  *
     41  * Tests cover Grapheme Cluster (char), Word and Line breaks
     42  *
     43  * Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp
     44  *
     45  */
     46 @MainTestShard
     47 @RunWith(JUnit4.class)
     48 public class RBBITestMonkey extends TestFmwk {
     49     //
     50     //     class RBBIMonkeyKind
     51     //
     52     //        Monkey Test for Break Iteration
     53     //        Abstract interface class.   Concrete derived classes independently
     54     //        implement the break rules for different iterator types.
     55     //
     56     //        The Monkey Test itself doesn't know which type of break iterator it is
     57     //        testing, but works purely in terms of the interface defined here.
     58     //
     59     abstract static class RBBIMonkeyKind {
     60 
     61         // Return a List of UnicodeSets, representing the character classes used
     62         //   for this type of iterator.
     63         abstract  List  charClasses();
     64 
     65         // Set the test text on which subsequent calls to next() will operate
     66         abstract  void   setText(StringBuffer text);
     67 
     68         // Find the next break position, starting from the specified position.
     69         // Return -1 after reaching end of string.
     70         abstract   int   next(int i);
     71 
     72         // A Character Property, one of the constants defined in class UProperty.
     73         //   The value of this property will be displayed for the characters
     74         //    near any test failure.
     75         int   fCharProperty;
     76     }
     77 
     78     //
     79     // Data for Extended Pictographic scraped from CLDR common/properties/ExtendedPictographic.txt, 13267
     80     //
     81     static String gExtended_Pict = "[" +
     82             "\\U0001F774-\\U0001F77F\\U00002700-\\U00002701\\U00002703-\\U00002704\\U0000270E\\U00002710-\\U00002711\\U00002765-\\U00002767" +
     83             "\\U0001F030-\\U0001F093\\U0001F094-\\U0001F09F\\U0001F10D-\\U0001F10F\\U0001F12F\\U0001F16C-\\U0001F16F\\U0001F1AD-\\U0001F1E5" +
     84             "\\U0001F260-\\U0001F265\\U0001F203-\\U0001F20F\\U0001F23C-\\U0001F23F\\U0001F249-\\U0001F24F\\U0001F252-\\U0001F25F" +
     85             "\\U0001F266-\\U0001F2FF\\U0001F7D5-\\U0001F7FF\\U0001F000-\\U0001F003\\U0001F005-\\U0001F02B\\U0001F02C-\\U0001F02F" +
     86             "\\U0001F322-\\U0001F323\\U0001F394-\\U0001F395\\U0001F398\\U0001F39C-\\U0001F39D\\U0001F3F1-\\U0001F3F2\\U0001F3F6" +
     87             "\\U0001F4FE\\U0001F53E-\\U0001F548\\U0001F54F\\U0001F568-\\U0001F56E\\U0001F571-\\U0001F572\\U0001F57B-\\U0001F586" +
     88             "\\U0001F588-\\U0001F589\\U0001F58E-\\U0001F58F\\U0001F591-\\U0001F594\\U0001F597-\\U0001F5A3\\U0001F5A6-\\U0001F5A7" +
     89             "\\U0001F5A9-\\U0001F5B0\\U0001F5B3-\\U0001F5BB\\U0001F5BD-\\U0001F5C1\\U0001F5C5-\\U0001F5D0\\U0001F5D4-\\U0001F5DB" +
     90             "\\U0001F5DF-\\U0001F5E0\\U0001F5E2\\U0001F5E4-\\U0001F5E7\\U0001F5E9-\\U0001F5EE\\U0001F5F0-\\U0001F5F2\\U0001F5F4-\\U0001F5F9" +
     91             "\\U00002605\\U00002607-\\U0000260D\\U0000260F-\\U00002610\\U00002612\\U00002616-\\U00002617\\U00002619-\\U0000261C" +
     92             "\\U0000261E-\\U0000261F\\U00002621\\U00002624-\\U00002625\\U00002627-\\U00002629\\U0000262B-\\U0000262D\\U00002630-\\U00002637" +
     93             "\\U0000263B-\\U00002647\\U00002654-\\U0000265F\\U00002661-\\U00002662\\U00002664\\U00002667\\U00002669-\\U0000267A" +
     94             "\\U0000267C-\\U0000267E\\U00002680-\\U00002691\\U00002695\\U00002698\\U0000269A\\U0000269D-\\U0000269F\\U000026A2-\\U000026A9" +
     95             "\\U000026AC-\\U000026AF\\U000026B2-\\U000026BC\\U000026BF-\\U000026C3\\U000026C6-\\U000026C7\\U000026C9-\\U000026CD" +
     96             "\\U000026D0\\U000026D2\\U000026D5-\\U000026E8\\U000026EB-\\U000026EF\\U000026F6\\U000026FB-\\U000026FC\\U000026FE-\\U000026FF" +
     97             "\\U00002388\\U0001FA00-\\U0001FFFD\\U0001F0A0-\\U0001F0AE\\U0001F0B1-\\U0001F0BF\\U0001F0C1-\\U0001F0CF\\U0001F0D1-\\U0001F0F5" +
     98             "\\U0001F0AF-\\U0001F0B0\\U0001F0C0\\U0001F0D0\\U0001F0F6-\\U0001F0FF\\U0001F80C-\\U0001F80F\\U0001F848-\\U0001F84F" +
     99             "\\U0001F85A-\\U0001F85F\\U0001F888-\\U0001F88F\\U0001F8AE-\\U0001F8FF\\U0001F900-\\U0001F90B\\U0001F91F\\U0001F928-\\U0001F92F" +
    100             "\\U0001F931-\\U0001F932\\U0001F94C\\U0001F95F-\\U0001F96B\\U0001F992-\\U0001F997\\U0001F9D0-\\U0001F9E6\\U0001F90C-\\U0001F90F" +
    101             "\\U0001F93F\\U0001F94D-\\U0001F94F\\U0001F96C-\\U0001F97F\\U0001F998-\\U0001F9BF\\U0001F9C1-\\U0001F9CF\\U0001F9E7-\\U0001F9FF" +
    102             "\\U0001F6C6-\\U0001F6CA\\U0001F6D3-\\U0001F6D4\\U0001F6E6-\\U0001F6E8\\U0001F6EA\\U0001F6F1-\\U0001F6F2\\U0001F6F7-\\U0001F6F8" +
    103             "\\U0001F6D5-\\U0001F6DF\\U0001F6ED-\\U0001F6EF\\U0001F6F9-\\U0001F6FF" +
    104             "]";
    105 
    106 
    107     /**
    108      * Monkey test subclass for testing Character (Grapheme Cluster) boundaries.
    109      * Note: As of Unicode 6.1, fPrependSet is empty, so don't add it to fSets
    110      */
    111     static class RBBICharMonkey extends RBBIMonkeyKind {
    112         List                      fSets;
    113 
    114         UnicodeSet                fCRLFSet;
    115         UnicodeSet                fControlSet;
    116         UnicodeSet                fExtendSet;
    117         UnicodeSet                fRegionalIndicatorSet;
    118         UnicodeSet                fPrependSet;
    119         UnicodeSet                fSpacingSet;
    120         UnicodeSet                fLSet;
    121         UnicodeSet                fVSet;
    122         UnicodeSet                fTSet;
    123         UnicodeSet                fLVSet;
    124         UnicodeSet                fLVTSet;
    125         UnicodeSet                fHangulSet;
    126         UnicodeSet                fEmojiModifierSet;
    127         UnicodeSet                fEmojiBaseSet;
    128         UnicodeSet                fZWJSet;
    129         UnicodeSet                fExtendedPictSet;
    130         UnicodeSet                fEBGSet;
    131         UnicodeSet                fEmojiNRKSet;
    132         UnicodeSet                fAnySet;
    133 
    134 
    135         StringBuffer              fText;
    136 
    137 
    138         RBBICharMonkey() {
    139             fText       = null;
    140             fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
    141             fCRLFSet    = new UnicodeSet("[\\r\\n]");
    142             fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]");
    143             fExtendSet  = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]");
    144             fZWJSet     = new UnicodeSet("[\\p{Grapheme_Cluster_Break = ZWJ}]");
    145             fRegionalIndicatorSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]");
    146             fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]");
    147             fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]");
    148             fLSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = L}]");
    149             fVSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = V}]");
    150             fTSet       = new UnicodeSet("[\\p{Grapheme_Cluster_Break = T}]");
    151             fLVSet      = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LV}]");
    152             fLVTSet     = new UnicodeSet("[\\p{Grapheme_Cluster_Break = LVT}]");
    153             fHangulSet  = new UnicodeSet();
    154             fHangulSet.addAll(fLSet);
    155             fHangulSet.addAll(fVSet);
    156             fHangulSet.addAll(fTSet);
    157             fHangulSet.addAll(fLVSet);
    158             fHangulSet.addAll(fLVTSet);
    159 
    160             fEmojiBaseSet     = new UnicodeSet("[\\p{Grapheme_Cluster_Break = EB}]");
    161             fEmojiModifierSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = EM}]");
    162             fExtendedPictSet  = new UnicodeSet(gExtended_Pict);
    163             fEBGSet           = new UnicodeSet("[\\p{Grapheme_Cluster_Break = EBG}]");
    164             fEmojiNRKSet      = new UnicodeSet("[[\\p{Emoji}]-[\\p{Grapheme_Cluster_Break = Regional_Indicator}*#0-9]]");
    165             fAnySet           = new UnicodeSet("[\\u0000-\\U0010ffff]");
    166 
    167 
    168             fSets       = new ArrayList();
    169             fSets.add(fCRLFSet);
    170             fSets.add(fControlSet);
    171             fSets.add(fExtendSet);
    172             fSets.add(fRegionalIndicatorSet);
    173             if (!fPrependSet.isEmpty()) {
    174                 fSets.add(fPrependSet);
    175             }
    176             fSets.add(fSpacingSet);
    177             fSets.add(fHangulSet);
    178             fSets.add(fAnySet);
    179             fSets.add(fEmojiBaseSet);
    180             fSets.add(fEmojiModifierSet);
    181             fSets.add(fZWJSet);
    182             fSets.add(fExtendedPictSet);
    183             fSets.add(fEBGSet);
    184             fSets.add(fEmojiNRKSet);
    185         }
    186 
    187 
    188         @Override
    189         void setText(StringBuffer s) {
    190             fText = s;
    191         }
    192 
    193         @Override
    194         List charClasses() {
    195             return fSets;
    196         }
    197 
    198         @Override
    199         int next(int prevPos) {
    200             int    /*p0,*/ p1, p2, p3;    // Indices of the significant code points around the
    201             //   break position being tested.  The candidate break
    202             //   location is before p2.
    203 
    204             int     breakPos = -1;
    205 
    206             int   c0, c1, c2, c3;     // The code points at p0, p1, p2 & p3.
    207             int   cBase;              // for (X Extend*) patterns, the X character.
    208 
    209             // Previous break at end of string.  return DONE.
    210             if (prevPos >= fText.length()) {
    211                 return -1;
    212             }
    213             /* p0 = */ p1 = p2 = p3 = prevPos;
    214             c3 =  UTF16.charAt(fText, prevPos);
    215             c0 = c1 = c2 = cBase = 0;
    216 
    217             // Loop runs once per "significant" character position in the input text.
    218             for (;;) {
    219                 // Move all of the positions forward in the input string.
    220                 /* p0 = p1;*/  c0 = c1;
    221                 p1 = p2;  c1 = c2;
    222                 p2 = p3;  c2 = c3;
    223 
    224                 // Advance p3 by one codepoint
    225                 p3 = moveIndex32(fText, p3, 1);
    226                 c3 = (p3>=fText.length())? -1: UTF16.charAt(fText, p3);
    227 
    228                 if (p1 == p2) {
    229                     // Still warming up the loop.  (won't work with zero length strings, but we don't care)
    230                     continue;
    231                 }
    232                 if (p2 == fText.length()) {
    233                     // Reached end of string.  Always a break position.
    234                     break;
    235                 }
    236 
    237                 // Rule  GB3   CR x LF
    238                 //     No Extend or Format characters may appear between the CR and LF,
    239                 //     which requires the additional check for p2 immediately following p1.
    240                 //
    241                 if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
    242                     continue;
    243                 }
    244 
    245                 // Rule (GB4).   ( Control | CR | LF ) <break>
    246                 if (fControlSet.contains(c1) ||
    247                         c1 == 0x0D ||
    248                         c1 == 0x0A)  {
    249                     break;
    250                 }
    251 
    252                 // Rule (GB5)    <break>  ( Control | CR | LF )
    253                 //
    254                 if (fControlSet.contains(c2) ||
    255                         c2 == 0x0D ||
    256                         c2 == 0x0A)  {
    257                     break;
    258                 }
    259 
    260 
    261                 // Rule (GB6)  L x ( L | V | LV | LVT )
    262                 if (fLSet.contains(c1) &&
    263                         (fLSet.contains(c2)  ||
    264                                 fVSet.contains(c2)  ||
    265                                 fLVSet.contains(c2) ||
    266                                 fLVTSet.contains(c2))) {
    267                     continue;
    268                 }
    269 
    270                 // Rule (GB7)    ( LV | V )  x  ( V | T )
    271                 if ((fLVSet.contains(c1) || fVSet.contains(c1)) &&
    272                         (fVSet.contains(c2) || fTSet.contains(c2)))  {
    273                     continue;
    274                 }
    275 
    276                 // Rule (GB8)    ( LVT | T)  x T
    277                 if ((fLVTSet.contains(c1) || fTSet.contains(c1)) &&
    278                         fTSet.contains(c2))  {
    279                     continue;
    280                 }
    281 
    282                 // Rule (GB9)    x (Extend | ZWJ)
    283                 if (fExtendSet.contains(c2) || fZWJSet.contains(c2))  {
    284                     if (!fExtendSet.contains(c1)) {
    285                         cBase = c1;
    286                     }
    287                     continue;
    288                 }
    289 
    290                 // Rule (GB9a)   x  SpacingMark
    291                 if (fSpacingSet.contains(c2)) {
    292                     continue;
    293                 }
    294 
    295                 // Rule (GB9b)   Prepend x
    296                 if (fPrependSet.contains(c1)) {
    297                     continue;
    298                 }
    299                 // Rule (GB10)   (Emoji_Base | EBG) Extend* x Emoji_Modifier
    300                 if ((fEmojiBaseSet.contains(c1) || fEBGSet.contains(c1)) && fEmojiModifierSet.contains(c2)) {
    301                     continue;
    302                 }
    303                 if ((fEmojiBaseSet.contains(cBase) || fEBGSet.contains(cBase)) &&
    304                         fExtendSet.contains(c1) && fEmojiModifierSet.contains(c2)) {
    305                     continue;
    306                 }
    307 
    308                 // Rule (GB11)   (Extended_Pictographic | Emoji) ZWJ x (Extended_Pictographic | Emoji)
    309                 if ((fExtendedPictSet.contains(c0) || fEmojiNRKSet.contains(c0)) && fZWJSet.contains(c1) &&
    310                         (fExtendedPictSet.contains(c2) || fEmojiNRKSet.contains(c2))) {
    311                     continue;
    312                 }
    313                 if ((fExtendedPictSet.contains(cBase) || fEmojiNRKSet.contains(cBase)) && fExtendSet.contains(c0) && fZWJSet.contains(c1) &&
    314                         (fExtendedPictSet.contains(c2) || fEmojiNRKSet.contains(c2))) {
    315                     continue;
    316                 }
    317 
    318                 // Rule (GB12-13)   Regional_Indicator x Regional_Indicator
    319                 //                  Note: The first if condition is a little tricky. We only need to force
    320                 //                      a break if there are three or more contiguous RIs. If there are
    321                 //                      only two, a break following will occur via other rules, and will include
    322                 //                      any trailing extend characters, which is needed behavior.
    323                 if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)
    324                         && fRegionalIndicatorSet.contains(c2)) {
    325                     break;
    326                 }
    327                 if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
    328                     continue;
    329                 }
    330 
    331                 // Rule (GB999)  Any  <break>  Any
    332                 break;
    333             }
    334 
    335             breakPos = p2;
    336             return breakPos;
    337         }
    338     }
    339 
    340 
    341     /**
    342      *
    343      * Word Monkey Test Class
    344      *
    345      *
    346      *
    347      */
    348     static class RBBIWordMonkey extends RBBIMonkeyKind {
    349         List                      fSets;
    350         StringBuffer              fText;
    351 
    352         UnicodeSet                fCRSet;
    353         UnicodeSet                fLFSet;
    354         UnicodeSet                fNewlineSet;
    355         UnicodeSet                fRegionalIndicatorSet;
    356         UnicodeSet                fKatakanaSet;
    357         UnicodeSet                fHebrew_LetterSet;
    358         UnicodeSet                fALetterSet;
    359         UnicodeSet                fSingle_QuoteSet;
    360         UnicodeSet                fDouble_QuoteSet;
    361         UnicodeSet                fMidNumLetSet;
    362         UnicodeSet                fMidLetterSet;
    363         UnicodeSet                fMidNumSet;
    364         UnicodeSet                fNumericSet;
    365         UnicodeSet                fFormatSet;
    366         UnicodeSet                fExtendSet;
    367         UnicodeSet                fExtendNumLetSet;
    368         UnicodeSet                fOtherSet;
    369         UnicodeSet                fDictionarySet;
    370         UnicodeSet                fEBaseSet;
    371         UnicodeSet                fEBGSet;
    372         UnicodeSet                fEModifierSet;
    373         UnicodeSet                fZWJSet;
    374         UnicodeSet                fExtendedPictSet;
    375         UnicodeSet                fEmojiNRKSet;
    376 
    377 
    378         RBBIWordMonkey() {
    379             fCharProperty    = UProperty.WORD_BREAK;
    380 
    381             fCRSet           = new UnicodeSet("[\\p{Word_Break = CR}]");
    382             fLFSet           = new UnicodeSet("[\\p{Word_Break = LF}]");
    383             fNewlineSet      = new UnicodeSet("[\\p{Word_Break = Newline}]");
    384             fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");
    385             fKatakanaSet     = new UnicodeSet("[\\p{Word_Break = Katakana}]");
    386             fHebrew_LetterSet = new UnicodeSet("[\\p{Word_Break = Hebrew_Letter}]");
    387             fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}]");
    388             fSingle_QuoteSet = new UnicodeSet("[\\p{Word_Break = Single_Quote}]");
    389             fDouble_QuoteSet = new UnicodeSet("[\\p{Word_Break = Double_Quote}]");
    390             fMidNumLetSet    = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
    391             fMidLetterSet    = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
    392             fMidNumSet       = new UnicodeSet("[\\p{Word_Break = MidNum}]");
    393             fNumericSet      = new UnicodeSet("[\\p{Word_Break = Numeric}]");
    394             fFormatSet       = new UnicodeSet("[\\p{Word_Break = Format}]");
    395             fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
    396             fExtendSet       = new UnicodeSet("[\\p{Word_Break = Extend}]");
    397             fEBaseSet        = new UnicodeSet("[\\p{Word_Break = EB}]");
    398             fEBGSet          = new UnicodeSet("[\\p{Word_Break = EBG}]");
    399             fEModifierSet    = new UnicodeSet("[\\p{Word_Break = EM}]");
    400             fZWJSet          = new UnicodeSet("[\\p{Word_Break = ZWJ}]");
    401             fExtendedPictSet = new UnicodeSet(gExtended_Pict);
    402             fEmojiNRKSet     = new UnicodeSet("[[\\p{Emoji}]-[\\p{Grapheme_Cluster_Break = Regional_Indicator}*#0-9]]");
    403 
    404             fDictionarySet = new UnicodeSet("[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]");
    405             fDictionarySet.addAll(fKatakanaSet);
    406             fDictionarySet.addAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
    407 
    408             fALetterSet.removeAll(fDictionarySet);
    409 
    410             fOtherSet        = new UnicodeSet();
    411             fOtherSet.complement();
    412             fOtherSet.removeAll(fCRSet);
    413             fOtherSet.removeAll(fLFSet);
    414             fOtherSet.removeAll(fNewlineSet);
    415             fOtherSet.removeAll(fALetterSet);
    416             fOtherSet.removeAll(fSingle_QuoteSet);
    417             fOtherSet.removeAll(fDouble_QuoteSet);
    418             fOtherSet.removeAll(fKatakanaSet);
    419             fOtherSet.removeAll(fHebrew_LetterSet);
    420             fOtherSet.removeAll(fMidLetterSet);
    421             fOtherSet.removeAll(fMidNumSet);
    422             fOtherSet.removeAll(fNumericSet);
    423             fOtherSet.removeAll(fFormatSet);
    424             fOtherSet.removeAll(fExtendSet);
    425             fOtherSet.removeAll(fExtendNumLetSet);
    426             fOtherSet.removeAll(fRegionalIndicatorSet);
    427             fOtherSet.removeAll(fEBaseSet);
    428             fOtherSet.removeAll(fEBGSet);
    429             fOtherSet.removeAll(fEModifierSet);
    430             fOtherSet.removeAll(fZWJSet);
    431             fOtherSet.removeAll(fExtendedPictSet);
    432             fOtherSet.removeAll(fEmojiNRKSet);
    433 
    434             // Inhibit dictionary characters from being tested at all.
    435             // remove surrogates so as to not generate higher CJK characters
    436             fOtherSet.removeAll(new UnicodeSet("[[\\p{LineBreak = Complex_Context}][:Line_Break=Surrogate:]]"));
    437             fOtherSet.removeAll(fDictionarySet);
    438 
    439             fSets            = new ArrayList();
    440             fSets.add(fCRSet);
    441             fSets.add(fLFSet);
    442             fSets.add(fNewlineSet);
    443             fSets.add(fRegionalIndicatorSet);
    444             fSets.add(fHebrew_LetterSet);
    445             fSets.add(fALetterSet);
    446             //fSets.add(fKatakanaSet);  // Omit Katakana from fSets, which omits Katakana characters
    447             // from the test data. They are all in the dictionary set,
    448             // which this (old, to be retired) monkey test cannot handle.
    449             fSets.add(fSingle_QuoteSet);
    450             fSets.add(fDouble_QuoteSet);
    451             fSets.add(fMidLetterSet);
    452             fSets.add(fMidNumLetSet);
    453             fSets.add(fMidNumSet);
    454             fSets.add(fNumericSet);
    455             fSets.add(fFormatSet);
    456             fSets.add(fExtendSet);
    457             fSets.add(fExtendNumLetSet);
    458             fSets.add(fRegionalIndicatorSet);
    459             fSets.add(fEBaseSet);
    460             fSets.add(fEBGSet);
    461             fSets.add(fEModifierSet);
    462             fSets.add(fZWJSet);
    463             fSets.add(fExtendedPictSet);
    464             fSets.add(fEmojiNRKSet);
    465             fSets.add(fOtherSet);
    466         }
    467 
    468 
    469         @Override
    470         List  charClasses() {
    471             return fSets;
    472         }
    473 
    474         @Override
    475         void   setText(StringBuffer s) {
    476             fText = s;
    477         }
    478 
    479         @Override
    480         int   next(int prevPos) {
    481             int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the
    482             //   break position being tested.  The candidate break
    483             //   location is before p2.
    484             int     breakPos = -1;
    485 
    486             int c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
    487 
    488             // Previous break at end of string.  return DONE.
    489             if (prevPos >= fText.length()) {
    490                 return -1;
    491             }
    492             /*p0 =*/ p1 = p2 = p3 = prevPos;
    493             c3 = UTF16.charAt(fText, prevPos);
    494             c0 = c1 = c2 = 0;
    495 
    496 
    497 
    498             // Loop runs once per "significant" character position in the input text.
    499             for (;;) {
    500                 // Move all of the positions forward in the input string.
    501                 /*p0 = p1;*/  c0 = c1;
    502                 p1 = p2;  c1 = c2;
    503                 p2 = p3;  c2 = c3;
    504 
    505                 // Advance p3 by    X(Extend | Format)*   Rule 4
    506                 //    But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
    507                 do {
    508                     p3 = moveIndex32(fText, p3, 1);
    509                     c3 = -1;
    510                     if (p3>=fText.length()) {
    511                         break;
    512                     }
    513                     c3 = UTF16.charAt(fText, p3);
    514                     if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
    515                         break;
    516                     }
    517                 }
    518                 while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3) || setContains(fZWJSet, c3));
    519 
    520                 if (p1 == p2) {
    521                     // Still warming up the loop.  (won't work with zero length strings, but we don't care)
    522                     continue;
    523                 }
    524                 if (p2 == fText.length()) {
    525                     // Reached end of string.  Always a break position.
    526                     break;
    527                 }
    528 
    529                 // Rule (3)   CR x LF
    530                 //     No Extend or Format characters may appear between the CR and LF,
    531                 //     which requires the additional check for p2 immediately following p1.
    532                 //
    533                 if (c1==0x0D && c2==0x0A) {
    534                     continue;
    535                 }
    536 
    537                 // Rule (3a)  Break before and after newlines (including CR and LF)
    538                 //
    539                 if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) {
    540                     break;
    541                 }
    542                 if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
    543                     break;
    544                 }
    545 
    546                 // Rule (3c)    ZWJ x (Extended_Pictographic | Emoji).
    547                 //              Not ignoring extend chars, so peek into input text to
    548                 //              get the potential ZWJ, the character immediately preceding c2.
    549                 if (fZWJSet.contains(fText.codePointBefore(p2)) && (fExtendedPictSet.contains(c2) || fEmojiNRKSet.contains(c2))) {
    550                     continue;
    551                 }
    552 
    553                 // Rule (5).   (ALetter | Hebrew_Letter) x (ALetter | Hebrew_Letter)
    554                 if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
    555                         (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2)))  {
    556                     continue;
    557                 }
    558 
    559                 // Rule (6)  (ALetter | Hebrew_Letter)  x  (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
    560                 //
    561                 if ( (fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1))   &&
    562                         (fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2)) &&
    563                         (setContains(fALetterSet, c3) || setContains(fHebrew_LetterSet, c3))) {
    564                     continue;
    565                 }
    566 
    567                 // Rule (7)  (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote)  x  (ALetter | Hebrew_Letter)
    568                 if ((fALetterSet.contains(c0) || fHebrew_LetterSet.contains(c0)) &&
    569                         (fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1) || fSingle_QuoteSet.contains(c1)) &&
    570                         (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2))) {
    571                     continue;
    572                 }
    573 
    574                 // Rule (7a)     Hebrew_Letter x Single_Quote
    575                 if (fHebrew_LetterSet.contains(c1) && fSingle_QuoteSet.contains(c2)) {
    576                     continue;
    577                 }
    578 
    579                 // Rule (7b)    Hebrew_Letter x Double_Quote Hebrew_Letter
    580                 if (fHebrew_LetterSet.contains(c1) && fDouble_QuoteSet.contains(c2) && setContains(fHebrew_LetterSet,c3)) {
    581                     continue;
    582                 }
    583 
    584                 // Rule (7c)    Hebrew_Letter Double_Quote x Hebrew_Letter
    585                 if (fHebrew_LetterSet.contains(c0) && fDouble_QuoteSet.contains(c1) && fHebrew_LetterSet.contains(c2)) {
    586                     continue;
    587                 }
    588 
    589                 //  Rule (8)    Numeric x Numeric
    590                 if (fNumericSet.contains(c1) &&
    591                         fNumericSet.contains(c2))  {
    592                     continue;
    593                 }
    594 
    595                 // Rule (9)    (ALetter | Hebrew_Letter) x Numeric
    596                 if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
    597                         fNumericSet.contains(c2))  {
    598                     continue;
    599                 }
    600 
    601                 // Rule (10)    Numeric x (ALetter | Hebrew_Letter)
    602                 if (fNumericSet.contains(c1) &&
    603                         (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2)))  {
    604                     continue;
    605                 }
    606 
    607                 // Rule (11)   Numeric (MidNum | MidNumLet | Single_Quote)  x  Numeric
    608                 if (fNumericSet.contains(c0) &&
    609                         (fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1) || fSingle_QuoteSet.contains(c1))  &&
    610                         fNumericSet.contains(c2)) {
    611                     continue;
    612                 }
    613 
    614                 // Rule (12)  Numeric x (MidNum | MidNumLet | SingleQuote) Numeric
    615                 if (fNumericSet.contains(c1) &&
    616                         (fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2))  &&
    617                         setContains(fNumericSet, c3)) {
    618                     continue;
    619                 }
    620 
    621                 // Rule (13)  Katakana x Katakana
    622                 //            Note: matches UAX 29 rules, but doesn't come into play for ICU because
    623                 //                  all Katakana are handled by the dictionary breaker.
    624                 if (fKatakanaSet.contains(c1) &&
    625                         fKatakanaSet.contains(c2))  {
    626                     continue;
    627                 }
    628 
    629                 // Rule 13a    (ALetter | Hebrew_Letter | Numeric | KataKana | ExtendNumLet) x ExtendNumLet
    630                 if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1) ||fNumericSet.contains(c1) ||
    631                         fKatakanaSet.contains(c1) || fExtendNumLetSet.contains(c1)) &&
    632                         fExtendNumLetSet.contains(c2)) {
    633                     continue;
    634                 }
    635 
    636                 // Rule 13b   ExtendNumLet x (ALetter | Hebrew_Letter | Numeric | Katakana)
    637                 if (fExtendNumLetSet.contains(c1) &&
    638                         (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2) ||
    639                                 fNumericSet.contains(c2) || fKatakanaSet.contains(c2)))  {
    640                     continue;
    641                 }
    642 
    643 
    644                 // Rule 14 (E_Base | EBG) x E_Modifier
    645                 if ((fEBaseSet.contains(c1)  || fEBGSet.contains(c1)) && fEModifierSet.contains(c2)) {
    646                     continue;
    647                 }
    648 
     649                 // Rule 15 - 17   Group pairs of Regional Indicators
    650                 if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)) {
    651                     break;
    652                 }
    653                 if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
    654                     continue;
    655                 }
    656 
    657                 // Rule 999.  Break found here.
    658                 break;
    659             }
    660 
    661             breakPos = p2;
    662             return breakPos;
    663         }
    664 
    665     }
    666 
    667 
    668     static class RBBILineMonkey extends RBBIMonkeyKind {
    669 
        // All of the character class sets registered by the constructor, in registration order.
        // Declared as a raw List; the constructor only ever adds UnicodeSet instances to it.
        List        fSets;

        // UnicodeSets for each of the Line Breaking character classes.
        // Order matches that of Unicode UAX 14, Table 1, which makes it a little easier
        // to verify that they are all accounted for.
        //
        // Several sets are widened by the constructor after they are built:
        //    fAL also receives the contents of fXX, fAI and fSG,
        //    fNS also receives the contents of fCJ,
        //    fCM also receives the contents of fZWJ.

        UnicodeSet  fBK;
        UnicodeSet  fCR;
        UnicodeSet  fLF;
        UnicodeSet  fCM;    // Combining marks; the constructor adds ZWJ to this set as well.
        UnicodeSet  fNL;
        UnicodeSet  fSG;    // Built from the code point range U+D800..U+DFFF (surrogates), not from a property.
        UnicodeSet  fWJ;
        UnicodeSet  fZW;
        UnicodeSet  fGL;
        UnicodeSet  fSP;
        UnicodeSet  fB2;
        UnicodeSet  fBA;
        UnicodeSet  fBB;
        UnicodeSet  fHY;
        UnicodeSet  fCB;
        UnicodeSet  fCL;
        UnicodeSet  fCP;
        UnicodeSet  fEX;
        UnicodeSet  fIN;
        UnicodeSet  fNS;    // The constructor adds CJ to this set.
        UnicodeSet  fOP;
        UnicodeSet  fQU;
        UnicodeSet  fIS;
        UnicodeSet  fNU;
        UnicodeSet  fPO;
        UnicodeSet  fPR;
        UnicodeSet  fSY;
        UnicodeSet  fAI;
        UnicodeSet  fAL;    // The constructor adds XX, AI and SG to this set.
        UnicodeSet  fCJ;
        UnicodeSet  fH2;
        UnicodeSet  fH3;
        UnicodeSet  fHL;
        UnicodeSet  fID;
        UnicodeSet  fJL;
        UnicodeSet  fJV;
        UnicodeSet  fJT;
        UnicodeSet  fRI;
        UnicodeSet  fXX;
        UnicodeSet  fEB;
        UnicodeSet  fEM;
        UnicodeSet  fZWJ;
        UnicodeSet  fExtendedPict;   // Built from gExtended_Pict, which is declared elsewhere in this file.
        UnicodeSet  fEmojiNRK;       // \p{Emoji} minus Regional Indicators, '*', '#' and the digits 0-9.

        StringBuffer  fText;            // Text under test; installed by setText() and scanned by next().
        int           fOrigPositions;   // NOTE(review): not referenced in the visible part of this class - confirm whether it is still needed.
    723 
    724 
    725 
    726         RBBILineMonkey()
    727         {
    728             fCharProperty  = UProperty.LINE_BREAK;
    729             fSets          = new ArrayList();
    730 
    731             fBK    = new UnicodeSet("[\\p{Line_Break=BK}]");
    732             fCR    = new UnicodeSet("[\\p{Line_break=CR}]");
    733             fLF    = new UnicodeSet("[\\p{Line_break=LF}]");
    734             fCM    = new UnicodeSet("[\\p{Line_break=CM}]");
    735             fNL    = new UnicodeSet("[\\p{Line_break=NL}]");
    736             fSG    = new UnicodeSet("[\\ud800-\\udfff]");
    737             fWJ    = new UnicodeSet("[\\p{Line_break=WJ}]");
    738             fZW    = new UnicodeSet("[\\p{Line_break=ZW}]");
    739             fGL    = new UnicodeSet("[\\p{Line_break=GL}]");
    740             fSP    = new UnicodeSet("[\\p{Line_break=SP}]");
    741             fB2    = new UnicodeSet("[\\p{Line_break=B2}]");
    742             fBA    = new UnicodeSet("[\\p{Line_break=BA}]");
    743             fBB    = new UnicodeSet("[\\p{Line_break=BB}]");
    744             fHY    = new UnicodeSet("[\\p{Line_break=HY}]");
    745             fCB    = new UnicodeSet("[\\p{Line_break=CB}]");
    746             fCL    = new UnicodeSet("[\\p{Line_break=CL}]");
    747             fCP    = new UnicodeSet("[\\p{Line_break=CP}]");
    748             fEX    = new UnicodeSet("[\\p{Line_break=EX}]");
    749             fIN    = new UnicodeSet("[\\p{Line_break=IN}]");
    750             fNS    = new UnicodeSet("[\\p{Line_break=NS}]");
    751             fOP    = new UnicodeSet("[\\p{Line_break=OP}]");
    752             fQU    = new UnicodeSet("[\\p{Line_break=QU}]");
    753             fIS    = new UnicodeSet("[\\p{Line_break=IS}]");
    754             fNU    = new UnicodeSet("[\\p{Line_break=NU}]");
    755             fPO    = new UnicodeSet("[\\p{Line_break=PO}]");
    756             fPR    = new UnicodeSet("[\\p{Line_break=PR}]");
    757             fSY    = new UnicodeSet("[\\p{Line_break=SY}]");
    758             fAI    = new UnicodeSet("[\\p{Line_break=AI}]");
    759             fAL    = new UnicodeSet("[\\p{Line_break=AL}]");
    760             fCJ    = new UnicodeSet("[\\p{Line_break=CJ}]");
    761             fH2    = new UnicodeSet("[\\p{Line_break=H2}]");
    762             fH3    = new UnicodeSet("[\\p{Line_break=H3}]");
    763             fHL    = new UnicodeSet("[\\p{Line_break=HL}]");
    764             fID    = new UnicodeSet("[\\p{Line_break=ID}]");
    765             fJL    = new UnicodeSet("[\\p{Line_break=JL}]");
    766             fJV    = new UnicodeSet("[\\p{Line_break=JV}]");
    767             fJT    = new UnicodeSet("[\\p{Line_break=JT}]");
    768             fRI    = new UnicodeSet("[\\p{Line_break=RI}]");
    769             fXX    = new UnicodeSet("[\\p{Line_break=XX}]");
    770             fEB    = new UnicodeSet("[\\p{Line_break=EB}]");
    771             fEM    = new UnicodeSet("[\\p{Line_break=EM}]");
    772             fZWJ   = new UnicodeSet("[\\p{Line_break=ZWJ}]");
    773             fEmojiNRK = new UnicodeSet("[[\\p{Emoji}]-[\\p{Line_break=RI}*#0-9]]");
    774             fExtendedPict = new UnicodeSet(gExtended_Pict);
    775 
    776 
    777             // Remove dictionary characters.
    778             // The monkey test reference implementation of line break does not replicate the dictionary behavior,
    779             // so dictionary characters are omitted from the monkey test data.
    780             @SuppressWarnings("unused")
    781             UnicodeSet dictionarySet = new UnicodeSet(
    782                     "[[:LineBreak = Complex_Context:] & [[:Script = Thai:][:Script = Lao:][:Script = Khmer:] [:script = Myanmar:]]]");
    783 
    784             fAL.addAll(fXX);     // Default behavior for XX is identical to AL
    785             fAL.addAll(fAI);     // Default behavior for AI is identical to AL
    786             fAL.addAll(fSG);     // Default behavior for SG (unpaired surrogates) is AL
    787 
    788             fNS.addAll(fCJ);     // Default behavior for CJ is identical to NS.
    789             fCM.addAll(fZWJ);    // ZWJ behaves as a CM.
    790 
    791             fSets.add(fBK);
    792             fSets.add(fCR);
    793             fSets.add(fLF);
    794             fSets.add(fCM);
    795             fSets.add(fNL);
    796             fSets.add(fWJ);
    797             fSets.add(fZW);
    798             fSets.add(fGL);
    799             fSets.add(fSP);
    800             fSets.add(fB2);
    801             fSets.add(fBA);
    802             fSets.add(fBB);
    803             fSets.add(fHY);
    804             fSets.add(fCB);
    805             fSets.add(fCL);
    806             fSets.add(fCP);
    807             fSets.add(fEX);
    808             fSets.add(fIN);
    809             fSets.add(fJL);
    810             fSets.add(fJT);
    811             fSets.add(fJV);
    812             fSets.add(fNS);
    813             fSets.add(fOP);
    814             fSets.add(fQU);
    815             fSets.add(fIS);
    816             fSets.add(fNU);
    817             fSets.add(fPO);
    818             fSets.add(fPR);
    819             fSets.add(fSY);
    820             fSets.add(fAI);
    821             fSets.add(fAL);
    822             fSets.add(fH2);
    823             fSets.add(fH3);
    824             fSets.add(fHL);
    825             fSets.add(fID);
    826             fSets.add(fWJ);
    827             fSets.add(fRI);
    828             fSets.add(fSG);
    829             fSets.add(fEB);
    830             fSets.add(fEM);
    831             fSets.add(fZWJ);
    832             fSets.add(fExtendedPict);
    833             fSets.add(fEmojiNRK);
    834         }
    835 
    836         @Override
    837         void setText(StringBuffer s) {
    838             fText       = s;
    839         }
    840 
    841 
    842 
    843 
        /**
         * Reference implementation of line breaking: scans forward through fText and
         * returns the first position at which the rules below produce a break.
         *
         * The rules are tested strictly in order for each candidate position.  Inside
         * the main loop a "continue" means "no break here, try the next position" and
         * a "break" means "a break was found just before pos".
         *
         * @param startPos index in fText at which scanning starts.  No break is ever
         *                 reported before the first character at startPos, because the
         *                 first loop iteration only primes the "previous" state.
         * @return the index of the character following the break that was found
         *         (at or past fText.length() when the end of text is reached, rule LB 2),
         *         or -1 if startPos is already at or beyond the end of fText.
         */
        @Override
        int next(int startPos) {
            int    pos;       //  Index of the char following a potential break position
            int    thisChar;  //  Character at above position "pos"

            int    prevPos;   //  Index of the char preceding a potential break position
            int    prevChar;  //  Character at above position.  Note that prevChar
            //   and thisChar may not be adjacent because combining
            //   characters between them will be ignored.
            int    prevCharX2; //  Character before prevChar, more context for LB 21a and LB 30a

            int    nextPos;   //  Index of the next character following pos.
            //     Usually skips over combining marks.
            int    tPos;      //  Temp index, used by the rules that scan backwards from prevPos.
            int    matchVals[]  = null;       // Number Expression Match Results; allocated by LBNumberCheck on first use.


            // Nothing left to scan.
            if (startPos >= fText.length()) {
                return -1;
            }


            // Initial values for loop.  Loop will run the first time without finding breaks,
            //                           while the invalid values shift out and the "this" and
            //                           "prev" positions are filled in with good values.
            pos      = prevPos   = -1;    // Invalid value, serves as flag for initial loop iteration.
            thisChar = prevChar  = prevCharX2 = 0;
            nextPos  = startPos;


            // Loop runs once per position in the test text, until a break position
            //  is found.  In each iteration, we are testing for a possible break
            //  just preceding the character at index "pos".  The character preceding
            //  this char is at position "prevPos"; because of combining sequences,
            //  "prevPos" can be arbitrarily far before "pos".
            for (;;) {
                // Advance to the next position to be tested.
                prevCharX2 = prevChar;
                prevPos   = pos;
                prevChar  = thisChar;
                pos       = nextPos;
                nextPos   = moveIndex32(fText, pos, 1);

                // Rule LB2 - Break at end of text.
                if (pos >= fText.length()) {
                    break;
                }

                // Rule LB 9 - adjust for combining sequences.
                //             We do this rule out-of-order because the adjustment does
                //             not affect the way that rules LB 3 through LB 6 match,
                //             and doing it here rather than after LB 6 is substantially
                //             simpler when combining sequences do occur.


                // LB 9         Keep combining sequences together.
                //              advance over any CM class chars at "pos",
                //              result is "nextPos" for the following loop iteration.
                //              Not done when thisChar is SP, BK, CR, LF, NL or ZW;
                //              a CM following one of those is left to be handled by LB 10.
                thisChar  = UTF16.charAt(fText, pos);
                if (!(fSP.contains(thisChar) || fBK.contains(thisChar) || thisChar==0x0d ||
                        thisChar==0x0a || fNL.contains(thisChar) || fZW.contains(thisChar) )) {
                    for (;;) {
                        if (nextPos == fText.length()) {
                            break;
                        }
                        int nextChar = UTF16.charAt(fText, nextPos);
                        if (!fCM.contains(nextChar)) {
                            break;
                        }
                        nextPos = moveIndex32(fText, nextPos, 1);
                    }
                }

                // LB 9 Treat X CM* as if it were X
                //        No explicit action required.

                // LB 10     Treat any remaining combining mark as AL
                //           (done by substituting the letter 'A' for the combining mark).
                if (fCM.contains(thisChar)) {
                    thisChar = 'A';
                }


                // If the loop is still warming up - if we haven't shifted the initial
                //   -1 positions out of prevPos yet - loop back to advance the
                //    position in the input without any further looking for breaks.
                if (prevPos == -1) {
                    continue;
                }

                // LB 4  Always break after hard line breaks,
                if (fBK.contains(prevChar)) {
                    break;
                }

                // LB 5  Break after CR, LF, NL, but not inside CR LF
                if (fCR.contains(prevChar) && fLF.contains(thisChar)) {
                    continue;
                }
                if  (fCR.contains(prevChar) ||
                        fLF.contains(prevChar) ||
                        fNL.contains(prevChar))  {
                    break;
                }

                // LB 6  Don't break before hard line breaks
                if (fBK.contains(thisChar) || fCR.contains(thisChar) ||
                        fLF.contains(thisChar) || fNL.contains(thisChar) ) {
                    continue;
                }


                // LB 7  Don't break before spaces or zero-width space.
                if (fSP.contains(thisChar)) {
                    continue;
                }

                if (fZW.contains(thisChar)) {
                    continue;
                }

                // LB 8  Break after zero width space
                if (fZW.contains(prevChar)) {
                    break;
                }

                // LB 8a:  ZWJ x (ID | Extended_Pictographic | Emoji)
                //       The monkey test's way of ignoring combining characters doesn't work
                //       for this rule. ZWJ is also a CM. Need to get the actual character
                //       preceding "thisChar", not ignoring combining marks, possibly ZWJ.
                {
                    int prevC = fText.codePointBefore(pos);
                    if (fZWJ.contains(prevC) && (fID.contains(thisChar) || fExtendedPict.contains(thisChar) || fEmojiNRK.contains(thisChar))) {
                        continue;
                    }
                }

                //  LB 9, 10  Already done, at top of loop.
                //


                // LB 11  Do not break before or after Word Joiner.
                //    x  WJ
                //    WJ  x
                if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
                    continue;
                }


                // LB 12  Do not break after Glue.
                //        GL x
                if (fGL.contains(prevChar)) {
                    continue;
                }

                // LB 12a  Do not break before Glue, except after SP, BA or HY.
                //    [^SP BA HY] x GL
                if (!(fSP.contains(prevChar) ||
                        fBA.contains(prevChar) ||
                        fHY.contains(prevChar)     ) && fGL.contains(thisChar)) {
                    continue;
                }



                // LB 13  Don't break before closings.
                //       NU x CL, NU x CP, NU x IS and NU x SY are not matched here so that they
                //       will fall into LB 25 and the more general number regular expression.
                //       EX is matched regardless of what precedes it.
                //
                if (!fNU.contains(prevChar) && fCL.contains(thisChar) ||
                        !fNU.contains(prevChar) && fCP.contains(thisChar) ||
                        fEX.contains(thisChar) ||
                        !fNU.contains(prevChar) && fIS.contains(thisChar) ||
                        !fNU.contains(prevChar) && fSY.contains(thisChar))    {
                    continue;
                }

                // LB 14  Don't break after OP SP*
                //       Scan backwards, checking for this sequence.
                //       The OP char could include combining marks, so we actually check for
                //           OP CM* SP* x
                //       The backwards scan over spaces is only made when prevChar is a space.
                tPos = prevPos;
                if (fSP.contains(prevChar)) {
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
                        tPos=moveIndex32(fText, tPos, -1);
                    }
                }
                while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
                    tPos=moveIndex32(fText, tPos, -1);
                }
                if (fOP.contains(UTF16.charAt(fText, tPos))) {
                    continue;
                }

                // LB 15 Do not break between a quotation mark and an opening punctuation,
                //       even with intervening spaces.
                //       QU CM* SP* x OP
                if (fOP.contains(thisChar)) {
                    // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
                    tPos = prevPos;
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
                    }
                    while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
                    }
                    if (fQU.contains(UTF16.charAt(fText, tPos))) {
                        continue;
                    }
                }

                // LB 16   (CL | CP) SP* x NS
                //         As with LB 15, the backwards scan also steps over CM* before the spaces.
                if (fNS.contains(thisChar)) {
                    tPos = prevPos;
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
                    }
                    while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
                    }
                    if (fCL.contains(UTF16.charAt(fText, tPos)) || fCP.contains(UTF16.charAt(fText, tPos))) {
                        continue;
                    }
                }


                // LB 17        B2 SP* x B2
                //              As with LB 15, the backwards scan also steps over CM* before the spaces.
                if (fB2.contains(thisChar)) {
                    tPos = prevPos;
                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
                    }
                    while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
                        tPos = moveIndex32(fText, tPos, -1);
                    }
                    if (fB2.contains(UTF16.charAt(fText, tPos))) {
                        continue;
                    }
                }

                // LB 18    break after space
                if (fSP.contains(prevChar)) {
                    break;
                }

                // LB 19  Do not break before or after quotation marks.
                //    x   QU
                //    QU  x
                if (fQU.contains(thisChar) || fQU.contains(prevChar)) {
                    continue;
                }

                // LB 20  Break around a CB
                if (fCB.contains(thisChar) || fCB.contains(prevChar)) {
                    break;
                }

                // LB 21  Do not break before BA, HY or NS, or after BB.
                //    x   BA
                //    x   HY
                //    x   NS
                //    BB  x
                if (fBA.contains(thisChar) ||
                        fHY.contains(thisChar) ||
                        fNS.contains(thisChar) ||
                        fBB.contains(prevChar) )   {
                    continue;
                }

                // LB 21a, HL (HY | BA) x
                if (fHL.contains(prevCharX2) && (fHY.contains(prevChar) || fBA.contains(prevChar))) {
                    continue;
                }

                // LB 21b, SY x HL
                if (fSY.contains(prevChar) && fHL.contains(thisChar)) {
                    continue;
                }

                // LB 22  Do not break before IN when it follows one of the classes below.
                //    (AL | EX | HL | ID | EB | EM | IN | NU) x IN
                if (fAL.contains(prevChar) && fIN.contains(thisChar) ||
                        fEX.contains(prevChar) && fIN.contains(thisChar) ||
                        fHL.contains(prevChar) && fIN.contains(thisChar) ||
                        (fID.contains(prevChar) || fEB.contains(prevChar) || fEM.contains(prevChar)) && fIN.contains(thisChar) ||
                        fIN.contains(prevChar) && fIN.contains(thisChar) ||
                        fNU.contains(prevChar) && fIN.contains(thisChar) )   {
                    continue;
                }

                // LB 23    (AL | HL) x NU
                //          NU x (AL | HL)
                if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && fNU.contains(thisChar)) {
                    continue;
                }
                if (fNU.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
                    continue;
                }

                // LB 23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
                //      PR x (ID | EB | EM)
                //     (ID | EB | EM) x PO
                if (fPR.contains(prevChar) &&
                        (fID.contains(thisChar) || fEB.contains(thisChar) || fEM.contains(thisChar)))  {
                    continue;
                }
                if ((fID.contains(prevChar) || fEB.contains(prevChar) || fEM.contains(prevChar)) &&
                        fPO.contains(thisChar)) {
                    continue;
                }

                // LB 24  Do not break between numeric prefix/postfix and letters, in either order.
                //         (PR | PO) x (AL | HL)
                //         (AL | HL) x (PR | PO)
                if ((fPR.contains(prevChar) || fPO.contains(prevChar)) &&
                        (fAL.contains(thisChar) || fHL.contains(thisChar))) {
                    continue;
                }
                if ((fAL.contains(prevChar) || fHL.contains(prevChar)) &&
                        (fPR.contains(thisChar) || fPO.contains(thisChar))) {
                    continue;
                }


                // LB 25    Numbers
                //          Try to match the number expression starting at prevPos.
                //          matchVals[0] is the start of the match (-1 if none),
                //          matchVals[1] is the index of the first char following the match.
                matchVals = LBNumberCheck(fText, prevPos, matchVals);
                if (matchVals[0] != -1) {
                    // Matched a number.  But could have been just a single digit, which would
                    //    not represent a "no break here" between prevChar and thisChar
                    int numEndIdx = matchVals[1];  // idx of first char following num
                    if (numEndIdx > pos) {
                        // Number match includes at least the two chars being checked
                        if (numEndIdx > nextPos) {
                            // Number match includes additional chars.  Update pos and nextPos
                            //   so that next loop iteration will continue at the end of the number,
                            //   checking for breaks between last char in number & whatever follows.
                            //   pos is backed up from the end of the number over any trailing
                            //   combining marks, so that thisChar is the last base char of the number.
                            nextPos = numEndIdx;
                            pos     = numEndIdx;
                            do {
                                pos = moveIndex32(fText, pos, -1);
                                thisChar = UTF16.charAt(fText, pos);
                            }
                            while (fCM.contains(thisChar));
                        }
                        continue;
                    }
                }


                // LB 26  Do not break Korean Syllables
                //    JL x (JL | JV | H2 | H3)
                //    (JV | H2) x (JV | JT)
                //    (JT | H3) x JT
                if (fJL.contains(prevChar) && (fJL.contains(thisChar) ||
                        fJV.contains(thisChar) ||
                        fH2.contains(thisChar) ||
                        fH3.contains(thisChar))) {
                    continue;
                }

                if ((fJV.contains(prevChar) || fH2.contains(prevChar))  &&
                        (fJV.contains(thisChar) || fJT.contains(thisChar))) {
                    continue;
                }

                if ((fJT.contains(prevChar) || fH3.contains(prevChar)) &&
                        fJT.contains(thisChar)) {
                    continue;
                }

                // LB 27 Treat a Korean Syllable Block the same as ID
                //    (JL | JV | JT | H2 | H3) x IN
                //    (JL | JV | JT | H2 | H3) x PO
                //    PR x (JL | JV | JT | H2 | H3)
                if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
                        fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
                        fIN.contains(thisChar)) {
                    continue;
                }
                if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
                        fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
                        fPO.contains(thisChar)) {
                    continue;
                }
                if (fPR.contains(prevChar) && (fJL.contains(thisChar) || fJV.contains(thisChar) ||
                        fJT.contains(thisChar) || fH2.contains(thisChar) || fH3.contains(thisChar))) {
                    continue;
                }



                // LB 28 Do not break between alphabetics
                //    (AL | HL) x (AL | HL)
                if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
                    continue;
                }

                // LB 29  Do not break between numeric punctuation and alphabetics
                //    IS x (AL | HL)
                if (fIS.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
                    continue;
                }

                // LB 30    Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
                //          (AL | HL | NU) x OP
                //          CP x (AL | HL | NU)
                if ((fAL.contains(prevChar) || fHL.contains(prevChar) || fNU.contains(prevChar)) && fOP.contains(thisChar)) {
                    continue;
                }
                if (fCP.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar) || fNU.contains(thisChar))) {
                    continue;
                }

                // LB 30a   Break between pairs of Regional Indicators.
                //             RI RI <break> RI
                //             RI    x    RI
                //          Only the two preceding characters are examined to decide whether
                //          thisChar would be the third RI in a row.
                if (fRI.contains(prevCharX2) && fRI.contains(prevChar) && fRI.contains(thisChar)) {
                    break;
                }
                if (fRI.contains(prevChar) && fRI.contains(thisChar)) {
                    continue;
                }

                // LB30b    Emoji Base x Emoji Modifier
                if (fEB.contains(prevChar) && fEM.contains(thisChar)) {
                    continue;
                }
                // LB 31    Break everywhere else
                break;
            }

            return pos;
        }
   1262 
   1263 
   1264 
   1265         // Match the following regular expression in the input text.
   1266         //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * ((CL | CP) CM*)?  (PR | PO) CM*)?
   1267         //      0    0   1       3    3    4              7    7    7    7      9    9    9     11   11    (match states)
   1268         //  retVals array  [0]  index of the start of the match, or -1 if no match
   1269         //                 [1]  index of first char following the match.
   1270         //  Can not use Java regex because need supplementary character support,
   1271         //     and because Unicode char properties version must be the same as in
   1272         //     the version of ICU being tested.
   1273         private int[] LBNumberCheck(StringBuffer s, int startIdx, int[] retVals) {
   1274             if (retVals == null) {
   1275                 retVals = new int[2];
   1276             }
   1277             retVals[0]     = -1;  // Indicates no match.
   1278             int matchState = 0;
   1279             int idx        = startIdx;
   1280 
   1281             matchLoop: for (idx = startIdx; idx<s.length(); idx = moveIndex32(s, idx, 1)){
   1282                 int c = UTF16.charAt(s, idx);
   1283                 int cLBType = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
   1284                 switch (matchState) {
   1285                 case 0:
   1286                     if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC ||
   1287                     cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
   1288                         matchState = 1;
   1289                         break;
   1290                     }
   1291                     if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
   1292                         matchState = 4;
   1293                         break;
   1294                     }
   1295                     if (cLBType == UCharacter.LineBreak.HYPHEN) {
   1296                         matchState = 4;
   1297                         break;
   1298                     }
   1299                     if (cLBType == UCharacter.LineBreak.NUMERIC) {
   1300                         matchState = 7;
   1301                         break;
   1302                     }
   1303                     break matchLoop;   /* No Match  */
   1304 
   1305                 case 1:
   1306                     if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
   1307                         matchState = 1;
   1308                         break;
   1309                     }
   1310                     if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
   1311                         matchState = 4;
   1312                         break;
   1313                     }
   1314                     if (cLBType == UCharacter.LineBreak.HYPHEN) {
   1315                         matchState = 4;
   1316                         break;
   1317                     }
   1318                     if (cLBType == UCharacter.LineBreak.NUMERIC) {
   1319                         matchState = 7;
   1320                         break;
   1321                     }
   1322                     break matchLoop;   /* No Match  */
   1323 
   1324 
   1325                 case 4:
   1326                     if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
   1327                         matchState = 4;
   1328                         break;
   1329                     }
   1330                     if (cLBType == UCharacter.LineBreak.NUMERIC) {
   1331                         matchState = 7;
   1332                         break;
   1333                     }
   1334                     break matchLoop;   /* No Match  */
   1335                     //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PR | PO) CM*)?
   1336                     //      0    0   1       3    3    4              7    7    7    7      9   9     11   11    (match states)
   1337 
   1338                 case 7:
   1339                     if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
   1340                         matchState = 7;
   1341                         break;
   1342                     }
   1343                     if (cLBType == UCharacter.LineBreak.NUMERIC) {
   1344                         matchState = 7;
   1345                         break;
   1346                     }
   1347                     if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
   1348                         matchState = 7;
   1349                         break;
   1350                     }
   1351                     if (cLBType == UCharacter.LineBreak.BREAK_SYMBOLS) {
   1352                         matchState = 7;
   1353                         break;
   1354                     }
   1355                     if (cLBType == UCharacter.LineBreak.CLOSE_PUNCTUATION) {
   1356                         matchState = 9;
   1357                         break;
   1358                     }
   1359                     if (cLBType == UCharacter.LineBreak.CLOSE_PARENTHESIS) {
   1360                         matchState = 9;
   1361                         break;
   1362                     }
   1363                     if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
   1364                         matchState = 11;
   1365                         break;
   1366                     }
   1367                     if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
   1368                         matchState = 11;
   1369                         break;
   1370                     }
   1371 
   1372                     break matchLoop;    // Match Complete.
   1373                 case 9:
   1374                     if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
   1375                         matchState = 9;
   1376                         break;
   1377                     }
   1378                     if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
   1379                         matchState = 11;
   1380                         break;
   1381                     }
   1382                     if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
   1383                         matchState = 11;
   1384                         break;
   1385                     }
   1386                     break matchLoop;    // Match Complete.
   1387                 case 11:
   1388                     if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
   1389                         matchState = 11;
   1390                         break;
   1391                     }
   1392                     break matchLoop;    // Match Complete.
   1393                 }
   1394             }
   1395             if (matchState > 4) {
   1396                 retVals[0] = startIdx;
   1397                 retVals[1] = idx;
   1398             }
   1399             return retVals;
   1400         }
   1401 
   1402 
   1403         @Override
   1404         List  charClasses() {
   1405             return fSets;
   1406         }
   1407 
   1408 
   1409 
   1410     }
   1411 
   1412 
   1413     /**
   1414      *
   1415      * Sentence Monkey Test Class
   1416      *
   1417      *
   1418      *
   1419      */
   1420     static class RBBISentenceMonkey extends RBBIMonkeyKind {
   1421         List                 fSets;
   1422         StringBuffer         fText;
   1423 
   1424         UnicodeSet           fSepSet;
   1425         UnicodeSet           fFormatSet;
   1426         UnicodeSet           fSpSet;
   1427         UnicodeSet           fLowerSet;
   1428         UnicodeSet           fUpperSet;
   1429         UnicodeSet           fOLetterSet;
   1430         UnicodeSet           fNumericSet;
   1431         UnicodeSet           fATermSet;
   1432         UnicodeSet           fSContinueSet;
   1433         UnicodeSet           fSTermSet;
   1434         UnicodeSet           fCloseSet;
   1435         UnicodeSet           fOtherSet;
   1436         UnicodeSet           fExtendSet;
   1437 
   1438 
   1439 
   1440         RBBISentenceMonkey() {
   1441             fCharProperty  = UProperty.SENTENCE_BREAK;
   1442 
   1443             fSets            = new ArrayList();
   1444 
   1445             //  Separator Set Note:  Beginning with Unicode 5.1, CR and LF were removed from the separator
   1446             //                       set and made into character classes of their own.  For the monkey impl,
   1447             //                       they remain in SEP, since Sep always appears with CR and LF in the rules.
   1448             fSepSet          = new UnicodeSet("[\\p{Sentence_Break = Sep} \\u000a \\u000d]");
   1449             fFormatSet       = new UnicodeSet("[\\p{Sentence_Break = Format}]");
   1450             fSpSet           = new UnicodeSet("[\\p{Sentence_Break = Sp}]");
   1451             fLowerSet        = new UnicodeSet("[\\p{Sentence_Break = Lower}]");
   1452             fUpperSet        = new UnicodeSet("[\\p{Sentence_Break = Upper}]");
   1453             fOLetterSet      = new UnicodeSet("[\\p{Sentence_Break = OLetter}]");
   1454             fNumericSet      = new UnicodeSet("[\\p{Sentence_Break = Numeric}]");
   1455             fATermSet        = new UnicodeSet("[\\p{Sentence_Break = ATerm}]");
   1456             fSContinueSet    = new UnicodeSet("[\\p{Sentence_Break = SContinue}]");
   1457             fSTermSet        = new UnicodeSet("[\\p{Sentence_Break = STerm}]");
   1458             fCloseSet        = new UnicodeSet("[\\p{Sentence_Break = Close}]");
   1459             fExtendSet       = new UnicodeSet("[\\p{Sentence_Break = Extend}]");
   1460             fOtherSet        = new UnicodeSet();
   1461 
   1462 
   1463             fOtherSet.complement();
   1464             fOtherSet.removeAll(fSepSet);
   1465             fOtherSet.removeAll(fFormatSet);
   1466             fOtherSet.removeAll(fSpSet);
   1467             fOtherSet.removeAll(fLowerSet);
   1468             fOtherSet.removeAll(fUpperSet);
   1469             fOtherSet.removeAll(fOLetterSet);
   1470             fOtherSet.removeAll(fNumericSet);
   1471             fOtherSet.removeAll(fATermSet);
   1472             fOtherSet.removeAll(fSContinueSet);
   1473             fOtherSet.removeAll(fSTermSet);
   1474             fOtherSet.removeAll(fCloseSet);
   1475             fOtherSet.removeAll(fExtendSet);
   1476 
   1477             fSets.add(fSepSet);
   1478             fSets.add(fFormatSet);
   1479 
   1480             fSets.add(fSpSet);
   1481             fSets.add(fLowerSet);
   1482             fSets.add(fUpperSet);
   1483             fSets.add(fOLetterSet);
   1484             fSets.add(fNumericSet);
   1485             fSets.add(fATermSet);
   1486             fSets.add(fSContinueSet);
   1487             fSets.add(fSTermSet);
   1488             fSets.add(fCloseSet);
   1489             fSets.add(fOtherSet);
   1490             fSets.add(fExtendSet);
   1491         }
   1492 
   1493 
   1494         @Override
   1495         List  charClasses() {
   1496             return fSets;
   1497         }
   1498 
   1499         @Override
   1500         void   setText(StringBuffer s) {
   1501             fText = s;
   1502         }
   1503 
   1504 
   1505         //      moveBack()   Find the "significant" code point preceding the index i.
   1506         //      Skips over ($Extend | $Format)*
   1507         //
   1508         private int moveBack(int i) {
   1509 
   1510             if (i <= 0) {
   1511                 return -1;
   1512             }
   1513 
   1514             int      c;
   1515             int      j = i;
   1516             do {
   1517                 j = moveIndex32(fText, j, -1);
   1518                 c = UTF16.charAt(fText, j);
   1519             }
   1520             while (j>0 &&(fFormatSet.contains(c) || fExtendSet.contains(c)));
   1521             return j;
   1522         }
   1523 
   1524 
   1525         int moveForward(int i) {
   1526             if (i>=fText.length()) {
   1527                 return fText.length();
   1528             }
   1529             int   c;
   1530             int   j = i;
   1531             do {
   1532                 j = moveIndex32(fText, j, 1);
   1533                 c = cAt(j);
   1534             }
   1535             while (c>=0 && (fFormatSet.contains(c) || fExtendSet.contains(c)));
   1536             return j;
   1537 
   1538         }
   1539 
   1540         int cAt(int pos) {
   1541             if (pos<0 || pos>=fText.length()) {
   1542                 return -1;
   1543             }
   1544             return UTF16.charAt(fText, pos);
   1545         }
   1546 
   1547         @Override
   1548         int   next(int prevPos) {
   1549             int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the
   1550             //   break position being tested.  The candidate break
   1551             //   location is before p2.
   1552             int     breakPos = -1;
   1553 
   1554             int c0, c1, c2, c3;         // The code points at p0, p1, p2 & p3.
   1555             int c;
   1556 
   1557             // Prev break at end of string.  return DONE.
   1558             if (prevPos >= fText.length()) {
   1559                 return -1;
   1560             }
   1561             /*p0 =*/ p1 = p2 = p3 = prevPos;
   1562             c3 = UTF16.charAt(fText, prevPos);
   1563             c0 = c1 = c2 = 0;
   1564 
   1565             // Loop runs once per "significant" character position in the input text.
   1566             for (;;) {
   1567                 // Move all of the positions forward in the input string.
   1568                 /*p0 = p1;*/  c0 = c1;
   1569                 p1 = p2;  c1 = c2;
   1570                 p2 = p3;  c2 = c3;
   1571 
   1572                 // Advancd p3 by  X(Extend | Format)*   Rule 4
   1573                 p3 = moveForward(p3);
   1574                 c3 = cAt(p3);
   1575 
   1576                 // Rule (3) CR x LF
   1577                 if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
   1578                     continue;
   1579                 }
   1580 
   1581                 // Rule (4)    Sep  <break>
   1582                 if (fSepSet.contains(c1)) {
   1583                     p2 = p1+1;   // Separators don't combine with Extend or Format
   1584                     break;
   1585                 }
   1586 
   1587                 if (p2 >= fText.length()) {
   1588                     // Reached end of string.  Always a break position.
   1589                     break;
   1590                 }
   1591 
   1592                 if (p2 == prevPos) {
   1593                     // Still warming up the loop.  (won't work with zero length strings, but we don't care)
   1594                     continue;
   1595                 }
   1596 
   1597                 // Rule (6).   ATerm x Numeric
   1598                 if (fATermSet.contains(c1) &&  fNumericSet.contains(c2))  {
   1599                     continue;
   1600                 }
   1601 
   1602                 // Rule (7).  (Upper | Lower) ATerm  x  Uppper
   1603                 if ((fUpperSet.contains(c0) || fLowerSet.contains(c0)) &&
   1604                         fATermSet.contains(c1) && fUpperSet.contains(c2)) {
   1605                     continue;
   1606                 }
   1607 
   1608                 // Rule (8)  ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower
   1609                 //           Note:  Sterm | ATerm are added to the negated part of the expression by a
   1610                 //                  note to the Unicode 5.0 documents.
   1611                 int p8 = p1;
   1612                 while (p8>0 && fSpSet.contains(cAt(p8))) {
   1613                     p8 = moveBack(p8);
   1614                 }
   1615                 while (p8>0 && fCloseSet.contains(cAt(p8))) {
   1616                     p8 = moveBack(p8);
   1617                 }
   1618                 if (fATermSet.contains(cAt(p8))) {
   1619                     p8=p2;
   1620                     for (;;) {
   1621                         c = cAt(p8);
   1622                         if (c==-1 || fOLetterSet.contains(c) || fUpperSet.contains(c) ||
   1623                                 fLowerSet.contains(c) || fSepSet.contains(c) ||
   1624                                 fATermSet.contains(c) || fSTermSet.contains(c))
   1625                         {
   1626                             break;
   1627                         }
   1628                         p8 = moveForward(p8);
   1629                     }
   1630                     if (p8<fText.length() && fLowerSet.contains(cAt(p8))) {
   1631                         continue;
   1632                     }
   1633                 }
   1634 
   1635                 // Rule 8a  (STerm | ATerm) Close* Sp* x (SContinue | Sterm | ATerm)
   1636                 if (fSContinueSet.contains(c2) || fSTermSet.contains(c2) || fATermSet.contains(c2)) {
   1637                     p8 = p1;
   1638                     while (setContains(fSpSet, cAt(p8))) {
   1639                         p8 = moveBack(p8);
   1640                     }
   1641                     while (setContains(fCloseSet, cAt(p8))) {
   1642                         p8 = moveBack(p8);
   1643                     }
   1644                     c = cAt(p8);
   1645                     if (setContains(fSTermSet, c) || setContains(fATermSet, c)) {
   1646                         continue;
   1647                     }
   1648                 }
   1649 
   1650 
   1651                 // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)
   1652                 int p9 = p1;
   1653                 while (p9>0 && fCloseSet.contains(cAt(p9))) {
   1654                     p9 = moveBack(p9);
   1655                 }
   1656                 c = cAt(p9);
   1657                 if ((fSTermSet.contains(c) || fATermSet.contains(c))) {
   1658                     if (fCloseSet.contains(c2) || fSpSet.contains(c2) || fSepSet.contains(c2)) {
   1659                         continue;
   1660                     }
   1661                 }
   1662 
   1663                 // Rule (10)  (Sterm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)
   1664                 int p10 = p1;
   1665                 while (p10>0 && fSpSet.contains(cAt(p10))) {
   1666                     p10 = moveBack(p10);
   1667                 }
   1668                 while (p10>0 && fCloseSet.contains(cAt(p10))) {
   1669                     p10 = moveBack(p10);
   1670                 }
   1671                 if (fSTermSet.contains(cAt(p10)) || fATermSet.contains(cAt(p10))) {
   1672                     if (fSpSet.contains(c2) || fSepSet.contains(c2)) {
   1673                         continue;
   1674                     }
   1675                 }
   1676 
   1677                 // Rule (11)  (STerm | ATerm) Close* Sp*   <break>
   1678                 int p11 = p1;
   1679                 if (p11>0 && fSepSet.contains(cAt(p11))) {
   1680                     p11 = moveBack(p11);
   1681                 }
   1682                 while (p11>0 && fSpSet.contains(cAt(p11))) {
   1683                     p11 = moveBack(p11);
   1684                 }
   1685                 while (p11>0 && fCloseSet.contains(cAt(p11))) {
   1686                     p11 = moveBack(p11);
   1687                 }
   1688                 if (fSTermSet.contains(cAt(p11)) || fATermSet.contains(cAt(p11))) {
   1689                     break;
   1690                 }
   1691 
   1692                 //  Rule (12)  Any x Any
   1693                 continue;
   1694             }
   1695             breakPos = p2;
   1696             return breakPos;
   1697         }
   1698 
   1699 
   1700 
   1701     }
   1702 
   1703 
   1704     /**
   1705      * Move an index into a string by n code points.
   1706      *   Similar to UTF16.moveCodePointOffset, but without the exceptions, which were
   1707      *   complicating usage.
   1708      * @param s   a Text string
   1709      * @param pos The starting code unit index into the text string
   1710      * @param amt The amount to adjust the string by.
   1711      * @return    The adjusted code unit index, pinned to the string's length, or
   1712      *            unchanged if input index was outside of the string.
   1713      */
   1714     static int moveIndex32(StringBuffer s, int pos, int amt) {
   1715         int i;
   1716         char  c;
   1717         if (amt>0) {
   1718             for (i=0; i<amt; i++) {
   1719                 if (pos >= s.length()) {
   1720                     return s.length();
   1721                 }
   1722                 c = s.charAt(pos);
   1723                 pos++;
   1724                 if (UTF16.isLeadSurrogate(c) && pos < s.length()) {
   1725                     c = s.charAt(pos);
   1726                     if (UTF16.isTrailSurrogate(c)) {
   1727                         pos++;
   1728                     }
   1729                 }
   1730             }
   1731         } else {
   1732             for (i=0; i>amt; i--) {
   1733                 if (pos <= 0) {
   1734                     return 0;
   1735                 }
   1736                 pos--;
   1737                 c = s.charAt(pos);
   1738                 if (UTF16.isTrailSurrogate(c) && pos >= 0) {
   1739                     c = s.charAt(pos);
   1740                     if (UTF16.isLeadSurrogate(c)) {
   1741                         pos--;
   1742                     }
   1743                 }
   1744             }
   1745         }
   1746         return pos;
   1747     }
   1748 
   1749     /**
   1750      * No-exceptions form of UnicodeSet.contains(c).
   1751      *    Simplifies loops that terminate with an end-of-input character value.
   1752      * @param s  A unicode set
   1753      * @param c  A code point value
   1754      * @return   true if the set contains c.
   1755      */
   1756     static boolean setContains(UnicodeSet s, int c) {
   1757         if (c<0 || c>UTF16.CODEPOINT_MAX_VALUE ) {
   1758             return false;
   1759         }
   1760         return s.contains(c);
   1761     }
   1762 
   1763 
   1764     /**
   1765      * return the index of the next code point in the input text.
   1766      * @param i the preceding index
   1767      */
   1768     static int  nextCP(StringBuffer s, int i) {
   1769         if (i == -1) {
   1770             // End of Input indication.  Continue to return end value.
   1771             return -1;
   1772         }
   1773         int  retVal = i + 1;
   1774         if (retVal > s.length()) {
   1775             return -1;
   1776         }
   1777         int  c = UTF16.charAt(s, i);
   1778         if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && UTF16.isLeadSurrogate(s.charAt(i))) {
   1779             retVal++;
   1780         }
   1781         return retVal;
   1782     }
   1783 
   1784 
   1785     /**
   1786      * random number generator.  Not using Java's built-in Randoms for two reasons:
   1787      *    1.  Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
   1788      *    2.  We need to get and restore the seed from values occurring in the middle
   1789      *        of a long sequence, to more easily reproduce failing cases.
   1790      */
   1791     private static int m_seed = 1;
   1792     private static int  m_rand()
   1793     {
   1794         m_seed = m_seed * 1103515245 + 12345;
   1795         return (m_seed >>> 16) % 32768;
   1796     }
   1797 
   1798     // Helper function for formatting error output.
   1799     //   Append a string into a fixed-size field in a StringBuffer.
   1800     //   Blank-pad the string if it is shorter than the field.
   1801     //   Truncate the source string if it is too long.
   1802     //
   1803     private static void appendToBuf(StringBuffer dest, String src, int fieldLen) {
   1804         int appendLen = src.length();
   1805         if (appendLen >= fieldLen) {
   1806             dest.append(src.substring(0, fieldLen));
   1807         } else {
   1808             dest.append(src);
   1809             while (appendLen < fieldLen) {
   1810                 dest.append(' ');
   1811                 appendLen++;
   1812             }
   1813         }
   1814     }
   1815 
   1816     // Helper function for formatting error output.
   1817     // Display a code point in "\\uxxxx" or "\Uxxxxxxxx" format
   1818     private static void appendCharToBuf(StringBuffer dest, int c, int fieldLen) {
   1819         String hexChars = "0123456789abcdef";
   1820         if (c < 0x10000) {
   1821             dest.append("\\u");
   1822             for (int bn=12; bn>=0; bn-=4) {
   1823                 dest.append(hexChars.charAt(((c)>>bn)&0xf));
   1824             }
   1825             appendToBuf(dest, " ", fieldLen-6);
   1826         } else {
   1827             dest.append("\\U");
   1828             for (int bn=28; bn>=0; bn-=4) {
   1829                 dest.append(hexChars.charAt(((c)>>bn)&0xf));
   1830             }
   1831             appendToBuf(dest, " ", fieldLen-10);
   1832 
   1833         }
   1834     }
   1835 
   1836     /**
   1837      *  Run a RBBI monkey test.  Common routine, for all break iterator types.
   1838      *    Parameters:
   1839      *       bi      - the break iterator to use
   1840      *       mk      - MonkeyKind, abstraction for obtaining expected results
   1841      *       name    - Name of test (char, word, etc.) for use in error messages
   1842      *       seed    - Seed for starting random number generator (parameter from user)
   1843      *       numIterations
   1844      */
   1845     void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int numIterations) {
   1846         int              TESTSTRINGLEN = 500;
   1847         StringBuffer     testText         = new StringBuffer();
   1848         int              numCharClasses;
   1849         List             chClasses;
   1850         int[]            expected         = new int[TESTSTRINGLEN*2 + 1];
   1851         int              expectedCount    = 0;
   1852         boolean[]        expectedBreaks   = new boolean[TESTSTRINGLEN*2 + 1];
   1853         boolean[]        forwardBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
   1854         boolean[]        reverseBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
   1855         boolean[]        isBoundaryBreaks = new boolean[TESTSTRINGLEN*2 + 1];
   1856         boolean[]        followingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
   1857         boolean[]        precedingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
   1858         int              i;
   1859         int              loopCount        = 0;
   1860         boolean          printTestData    = false;
   1861         boolean          printBreaksFromBI = false;
   1862 
   1863         m_seed = seed;
   1864 
   1865         numCharClasses = mk.charClasses().size();
   1866         chClasses      = mk.charClasses();
   1867 
   1868         // Verify that the character classes all have at least one member.
   1869         for (i=0; i<numCharClasses; i++) {
   1870             UnicodeSet s = (UnicodeSet)chClasses.get(i);
   1871             if (s == null || s.size() == 0) {
   1872                 errln("Character Class " + i + " is null or of zero size.");
   1873                 return;
   1874             }
   1875         }
   1876 
   1877         //--------------------------------------------------------------------------------------------
   1878         //
   1879         //  Debugging settings.  Comment out everything in the following block for normal operation
   1880         //
   1881         //--------------------------------------------------------------------------------------------
   1882         // numIterations = -1;
   1883         // numIterations = 10000;   // Same as exhaustive.
   1884         // RuleBasedBreakIterator_New.fTrace = true;
   1885         // m_seed = 859056465;
   1886         // TESTSTRINGLEN = 50;
   1887         // printTestData = true;
   1888         // printBreaksFromBI = true;
   1889         // ((RuleBasedBreakIterator_New)bi).dump();
   1890 
   1891         //--------------------------------------------------------------------------------------------
   1892         //
   1893         //  End of Debugging settings.
   1894         //
   1895         //--------------------------------------------------------------------------------------------
   1896 
   1897         int  dotsOnLine = 0;
   1898         while (loopCount < numIterations || numIterations == -1) {
   1899             if (numIterations == -1 && loopCount % 10 == 0) {
   1900                 // If test is running in an infinite loop, display a periodic tic so
   1901                 //   we can tell that it is making progress.
   1902                 System.out.print(".");
   1903                 if (dotsOnLine++ >= 80){
   1904                     System.out.println();
   1905                     dotsOnLine = 0;
   1906                 }
   1907             }
   1908             // Save current random number seed, so that we can recreate the random numbers
   1909             //   for this loop iteration in event of an error.
   1910             seed = m_seed;
   1911 
   1912             testText.setLength(0);
   1913             // Populate a test string with data.
   1914             if (printTestData) {
   1915                 System.out.println("Test Data string ...");
   1916             }
   1917             for (i=0; i<TESTSTRINGLEN; i++) {
   1918                 int        aClassNum = m_rand() % numCharClasses;
   1919                 UnicodeSet classSet  = (UnicodeSet)chClasses.get(aClassNum);
   1920                 int        charIdx   = m_rand() % classSet.size();
   1921                 int        c         = classSet.charAt(charIdx);
   1922                 if (c < 0) {   // TODO:  deal with sets containing strings.
   1923                     errln("c < 0");
   1924                 }
   1925                 UTF16.appendCodePoint(testText, c);
   1926                 if (printTestData) {
   1927                     System.out.print(Integer.toHexString(c) + " ");
   1928                 }
   1929             }
   1930             if (printTestData) {
   1931                 System.out.println();
   1932             }
   1933 
   1934             Arrays.fill(expected, 0);
   1935             Arrays.fill(expectedBreaks, false);
   1936             Arrays.fill(forwardBreaks, false);
   1937             Arrays.fill(reverseBreaks, false);
   1938             Arrays.fill(isBoundaryBreaks, false);
   1939             Arrays.fill(followingBreaks, false);
   1940             Arrays.fill(precedingBreaks, false);
   1941 
   1942             // Calculate the expected results for this test string.
   1943             mk.setText(testText);
   1944             expectedCount = 0;
   1945             expectedBreaks[0] = true;
   1946             expected[expectedCount ++] = 0;
   1947             int breakPos = 0;
   1948             int lastBreakPos = -1;
   1949             for (;;) {
   1950                 lastBreakPos = breakPos;
   1951                 breakPos = mk.next(breakPos);
   1952                 if (breakPos == -1) {
   1953                     break;
   1954                 }
   1955                 if (breakPos > testText.length()) {
   1956                     errln("breakPos > testText.length()");
   1957                 }
   1958                 if (lastBreakPos >= breakPos) {
   1959                     errln("Next() not increasing.");
   1960                     // break;
   1961                 }
   1962                 expectedBreaks[breakPos] = true;
   1963                 expected[expectedCount ++] = breakPos;
   1964             }
   1965 
   1966             // Find the break positions using forward iteration
   1967             if (printBreaksFromBI) {
   1968                 System.out.println("Breaks from BI...");
   1969             }
   1970             bi.setText(testText.toString());
   1971             for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) {
   1972                 if (i < 0 || i > testText.length()) {
   1973                     errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
   1974                     break;
   1975                 }
   1976                 if (printBreaksFromBI) {
   1977                     System.out.print(Integer.toHexString(i) + " ");
   1978                 }
   1979                 forwardBreaks[i] = true;
   1980             }
   1981             if (printBreaksFromBI) {
   1982                 System.out.println();
   1983             }
   1984 
   1985             // Find the break positions using reverse iteration
   1986             for (i=bi.last(); i != BreakIterator.DONE; i=bi.previous()) {
   1987                 if (i < 0 || i > testText.length()) {
   1988                     errln(name + " break monkey test: Out of range value returned by breakIterator.next()" + name);
   1989                     break;
   1990                 }
   1991                 reverseBreaks[i] = true;
   1992             }
   1993 
   1994             // Find the break positions using isBoundary() tests.
   1995             for (i=0; i<=testText.length(); i++) {
   1996                 isBoundaryBreaks[i] = bi.isBoundary(i);
   1997             }
   1998 
   1999             // Find the break positions using the following() function.
   2000             lastBreakPos = 0;
   2001             followingBreaks[0] = true;
   2002             for (i=0; i<testText.length(); i++) {
   2003                 breakPos = bi.following(i);
   2004                 if (breakPos <= i ||
   2005                         breakPos < lastBreakPos ||
   2006                         breakPos > testText.length() ||
   2007                         breakPos > lastBreakPos && lastBreakPos > i ) {
   2008                     errln(name + " break monkey test: " +
   2009                             "Out of range value returned by BreakIterator::following().\n" +
   2010                             "index=" + i + "following returned=" + breakPos +
   2011                             "lastBreak=" + lastBreakPos);
   2012                     precedingBreaks[i] = !expectedBreaks[i];   // Forces an error.
   2013                 } else {
   2014                     followingBreaks[breakPos] = true;
   2015                     lastBreakPos = breakPos;
   2016                 }
   2017             }
   2018 
   2019             // Find the break positions using the preceding() function.
   2020             lastBreakPos = testText.length();
   2021             precedingBreaks[testText.length()] = true;
   2022             for (i=testText.length(); i>0; i--) {
   2023                 breakPos = bi.preceding(i);
   2024                 if (breakPos >= i ||
   2025                         breakPos > lastBreakPos ||
   2026                         breakPos < 0 ||
   2027                         breakPos < lastBreakPos && lastBreakPos < i ) {
   2028                     errln(name + " break monkey test: " +
   2029                             "Out of range value returned by BreakIterator::preceding().\n" +
   2030                             "index=" + i + "preceding returned=" + breakPos +
   2031                             "lastBreak=" + lastBreakPos);
   2032                     precedingBreaks[i] = !expectedBreaks[i];   // Forces an error.
   2033                 } else {
   2034                     precedingBreaks[breakPos] = true;
   2035                     lastBreakPos = breakPos;
   2036                 }
   2037             }
   2038 
   2039 
   2040 
   2041             // Compare the expected and actual results.
   2042             for (i=0; i<=testText.length(); i++) {
   2043                 String errorType = null;
   2044                 if  (forwardBreaks[i] != expectedBreaks[i]) {
   2045                     errorType = "next()";
   2046                 } else if (reverseBreaks[i] != forwardBreaks[i]) {
   2047                     errorType = "previous()";
   2048                 } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
   2049                     errorType = "isBoundary()";
   2050                 } else if (followingBreaks[i] != expectedBreaks[i]) {
   2051                     errorType = "following()";
   2052                 } else if (precedingBreaks[i] != expectedBreaks[i]) {
   2053                     errorType = "preceding()";
   2054                 }
   2055 
   2056                 if (errorType != null) {
   2057                     // Format a range of the test text that includes the failure as
   2058                     //  a data item that can be included in the rbbi test data file.
   2059 
   2060                     // Start of the range is the last point where expected and actual results
   2061                     //   both agreed that there was a break position.
   2062                     int startContext = i;
   2063                     int count = 0;
   2064                     for (;;) {
   2065                         if (startContext==0) { break; }
   2066                         startContext --;
   2067                         if (expectedBreaks[startContext]) {
   2068                             if (count == 2) break;
   2069                             count ++;
   2070                         }
   2071                     }
   2072 
   2073                     // End of range is two expected breaks past the start position.
   2074                     int endContext = i + 1;
   2075                     int ci;
   2076                     for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
   2077                         for (;;) {
   2078                             if (endContext >= testText.length()) {break;}
   2079                             if (expectedBreaks[endContext-1]) {
   2080                                 if (count == 0) break;
   2081                                 count --;
   2082                             }
   2083                             endContext ++;
   2084                         }
   2085                     }
   2086 
   2087                     // Format looks like   "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
   2088                     StringBuffer errorText = new StringBuffer();
   2089 
   2090                     int      c;    // Char from test data
   2091                     for (ci = startContext;  ci <= endContext && ci != -1;  ci = nextCP(testText, ci)) {
   2092                         if (ci == i) {
   2093                             // This is the location of the error.
   2094                             errorText.append("<?>---------------------------------\n");
   2095                         } else if (expectedBreaks[ci]) {
   2096                             // This a non-error expected break position.
   2097                             errorText.append("------------------------------------\n");
   2098                         }
   2099                         if (ci < testText.length()) {
   2100                             c = UTF16.charAt(testText, ci);
   2101                             appendCharToBuf(errorText, c, 11);
   2102                             String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
   2103                             appendToBuf(errorText, gc, 8);
   2104                             int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
   2105                             String extraPropValue =
   2106                                     UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
   2107                             appendToBuf(errorText, extraPropValue, 20);
   2108 
   2109                             String charName = UCharacter.getExtendedName(c);
   2110                             appendToBuf(errorText, charName, 40);
   2111                             errorText.append('\n');
   2112                         }
   2113                     }
   2114                     if (ci == testText.length() && ci != -1) {
   2115                         errorText.append("<>");
   2116                     }
   2117                     errorText.append("</data>\n");
   2118 
   2119                     // Output the error
   2120                     errln(name + " break monkey test error.  " +
   2121                             (expectedBreaks[i]? "Break expected but not found." : "Break found but not expected.") +
   2122                             "\nOperation = " + errorType + "; random seed = " + seed + ";  buf Idx = " + i + "\n" +
   2123                             errorText);
   2124                     break;
   2125                 }
   2126             }
   2127 
   2128             loopCount++;
   2129         }
   2130     }
   2131 
   2132     @Test
   2133     public void TestCharMonkey() {
   2134 
   2135         int        loopCount = 500;
   2136         int        seed      = 1;
   2137 
   2138         if (TestFmwk.getExhaustiveness() >= 9) {
   2139             loopCount = 10000;
   2140         }
   2141 
   2142         RBBICharMonkey  m = new RBBICharMonkey();
   2143         BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
   2144         RunMonkey(bi, m, "char", seed, loopCount);
   2145     }
   2146 
   2147     @Test
   2148     public void TestWordMonkey() {
   2149 
   2150         int        loopCount = 500;
   2151         int        seed      = 1;
   2152 
   2153         if (TestFmwk.getExhaustiveness() >= 9) {
   2154             loopCount = 10000;
   2155         }
   2156 
   2157         logln("Word Break Monkey Test");
   2158         RBBIWordMonkey  m = new RBBIWordMonkey();
   2159         BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
   2160         RunMonkey(bi, m, "word", seed, loopCount);
   2161     }
   2162 
   2163     @Test
   2164     public void TestLineMonkey() {
   2165         int        loopCount = 500;
   2166         int        seed      = 1;
   2167 
   2168         if (TestFmwk.getExhaustiveness() >= 9) {
   2169             loopCount = 10000;
   2170         }
   2171 
   2172         logln("Line Break Monkey Test");
   2173         RBBILineMonkey  m = new RBBILineMonkey();
   2174         BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
   2175         RunMonkey(bi, m, "line", seed, loopCount);
   2176     }
   2177 
   2178     @Test
   2179     public void TestSentMonkey() {
   2180 
   2181         int        loopCount = 500;
   2182         int        seed      = 1;
   2183 
   2184         if (TestFmwk.getExhaustiveness() >= 9) {
   2185             loopCount = 3000;
   2186         }
   2187 
   2188         logln("Sentence Break Monkey Test");
   2189         RBBISentenceMonkey  m = new RBBISentenceMonkey();
   2190         BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
   2191         RunMonkey(bi, m, "sent", seed, loopCount);
   2192     }
   2193     //
   2194     //  Round-trip monkey tests.
   2195     //  Verify that break iterators created from the rule source from the default
   2196     //    break iterators still pass the monkey test for the iterator type.
   2197     //
   2198     //  This is a major test for the Rule Compiler.  The default break iterators are built
   2199     //  from pre-compiled binary rule data that was created using ICU4C; these
   2200     //  round-trip rule recompile tests verify that the Java rule compiler can
   2201     //  rebuild break iterators from the original source rules.
   2202     //
   2203     @Test
   2204     public void TestRTCharMonkey() {
   2205 
   2206         int        loopCount = 200;
   2207         int        seed      = 1;
   2208 
   2209         if (TestFmwk.getExhaustiveness() >= 9) {
   2210             loopCount = 2000;
   2211         }
   2212 
   2213         RBBICharMonkey  m = new RBBICharMonkey();
   2214         BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
   2215         String rules = bi.toString();
   2216         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
   2217         RunMonkey(rtbi, m, "char", seed, loopCount);
   2218     }
   2219 
   2220     @Test
   2221     public void TestRTWordMonkey() {
   2222 
   2223         int        loopCount = 200;
   2224         int        seed      = 1;
   2225 
   2226         if (TestFmwk.getExhaustiveness() >= 9) {
   2227             loopCount = 2000;
   2228         }
   2229         logln("Word Break Monkey Test");
   2230         RBBIWordMonkey  m = new RBBIWordMonkey();
   2231         BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
   2232         String rules = bi.toString();
   2233         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
   2234         RunMonkey(rtbi, m, "word", seed, loopCount);
   2235     }
   2236 
   2237     @Test
   2238     public void TestRTLineMonkey() {
   2239         int        loopCount = 200;
   2240         int        seed      = 1;
   2241 
   2242         if (TestFmwk.getExhaustiveness() >= 9) {
   2243             loopCount = 2000;
   2244         }
   2245 
   2246         logln("Line Break Monkey Test");
   2247         RBBILineMonkey  m = new RBBILineMonkey();
   2248         BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
   2249         String rules = bi.toString();
   2250         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
   2251         RunMonkey(rtbi, m, "line", seed, loopCount);
   2252     }
   2253 
   2254     @Test
   2255     public void TestRTSentMonkey() {
   2256 
   2257         int        loopCount = 200;
   2258         int        seed      = 1;
   2259 
   2260         if (TestFmwk.getExhaustiveness() >= 9) {
   2261             loopCount = 1000;
   2262         }
   2263 
   2264         logln("Sentence Break Monkey Test");
   2265         RBBISentenceMonkey  m = new RBBISentenceMonkey();
   2266         BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
   2267         String rules = bi.toString();
   2268         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
   2269         RunMonkey(rtbi, m, "sent", seed, loopCount);
   2270     }
   2271 }
   2272 
   2273