Home | History | Annotate | Download | only in intltest
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * Copyright (c) 1999-2016, International Business Machines
      5  * Corporation and others. All Rights Reserved.
      6  ********************************************************************
      7  *   Date        Name        Description
      8  *   12/14/99    Madhu        Creation.
      9  *   01/12/2000  Madhu        updated for changed API
     10  ********************************************************************/
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_BREAK_ITERATION
     15 
     16 #include "unicode/uchar.h"
     17 #include "intltest.h"
     18 #include "unicode/rbbi.h"
     19 #include "unicode/schriter.h"
     20 #include "rbbiapts.h"
     21 #include "rbbidata.h"
     22 #include "cstring.h"
     23 #include "ubrkimpl.h"
     24 #include "unicode/locid.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/utext.h"
     27 #include "cmemory.h"
     28 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
     29 #include "unicode/filteredbrk.h"
     30 #include <stdio.h> // for sprintf
     31 #endif
     32 /**
     33  * API Test the RuleBasedBreakIterator class
     34  */
     35 
     36 
// Reports a data error (via dataerrln) naming the source file, line number and
// ICU error name when `status` holds a failure code. The macro expands to a
// bare brace block and does not return, so the calling test keeps running.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Reports a test error (via errln) with the source file, line number and the
// stringified expression when `expr` compares equal to FALSE. Like the macro
// above it expands to a bare brace block and does not stop the calling test.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
     42 
     43 void RBBIAPITest::TestCloneEquals()
     44 {
     45 
     46     UErrorCode status=U_ZERO_ERROR;
     47     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
     48     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
     49     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
     50     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
     51     if(U_FAILURE(status)){
     52         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
     53         return;
     54     }
     55 
     56 
     57     UnicodeString testString="Testing word break iterators's clone() and equals()";
     58     bi1->setText(testString);
     59     bi2->setText(testString);
     60     biequal->setText(testString);
     61 
     62     bi3->setText("hello");
     63 
     64     logln((UnicodeString)"Testing equals()");
     65 
     66     logln((UnicodeString)"Testing == and !=");
     67     UBool b = (*bi1 != *biequal);
     68     b |= *bi1 == *bi2;
     69     b |= *bi1 == *bi3;
     70     if (b) {
     71         errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
     72     }
     73 
     74     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
     75         errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
     76 
     77 
     78     // Quick test of RulesBasedBreakIterator assignment -
     79     // Check that
     80     //    two different iterators are !=
     81     //    they are == after assignment
     82     //    source and dest iterator produce the same next() after assignment.
     83     //    deleting one doesn't disable the other.
     84     logln("Testing assignment");
     85     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
     86     if(U_FAILURE(status)){
     87         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
     88         return;
     89     }
     90 
     91     RuleBasedBreakIterator biDefault, biDefault2;
     92     if(U_FAILURE(status)){
     93         errln((UnicodeString)"FAIL : in construction of default iterator");
     94         return;
     95     }
     96     if (biDefault == *bix) {
     97         errln((UnicodeString)"ERROR: iterators should not compare ==");
     98         return;
     99     }
    100     if (biDefault != biDefault2) {
    101         errln((UnicodeString)"ERROR: iterators should compare ==");
    102         return;
    103     }
    104 
    105 
    106     UnicodeString   HelloString("Hello Kitty");
    107     bix->setText(HelloString);
    108     if (*bix == *bi2) {
    109         errln(UnicodeString("ERROR: strings should not be equal before assignment."));
    110     }
    111     *bix = *bi2;
    112     if (*bix != *bi2) {
    113         errln(UnicodeString("ERROR: strings should be equal before assignment."));
    114     }
    115 
    116     int bixnext = bix->next();
    117     int bi2next = bi2->next();
    118     if (! (bixnext == bi2next && bixnext == 7)) {
    119         errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
    120     }
    121     delete bix;
    122     if (bi2->next() != 8) {
    123         errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
    124     }
    125 
    126 
    127 
    128     logln((UnicodeString)"Testing clone()");
    129     RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
    130     RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
    131 
    132     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
    133       *bi1clone == *bi3 || *bi1clone == *bi2)
    134         errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
    135 
    136     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
    137        *bi2clone == *bi3 || *bi2clone != *bi2)
    138         errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
    139 
    140     if(bi1->getText() != bi1clone->getText()   ||
    141        bi2clone->getText() != bi2->getText()   ||
    142        *bi2clone == *bi1clone )
    143         errln((UnicodeString)"ERROR: RBBI's clone() method failed");
    144 
    145     delete bi1clone;
    146     delete bi2clone;
    147     delete bi1;
    148     delete bi3;
    149     delete bi2;
    150     delete biequal;
    151 }
    152 
    153 void RBBIAPITest::TestBoilerPlate()
    154 {
    155     UErrorCode status = U_ZERO_ERROR;
    156     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
    157     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
    158     if (U_FAILURE(status)) {
    159         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
    160         return;
    161     }
    162     if(*a!=*b){
    163         errln("Failed: boilerplate method operator!= does not return correct results");
    164     }
    165     // Japanese word break iterators are identical to root with
    166     // a dictionary-based break iterator
    167     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
    168     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
    169     if(c && d){
    170         if(*c!=*d){
    171             errln("Failed: boilerplate method operator== does not return correct results");
    172         }
    173     }else{
    174         errln("creation of break iterator failed");
    175     }
    176     delete a;
    177     delete b;
    178     delete c;
    179     delete d;
    180 }
    181 
    182 void RBBIAPITest::TestgetRules()
    183 {
    184     UErrorCode status=U_ZERO_ERROR;
    185 
    186     RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    187     RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
    188     if(U_FAILURE(status)){
    189         errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
    190         delete bi1;
    191         delete bi2;
    192         return;
    193     }
    194 
    195 
    196 
    197     logln((UnicodeString)"Testing toString()");
    198 
    199     bi1->setText((UnicodeString)"Hello there");
    200 
    201     RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
    202 
    203     UnicodeString temp=bi1->getRules();
    204     UnicodeString temp2=bi2->getRules();
    205     UnicodeString temp3=bi3->getRules();
    206     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
    207         errln((UnicodeString)"ERROR: error in getRules() method");
    208 
    209     delete bi1;
    210     delete bi2;
    211     delete bi3;
    212 }
    213 void RBBIAPITest::TestHashCode()
    214 {
    215     UErrorCode status=U_ZERO_ERROR;
    216     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    217     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    218     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
    219     if(U_FAILURE(status)){
    220         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
    221         delete bi1;
    222         delete bi2;
    223         delete bi3;
    224         return;
    225     }
    226 
    227 
    228     logln((UnicodeString)"Testing hashCode()");
    229 
    230     bi1->setText((UnicodeString)"Hash code");
    231     bi2->setText((UnicodeString)"Hash code");
    232     bi3->setText((UnicodeString)"Hash code");
    233 
    234     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
    235     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
    236 
    237     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
    238         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
    239         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
    240 
    241     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
    242         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
    243         errln((UnicodeString)"ERROR: different objects have same hashcodes");
    244 
    245     delete bi1clone;
    246     delete bi2clone;
    247     delete bi1;
    248     delete bi2;
    249     delete bi3;
    250 
    251 }
    252 void RBBIAPITest::TestGetSetAdoptText()
    253 {
    254     logln((UnicodeString)"Testing getText setText ");
    255     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
    256     UnicodeString str1="first string.";
    257     UnicodeString str2="Second string.";
    258     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
    259     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
    260     if(status.isFailure()){
    261         errcheckln(status, "Fail : in construction - %s", status.errorName());
    262             return;
    263     }
    264 
    265 
    266     CharacterIterator* text1= new StringCharacterIterator(str1);
    267     CharacterIterator* text1Clone = text1->clone();
    268     CharacterIterator* text2= new StringCharacterIterator(str2);
    269     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
    270 
    271     wordIter1->setText(str1);
    272     CharacterIterator *tci = &wordIter1->getText();
    273     UnicodeString      tstr;
    274     tci->getText(tstr);
    275     TEST_ASSERT(tstr == str1);
    276     if(wordIter1->current() != 0)
    277         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
    278 
    279     wordIter1->next(2);
    280 
    281     wordIter1->setText(str2);
    282     if(wordIter1->current() != 0)
    283         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
    284 
    285 
    286     charIter1->adoptText(text1Clone);
    287     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
    288     tci = &wordIter1->getText();
    289     tci->getText(tstr);
    290     TEST_ASSERT(tstr == str2);
    291     tci = &charIter1->getText();
    292     tci->getText(tstr);
    293     TEST_ASSERT(tstr == str1);
    294 
    295 
    296     LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
    297     rb->adoptText(text1);
    298     if(rb->getText() != *text1)
    299         errln((UnicodeString)"ERROR:1 error in adoptText ");
    300     rb->adoptText(text2);
    301     if(rb->getText() != *text2)
    302         errln((UnicodeString)"ERROR:2 error in adoptText ");
    303 
    304     // Adopt where iterator range is less than the entire orignal source string.
    305     //   (With the change of the break engine to working with UText internally,
    306     //    CharacterIterators starting at positions other than zero are not supported)
    307     rb->adoptText(text3);
    308     TEST_ASSERT(rb->preceding(2) == 0);
    309     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
    310     //if(rb->preceding(2) != 3) {
    311     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
    312     //}
    313     //if(rb->following(11) != BreakIterator::DONE) {
    314     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
    315     //}
    316 
    317     // UText API
    318     //
    319     //   Quick test to see if UText is working at all.
    320     //
    321     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
    322     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
    323     //                012345678901
    324 
    325     status.reset();
    326     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
    327     wordIter1->setText(ut.getAlias(), status);
    328     TEST_ASSERT_SUCCESS(status);
    329 
    330     int32_t pos;
    331     pos = wordIter1->first();
    332     TEST_ASSERT(pos==0);
    333     pos = wordIter1->next();
    334     TEST_ASSERT(pos==5);
    335     pos = wordIter1->next();
    336     TEST_ASSERT(pos==6);
    337     pos = wordIter1->next();
    338     TEST_ASSERT(pos==11);
    339     pos = wordIter1->next();
    340     TEST_ASSERT(pos==UBRK_DONE);
    341 
    342     status.reset();
    343     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
    344     TEST_ASSERT_SUCCESS(status);
    345     wordIter1->setText(ut2.getAlias(), status);
    346     TEST_ASSERT_SUCCESS(status);
    347 
    348     pos = wordIter1->first();
    349     TEST_ASSERT(pos==0);
    350     pos = wordIter1->next();
    351     TEST_ASSERT(pos==3);
    352     pos = wordIter1->next();
    353     TEST_ASSERT(pos==4);
    354 
    355     pos = wordIter1->last();
    356     TEST_ASSERT(pos==6);
    357     pos = wordIter1->previous();
    358     TEST_ASSERT(pos==4);
    359     pos = wordIter1->previous();
    360     TEST_ASSERT(pos==3);
    361     pos = wordIter1->previous();
    362     TEST_ASSERT(pos==0);
    363     pos = wordIter1->previous();
    364     TEST_ASSERT(pos==UBRK_DONE);
    365 
    366     status.reset();
    367     UnicodeString sEmpty;
    368     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
    369     wordIter1->getUText(gut2.getAlias(), status);
    370     TEST_ASSERT_SUCCESS(status);
    371     status.reset();
    372 }
    373 
    374 
    375 void RBBIAPITest::TestIteration()
    376 {
    377     // This test just verifies that the API is present.
    378     // Testing for correct operation of the break rules happens elsewhere.
    379 
    380     UErrorCode status=U_ZERO_ERROR;
    381     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    382     if (U_FAILURE(status) || bi == NULL)  {
    383         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
    384     }
    385     delete bi;
    386 
    387     status=U_ZERO_ERROR;
    388     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
    389     if (U_FAILURE(status) || bi == NULL)  {
    390         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
    391     }
    392     delete bi;
    393 
    394     status=U_ZERO_ERROR;
    395     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
    396     if (U_FAILURE(status) || bi == NULL)  {
    397         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
    398     }
    399     delete bi;
    400 
    401     status=U_ZERO_ERROR;
    402     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
    403     if (U_FAILURE(status) || bi == NULL)  {
    404         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
    405     }
    406     delete bi;
    407 
    408     status=U_ZERO_ERROR;
    409     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
    410     if (U_FAILURE(status) || bi == NULL)  {
    411         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
    412     }
    413     delete bi;
    414 
    415     status=U_ZERO_ERROR;
    416     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    417     if (U_FAILURE(status) || bi == NULL)  {
    418         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
    419         return;   // Skip the rest of these tests.
    420     }
    421 
    422 
    423     UnicodeString testString="0123456789";
    424     bi->setText(testString);
    425 
    426     int32_t i;
    427     i = bi->first();
    428     if (i != 0) {
    429         errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
    430     }
    431 
    432     i = bi->last();
    433     if (i != 10) {
    434         errln("Incorrect value from bi->last().  Expected 10, got %d", i);
    435     }
    436 
    437     //
    438     // Previous
    439     //
    440     bi->last();
    441     i = bi->previous();
    442     if (i != 9) {
    443         errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
    444     }
    445 
    446 
    447     bi->first();
    448     i = bi->previous();
    449     if (i != BreakIterator::DONE) {
    450         errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
    451     }
    452 
    453     //
    454     // next()
    455     //
    456     bi->first();
    457     i = bi->next();
    458     if (i != 1) {
    459         errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
    460     }
    461 
    462     bi->last();
    463     i = bi->next();
    464     if (i != BreakIterator::DONE) {
    465         errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
    466     }
    467 
    468 
    469     //
    470     //  current()
    471     //
    472     bi->first();
    473     i = bi->current();
    474     if (i != 0) {
    475         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
    476     }
    477 
    478     bi->next();
    479     i = bi->current();
    480     if (i != 1) {
    481         errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
    482     }
    483 
    484     bi->last();
    485     bi->next();
    486     i = bi->current();
    487     if (i != 10) {
    488         errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
    489     }
    490 
    491     bi->first();
    492     bi->previous();
    493     i = bi->current();
    494     if (i != 0) {
    495         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
    496     }
    497 
    498 
    499     //
    500     // Following()
    501     //
    502     i = bi->following(4);
    503     if (i != 5) {
    504         errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
    505     }
    506 
    507     i = bi->following(9);
    508     if (i != 10) {
    509         errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
    510     }
    511 
    512     i = bi->following(10);
    513     if (i != BreakIterator::DONE) {
    514         errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
    515     }
    516 
    517 
    518     //
    519     // Preceding
    520     //
    521     i = bi->preceding(4);
    522     if (i != 3) {
    523         errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
    524     }
    525 
    526     i = bi->preceding(10);
    527     if (i != 9) {
    528         errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
    529     }
    530 
    531     i = bi->preceding(1);
    532     if (i != 0) {
    533         errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
    534     }
    535 
    536     i = bi->preceding(0);
    537     if (i != BreakIterator::DONE) {
    538         errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
    539     }
    540 
    541 
    542     //
    543     // isBoundary()
    544     //
    545     bi->first();
    546     if (bi->isBoundary(3) != TRUE) {
    547         errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
    548     }
    549     i = bi->current();
    550     if (i != 3) {
    551         errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
    552     }
    553 
    554 
    555     if (bi->isBoundary(11) != FALSE) {
    556         errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
    557     }
    558     i = bi->current();
    559     if (i != 10) {
    560         errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
    561     }
    562 
    563     //
    564     // next(n)
    565     //
    566     bi->first();
    567     i = bi->next(4);
    568     if (i != 4) {
    569         errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
    570     }
    571 
    572     i = bi->next(6);
    573     if (i != 10) {
    574         errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
    575     }
    576 
    577     bi->first();
    578     i = bi->next(11);
    579     if (i != BreakIterator::DONE) {
    580         errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
    581     }
    582 
    583     delete bi;
    584 
    585 }
    586 
    587 
    588 
    589 
    590 
    591 
    592 void RBBIAPITest::TestBuilder() {
    593      UnicodeString rulesString1 = "$Letters = [:L:];\n"
    594                                   "$Numbers = [:N:];\n"
    595                                   "$Letters+;\n"
    596                                   "$Numbers+;\n"
    597                                   "[^$Letters $Numbers];\n"
    598                                   "!.*;\n";
    599      UnicodeString testString1  = "abc123..abc";
    600                                 // 01234567890
    601      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
    602      UErrorCode status=U_ZERO_ERROR;
    603      UParseError    parseError;
    604 
    605      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
    606      if(U_FAILURE(status)) {
    607          dataerrln("Fail : in construction - %s", u_errorName(status));
    608      } else {
    609          bi->setText(testString1);
    610          doBoundaryTest(*bi, testString1, bounds1);
    611      }
    612      delete bi;
    613 }
    614 
    615 
    616 //
    617 //  TestQuoteGrouping
    618 //       Single quotes within rules imply a grouping, so that a modifier
    619 //       following the quoted text (* or +) applies to all of the quoted chars.
    620 //
    621 void RBBIAPITest::TestQuoteGrouping() {
    622      UnicodeString rulesString1 = "#Here comes the rule...\n"
    623                                   "'$@!'*;\n"   //  (\$\@\!)*
    624                                   ".;\n";
    625 
    626      UnicodeString testString1  = "$@!$@!X$@!!X";
    627                                 // 0123456789012
    628      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
    629      UErrorCode status=U_ZERO_ERROR;
    630      UParseError    parseError;
    631 
    632      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
    633      if(U_FAILURE(status)) {
    634          dataerrln("Fail : in construction - %s", u_errorName(status));
    635      } else {
    636          bi->setText(testString1);
    637          doBoundaryTest(*bi, testString1, bounds1);
    638      }
    639      delete bi;
    640 }
    641 
    642 //
    643 //  TestRuleStatus
    644 //      Test word break rule status constants.
    645 //
    646 void RBBIAPITest::TestRuleStatus() {
    647      UChar str[30];
    648      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
    649      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
    650      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
    651               // 012345678901234567  8      9    0
    652               //                     Katakana
    653                 str, 30);
    654      UnicodeString testString1(str);
    655      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
    656      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
    657                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
    658                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
    659 
    660      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
    661                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
    662                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
    663 
    664      UErrorCode status=U_ZERO_ERROR;
    665 
    666      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
    667      if(U_FAILURE(status)) {
    668          errcheckln(status, "Fail : in construction - %s", u_errorName(status));
    669      } else {
    670          bi->setText(testString1);
    671          // First test that the breaks are in the right spots.
    672          doBoundaryTest(*bi, testString1, bounds1);
    673 
    674          // Then go back and check tag values
    675          int32_t i = 0;
    676          int32_t pos, tag;
    677          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
    678              if (pos != bounds1[i]) {
    679                  errln("FAIL: unexpected word break at postion %d", pos);
    680                  break;
    681              }
    682              tag = bi->getRuleStatus();
    683              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
    684                  errln("FAIL: incorrect tag value %d at position %d", tag, pos);
    685                  break;
    686              }
    687 
    688              // Check that we get the same tag values from getRuleStatusVec()
    689              int32_t vec[10];
    690              int t = bi->getRuleStatusVec(vec, 10, status);
    691              TEST_ASSERT_SUCCESS(status);
    692              TEST_ASSERT(t==1);
    693              TEST_ASSERT(vec[0] == tag);
    694          }
    695      }
    696      delete bi;
    697 
    698      // Now test line break status.  This test mostly is to confirm that the status constants
    699      //                              are correctly declared in the header.
    700      testString1 =   "test line. \n";
    701      // break type    s    s     h
    702 
    703      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
    704      if(U_FAILURE(status)) {
    705          errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
    706      } else {
    707          int32_t i = 0;
    708          int32_t pos, tag;
    709          UBool   success;
    710 
    711          bi->setText(testString1);
    712          pos = bi->current();
    713          tag = bi->getRuleStatus();
    714          for (i=0; i<3; i++) {
    715              switch (i) {
    716              case 0:
    717                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
    718              case 1:
    719                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
    720              case 2:
    721                  success = pos==12 && tag==UBRK_LINE_HARD; break;
    722              default:
    723                  success = FALSE; break;
    724              }
    725              if (success == FALSE) {
    726                  errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
    727                      i, pos, tag);
    728                  break;
    729              }
    730              pos = bi->next();
    731              tag = bi->getRuleStatus();
    732          }
    733          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
    734              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
    735              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
    736              errln("UBRK_LINE_* constants from header are inconsistent.");
    737          }
    738      }
    739      delete bi;
    740 
    741 }
    742 
    743 
    744 //
    745 //  TestRuleStatusVec
    746 //      Test the vector form of  break rule status.
    747 //
    748 void RBBIAPITest::TestRuleStatusVec() {
    749     UnicodeString rulesString(   "[A-N]{100}; \n"
    750                                  "[a-w]{200}; \n"
    751                                  "[\\p{L}]{300}; \n"
    752                                  "[\\p{N}]{400}; \n"
    753                                  "[0-5]{500}; \n"
    754                                   "!.*;\n", -1, US_INV);
    755      UnicodeString testString1  = "Aapz5?";
    756      int32_t  statusVals[10];
    757      int32_t  numStatuses;
    758      int32_t  pos;
    759 
    760      UErrorCode status=U_ZERO_ERROR;
    761      UParseError    parseError;
    762 
    763      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
    764      if (U_FAILURE(status)) {
    765          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
    766      } else {
    767          bi->setText(testString1);
    768 
    769          // A
    770          pos = bi->next();
    771          TEST_ASSERT(pos==1);
    772          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    773          TEST_ASSERT_SUCCESS(status);
    774          TEST_ASSERT(numStatuses == 2);
    775          TEST_ASSERT(statusVals[0] == 100);
    776          TEST_ASSERT(statusVals[1] == 300);
    777 
    778          // a
    779          pos = bi->next();
    780          TEST_ASSERT(pos==2);
    781          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    782          TEST_ASSERT_SUCCESS(status);
    783          TEST_ASSERT(numStatuses == 2);
    784          TEST_ASSERT(statusVals[0] == 200);
    785          TEST_ASSERT(statusVals[1] == 300);
    786 
    787          // p
    788          pos = bi->next();
    789          TEST_ASSERT(pos==3);
    790          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    791          TEST_ASSERT_SUCCESS(status);
    792          TEST_ASSERT(numStatuses == 2);
    793          TEST_ASSERT(statusVals[0] == 200);
    794          TEST_ASSERT(statusVals[1] == 300);
    795 
    796          // z
    797          pos = bi->next();
    798          TEST_ASSERT(pos==4);
    799          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    800          TEST_ASSERT_SUCCESS(status);
    801          TEST_ASSERT(numStatuses == 1);
    802          TEST_ASSERT(statusVals[0] == 300);
    803 
    804          // 5
    805          pos = bi->next();
    806          TEST_ASSERT(pos==5);
    807          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    808          TEST_ASSERT_SUCCESS(status);
    809          TEST_ASSERT(numStatuses == 2);
    810          TEST_ASSERT(statusVals[0] == 400);
    811          TEST_ASSERT(statusVals[1] == 500);
    812 
    813          // ?
    814          pos = bi->next();
    815          TEST_ASSERT(pos==6);
    816          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    817          TEST_ASSERT_SUCCESS(status);
    818          TEST_ASSERT(numStatuses == 1);
    819          TEST_ASSERT(statusVals[0] == 0);
    820 
    821          //
    822          //  Check buffer overflow error handling.   Char == A
    823          //
    824          bi->first();
    825          pos = bi->next();
    826          TEST_ASSERT(pos==1);
    827          memset(statusVals, -1, sizeof(statusVals));
    828          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
    829          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    830          TEST_ASSERT(numStatuses == 2);
    831          TEST_ASSERT(statusVals[0] == -1);
    832 
    833          status = U_ZERO_ERROR;
    834          memset(statusVals, -1, sizeof(statusVals));
    835          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
    836          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    837          TEST_ASSERT(numStatuses == 2);
    838          TEST_ASSERT(statusVals[0] == 100);
    839          TEST_ASSERT(statusVals[1] == -1);
    840 
    841          status = U_ZERO_ERROR;
    842          memset(statusVals, -1, sizeof(statusVals));
    843          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
    844          TEST_ASSERT_SUCCESS(status);
    845          TEST_ASSERT(numStatuses == 2);
    846          TEST_ASSERT(statusVals[0] == 100);
    847          TEST_ASSERT(statusVals[1] == 300);
    848          TEST_ASSERT(statusVals[2] == -1);
    849      }
    850      delete bi;
    851 
    852 }
    853 
    854 //
    855 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
    856 //                               $variable reference
    857 void RBBIAPITest::TestBug2190() {
    858      UnicodeString rulesString1 = "$aaa = abcd;\n"
    859                                   "$bbb = $aaa;\n"
    860                                   "$bbb;\n";
    861      UnicodeString testString1  = "abcdabcd";
    862                                 // 01234567890
    863      int32_t bounds1[] = {0, 4, 8};
    864      UErrorCode status=U_ZERO_ERROR;
    865      UParseError    parseError;
    866 
    867      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
    868      if(U_FAILURE(status)) {
    869          dataerrln("Fail : in construction - %s", u_errorName(status));
    870      } else {
    871          bi->setText(testString1);
    872          doBoundaryTest(*bi, testString1, bounds1);
    873      }
    874      delete bi;
    875 }
    876 
    877 
    878 void RBBIAPITest::TestRegistration() {
    879 #if !UCONFIG_NO_SERVICE
    880     UErrorCode status = U_ZERO_ERROR;
    881     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
    882     // ok to not delete these if we exit because of error?
    883     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
    884     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
    885     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
    886 
    887     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
    888         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
    889 
    890         delete ja_word;
    891         delete ja_char;
    892         delete root_word;
    893         delete root_char;
    894 
    895         return;
    896     }
    897 
    898     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
    899     {
    900 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
    901         if (ja_word && *ja_word == *root_word) {
    902             errln("japan not different from root");
    903         }
    904 #endif
    905     }
    906 
    907     {
    908         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
    909         UBool fail = TRUE;
    910         if(result){
    911             fail = *result != *ja_word;
    912         }
    913         delete result;
    914         if (fail) {
    915             errln("bad result for xx_XX/word");
    916         }
    917     }
    918 
    919     {
    920         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
    921         UBool fail = TRUE;
    922         if(result){
    923             fail = *result != *ja_char;
    924         }
    925         delete result;
    926         if (fail) {
    927             errln("bad result for ja_JP/char");
    928         }
    929     }
    930 
    931     {
    932         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
    933         UBool fail = TRUE;
    934         if(result){
    935             fail = *result != *root_char;
    936         }
    937         delete result;
    938         if (fail) {
    939             errln("bad result for xx_XX/char");
    940         }
    941     }
    942 
    943     {
    944         StringEnumeration* avail = BreakIterator::getAvailableLocales();
    945         UBool found = FALSE;
    946         const UnicodeString* p;
    947         while ((p = avail->snext(status))) {
    948             if (p->compare("xx") == 0) {
    949                 found = TRUE;
    950                 break;
    951             }
    952         }
    953         delete avail;
    954         if (!found) {
    955             errln("did not find test locale");
    956         }
    957     }
    958 
    959     {
    960         UBool unreg = BreakIterator::unregister(key, status);
    961         if (!unreg) {
    962             errln("unable to unregister");
    963         }
    964     }
    965 
    966     {
    967         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
    968         BreakIterator* root = BreakIterator::createWordInstance("", status);
    969         UBool fail = TRUE;
    970         if(root){
    971           fail = *root != *result;
    972         }
    973         delete root;
    974         delete result;
    975         if (fail) {
    976             errln("did not get root break");
    977         }
    978     }
    979 
    980     {
    981         StringEnumeration* avail = BreakIterator::getAvailableLocales();
    982         UBool found = FALSE;
    983         const UnicodeString* p;
    984         while ((p = avail->snext(status))) {
    985             if (p->compare("xx") == 0) {
    986                 found = TRUE;
    987                 break;
    988             }
    989         }
    990         delete avail;
    991         if (found) {
    992             errln("found test locale");
    993         }
    994     }
    995 
    996     {
    997         int32_t count;
    998         UBool   foundLocale = FALSE;
    999         const Locale *avail = BreakIterator::getAvailableLocales(count);
   1000         for (int i=0; i<count; i++) {
   1001             if (avail[i] == Locale::getEnglish()) {
   1002                 foundLocale = TRUE;
   1003                 break;
   1004             }
   1005         }
   1006         if (foundLocale == FALSE) {
   1007             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
   1008         }
   1009     }
   1010 
   1011 
   1012     // ja_word was adopted by factory
   1013     delete ja_char;
   1014     delete root_word;
   1015     delete root_char;
   1016 #endif
   1017 }
   1018 
   1019 void RBBIAPITest::RoundtripRule(const char *dataFile) {
   1020     UErrorCode status = U_ZERO_ERROR;
   1021     UParseError parseError;
   1022     parseError.line = 0;
   1023     parseError.offset = 0;
   1024     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
   1025     uint32_t length;
   1026     const UChar *builtSource;
   1027     const uint8_t *rbbiRules;
   1028     const uint8_t *builtRules;
   1029 
   1030     if (U_FAILURE(status)) {
   1031         errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
   1032         return;
   1033     }
   1034 
   1035     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
   1036     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
   1037     RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
   1038     if (U_FAILURE(status)) {
   1039         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
   1040                 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
   1041         errln(UnicodeString(builtSource));
   1042         return;
   1043     };
   1044     rbbiRules = brkItr->getBinaryRules(length);
   1045     logln("Comparing \"%s\" len=%d", dataFile, length);
   1046     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
   1047         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
   1048         return;
   1049     }
   1050     delete brkItr;
   1051 }
   1052 
   1053 void RBBIAPITest::TestRoundtripRules() {
   1054     RoundtripRule("word");
   1055     RoundtripRule("title");
   1056     RoundtripRule("sent");
   1057     RoundtripRule("line");
   1058     RoundtripRule("char");
   1059     if (!quick) {
   1060         RoundtripRule("word_POSIX");
   1061     }
   1062 }
   1063 
   1064 
   1065 // Check getBinaryRules() and construction of a break iterator from those rules.
   1066 
   1067 void RBBIAPITest::TestGetBinaryRules() {
   1068     UErrorCode status=U_ZERO_ERROR;
   1069     LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
   1070     TEST_ASSERT_SUCCESS(status);
   1071     RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
   1072     TEST_ASSERT(rbbi != NULL);
   1073 
   1074     // Check that the new line break iterator is nominally functional.
   1075     UnicodeString helloWorld("Hello, World!");
   1076     rbbi->setText(helloWorld);
   1077     int n = 0;
   1078     while (bi->next() != UBRK_DONE) {
   1079         ++n;
   1080     }
   1081     TEST_ASSERT(n == 2);
   1082 
   1083     // Extract the binary rules as a uint8_t blob.
   1084     uint32_t ruleLength;
   1085     const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
   1086     TEST_ASSERT(ruleLength > 0);
   1087     TEST_ASSERT(binRules != NULL);
   1088 
   1089     // Clone the binary rules, and create a break iterator from that.
   1090     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
   1091     uint8_t *clonedRules = new uint8_t[ruleLength];
   1092     memcpy(clonedRules, binRules, ruleLength);
   1093     RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
   1094     TEST_ASSERT_SUCCESS(status);
   1095 
   1096     // Check that the cloned line break iterator is nominally alive.
   1097     clonedBI.setText(helloWorld);
   1098     n = 0;
   1099     while (clonedBI.next() != UBRK_DONE) {
   1100         ++n;
   1101     }
   1102     TEST_ASSERT(n == 2);
   1103 
   1104     delete[] clonedRules;
   1105 }
   1106 
   1107 
   1108 void RBBIAPITest::TestRefreshInputText() {
   1109     /*
   1110      *  RefreshInput changes out the input of a Break Iterator without
   1111      *    changing anything else in the iterator's state.  Used with Java JNI,
   1112      *    when Java moves the underlying string storage.   This test
   1113      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
   1114      *    The right set of boundaries should still be found.
   1115      */
   1116     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
   1117     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
   1118     UErrorCode status = U_ZERO_ERROR;
   1119     UText ut1 = UTEXT_INITIALIZER;
   1120     UText ut2 = UTEXT_INITIALIZER;
   1121     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
   1122     TEST_ASSERT_SUCCESS(status);
   1123 
   1124     utext_openUChars(&ut1, testStr, -1, &status);
   1125     TEST_ASSERT_SUCCESS(status);
   1126 
   1127     if (U_SUCCESS(status)) {
   1128         bi->setText(&ut1, status);
   1129         TEST_ASSERT_SUCCESS(status);
   1130 
   1131         /* Line boundaries will occur before each letter in the original string */
   1132         TEST_ASSERT(1 == bi->next());
   1133         TEST_ASSERT(3 == bi->next());
   1134 
   1135         /* Move the string, kill the original string.  */
   1136         u_strcpy(movedStr, testStr);
   1137         u_memset(testStr, 0x20, u_strlen(testStr));
   1138         utext_openUChars(&ut2, movedStr, -1, &status);
   1139         TEST_ASSERT_SUCCESS(status);
   1140         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
   1141         TEST_ASSERT_SUCCESS(status);
   1142         TEST_ASSERT(bi == returnedBI);
   1143 
   1144         /* Find the following matches, now working in the moved string. */
   1145         TEST_ASSERT(5 == bi->next());
   1146         TEST_ASSERT(7 == bi->next());
   1147         TEST_ASSERT(8 == bi->next());
   1148         TEST_ASSERT(UBRK_DONE == bi->next());
   1149 
   1150         utext_close(&ut1);
   1151         utext_close(&ut2);
   1152     }
   1153     delete bi;
   1154 
   1155 }
   1156 
   #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
   // Logs the boundaries that `brk` reports from its current position onward.
   //
   //   brk   iterator to walk with next(); callers position it (e.g. first()) beforehand.
   //   ustr  the text the iterator was set to; used for display and to size the buffer.
   //   it    test object that receives the log and error output.
   //
   // Output: the string, the string with a pilcrow inserted at each boundary
   // (wrapped in lenticular brackets), and the list of boundary offsets.
   static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
       static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
       it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));

       const int32_t capacity = ustr.length();
       int32_t *pos = new int32_t[capacity];
       int32_t posCount = 0;

       // calculate breaks up front, so we can print out
       // sans any debugging
       for (int32_t n = brk->next(); n != UBRK_DONE; n = brk->next()) {
           // Check before storing: exactly `capacity` boundaries is legitimate,
           // one more would overrun the buffer.
           if (posCount >= capacity) {
               it.errln("brk count exceeds string length!");
               delete [] pos;   // do not leak the buffer on the error path
               return;
           }
           pos[posCount++] = n;
       }

       UnicodeString out;
       out.append((UChar)CHSTR);
       int32_t prev = 0;
       for (int32_t i = 0; i < posCount; i++) {
           const int32_t n = pos[i];
           out.append(ustr.tempSubString(prev, n-prev));
           out.append((UChar)PILCROW);
           prev = n;
       }
       out.append(ustr.tempSubString(prev, ustr.length()-prev));
       out.append((UChar)CHEND);
       it.logln(out);

       out.remove();
       for (int32_t i = 0; i < posCount; i++) {
           char tmp[100];
           sprintf(tmp, "%d ", (int)pos[i]);
           out.append(UnicodeString(tmp));
       }
       it.logln(out);
       delete [] pos;
   }
   #endif
   1197 
   1198 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
   1199 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
   1200   UErrorCode status = U_ZERO_ERROR;
   1201   LocalPointer<FilteredBreakIteratorBuilder> builder;
   1202   LocalPointer<BreakIterator> baseBI;
   1203   LocalPointer<BreakIterator> filteredBI;
   1204   LocalPointer<BreakIterator> frenchBI;
   1205 
   1206   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
   1207   const UnicodeString ABBR_MR("Mr.");
   1208   const UnicodeString ABBR_CAPT("Capt.");
   1209 
   1210   {
   1211     logln("Constructing empty builder\n");
   1212     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
   1213     TEST_ASSERT_SUCCESS(status);
   1214 
   1215     logln("Constructing base BI\n");
   1216     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1217     TEST_ASSERT_SUCCESS(status);
   1218 
   1219 	logln("Building new BI\n");
   1220     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1221     TEST_ASSERT_SUCCESS(status);
   1222 
   1223 	if (U_SUCCESS(status)) {
   1224         logln("Testing:");
   1225         filteredBI->setText(text);
   1226         TEST_ASSERT(20 == filteredBI->next()); // Mr.
   1227         TEST_ASSERT(84 == filteredBI->next()); // recovered.
   1228         TEST_ASSERT(90 == filteredBI->next()); // Capt.
   1229         TEST_ASSERT(181 == filteredBI->next()); // Mr.
   1230         TEST_ASSERT(278 == filteredBI->next()); // charge.
   1231         filteredBI->first();
   1232         prtbrks(filteredBI.getAlias(), text, *this);
   1233     }
   1234   }
   1235 
   1236   {
   1237     logln("Constructing empty builder\n");
   1238     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
   1239     TEST_ASSERT_SUCCESS(status);
   1240 
   1241     if (U_SUCCESS(status)) {
   1242         logln("Adding Mr. as an exception\n");
   1243         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
   1244         TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
   1245         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
   1246         TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
   1247         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
   1248         TEST_ASSERT_SUCCESS(status);
   1249 
   1250         logln("Constructing base BI\n");
   1251         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1252         TEST_ASSERT_SUCCESS(status);
   1253 
   1254         logln("Building new BI\n");
   1255         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1256         TEST_ASSERT_SUCCESS(status);
   1257 
   1258         logln("Testing:");
   1259         filteredBI->setText(text);
   1260         TEST_ASSERT(84 == filteredBI->next());
   1261         TEST_ASSERT(90 == filteredBI->next());// Capt.
   1262         TEST_ASSERT(278 == filteredBI->next());
   1263         filteredBI->first();
   1264         prtbrks(filteredBI.getAlias(), text, *this);
   1265     }
   1266   }
   1267 
   1268 
   1269   {
   1270     logln("Constructing empty builder\n");
   1271     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
   1272     TEST_ASSERT_SUCCESS(status);
   1273 
   1274     if (U_SUCCESS(status)) {
   1275         logln("Adding Mr. and Capt as an exception\n");
   1276         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
   1277         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
   1278         TEST_ASSERT_SUCCESS(status);
   1279 
   1280         logln("Constructing base BI\n");
   1281         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1282         TEST_ASSERT_SUCCESS(status);
   1283 
   1284         logln("Building new BI\n");
   1285         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1286         TEST_ASSERT_SUCCESS(status);
   1287 
   1288         logln("Testing:");
   1289         filteredBI->setText(text);
   1290         TEST_ASSERT(84 == filteredBI->next());
   1291         TEST_ASSERT(278 == filteredBI->next());
   1292         filteredBI->first();
   1293         prtbrks(filteredBI.getAlias(), text, *this);
   1294     }
   1295   }
   1296 
   1297 
   1298   {
   1299     logln("Constructing English builder\n");
   1300     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
   1301     TEST_ASSERT_SUCCESS(status);
   1302 
   1303     logln("Constructing base BI\n");
   1304     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1305     TEST_ASSERT_SUCCESS(status);
   1306 
   1307     if (U_SUCCESS(status)) {
   1308         logln("unsuppressing 'Capt'");
   1309         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
   1310 
   1311         logln("Building new BI\n");
   1312         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1313         TEST_ASSERT_SUCCESS(status);
   1314 
   1315         if(filteredBI.isValid()) {
   1316           logln("Testing:");
   1317           filteredBI->setText(text);
   1318           TEST_ASSERT(84 == filteredBI->next());
   1319           TEST_ASSERT(90 == filteredBI->next());
   1320           TEST_ASSERT(278 == filteredBI->next());
   1321           filteredBI->first();
   1322           prtbrks(filteredBI.getAlias(), text, *this);
   1323         }
   1324     }
   1325   }
   1326 
   1327 
   1328   {
   1329     logln("Constructing English builder\n");
   1330     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
   1331     TEST_ASSERT_SUCCESS(status);
   1332 
   1333     logln("Constructing base BI\n");
   1334     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1335     TEST_ASSERT_SUCCESS(status);
   1336 
   1337     if (U_SUCCESS(status)) {
   1338         logln("Building new BI\n");
   1339         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1340         TEST_ASSERT_SUCCESS(status);
   1341 
   1342         if(filteredBI.isValid()) {
   1343           logln("Testing:");
   1344           filteredBI->setText(text);
   1345           TEST_ASSERT(84 == filteredBI->next());
   1346           TEST_ASSERT(278 == filteredBI->next());
   1347           filteredBI->first();
   1348           prtbrks(filteredBI.getAlias(), text, *this);
   1349         }
   1350     }
   1351   }
   1352 
   1353   // reenable once french is in
   1354   {
   1355     logln("Constructing French builder");
   1356     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
   1357     TEST_ASSERT_SUCCESS(status);
   1358 
   1359     logln("Constructing base BI\n");
   1360     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
   1361     TEST_ASSERT_SUCCESS(status);
   1362 
   1363     if (U_SUCCESS(status)) {
   1364         logln("Building new BI\n");
   1365         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1366         TEST_ASSERT_SUCCESS(status);
   1367     }
   1368 
   1369     if(frenchBI.isValid()) {
   1370       logln("Testing:");
   1371       UnicodeString frText("C'est MM. Duval.");
   1372       frenchBI->setText(frText);
   1373       TEST_ASSERT(16 == frenchBI->next());
   1374       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
   1375       frenchBI->first();
   1376       prtbrks(frenchBI.getAlias(), frText, *this);
   1377       logln("Testing against English:");
   1378       filteredBI->setText(frText);
   1379       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
   1380       TEST_ASSERT(16 == filteredBI->next());
   1381       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
   1382       filteredBI->first();
   1383       prtbrks(filteredBI.getAlias(), frText, *this);
   1384 
   1385       // Verify ==
   1386       TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
   1387       TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
   1388       TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
   1389     } else {
   1390       dataerrln("French BI: not valid.");
   1391 	}
   1392   }
   1393 
   1394 #else
   1395   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
   1396 #endif
   1397 }
   1398 
   1399 //---------------------------------------------
   1400 // runIndexedTest
   1401 //---------------------------------------------
   1402 
   1403 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
   1404 {
   1405     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
   1406     TESTCASE_AUTO_BEGIN;
   1407 #if !UCONFIG_NO_FILE_IO
   1408     TESTCASE_AUTO(TestCloneEquals);
   1409     TESTCASE_AUTO(TestgetRules);
   1410     TESTCASE_AUTO(TestHashCode);
   1411     TESTCASE_AUTO(TestGetSetAdoptText);
   1412     TESTCASE_AUTO(TestIteration);
   1413 #endif
   1414     TESTCASE_AUTO(TestBuilder);
   1415     TESTCASE_AUTO(TestQuoteGrouping);
   1416     TESTCASE_AUTO(TestRuleStatusVec);
   1417     TESTCASE_AUTO(TestBug2190);
   1418 #if !UCONFIG_NO_FILE_IO
   1419     TESTCASE_AUTO(TestRegistration);
   1420     TESTCASE_AUTO(TestBoilerPlate);
   1421     TESTCASE_AUTO(TestRuleStatus);
   1422     TESTCASE_AUTO(TestRoundtripRules);
   1423     TESTCASE_AUTO(TestGetBinaryRules);
   1424 #endif
   1425     TESTCASE_AUTO(TestRefreshInputText);
   1426 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
   1427     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
   1428 #endif
   1429     TESTCASE_AUTO_END;
   1430 }
   1431 
   1432 
   1433 //---------------------------------------------
   1434 //Internal subroutines
   1435 //---------------------------------------------
   1436 
   1437 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
   1438      logln((UnicodeString)"testIsBoundary():");
   1439         int32_t p = 0;
   1440         UBool isB;
   1441         for (int32_t i = 0; i < text.length(); i++) {
   1442             isB = bi.isBoundary(i);
   1443             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
   1444 
   1445             if (i == boundaries[p]) {
   1446                 if (!isB)
   1447                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
   1448                 p++;
   1449             }
   1450             else {
   1451                 if (isB)
   1452                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
   1453             }
   1454         }
   1455 }
   1456 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
   1457     UnicodeString selected;
   1458     UnicodeString expected=CharsToUnicodeString(expectedString);
   1459 
   1460     if(gotoffset != expectedOffset)
   1461          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
   1462     if(start <= gotoffset){
   1463         testString.extractBetween(start, gotoffset, selected);
   1464     }
   1465     else{
   1466         testString.extractBetween(gotoffset, start, selected);
   1467     }
   1468     if(selected.compare(expected) != 0)
   1469          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
   1470     else
   1471         logln(prettify("****selected \"" + selected + "\""));
   1472 }
   1473 
   1474 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
   1475