Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * Copyright (c) 1999-2014, International Business Machines
      3  * Corporation and others. All Rights Reserved.
      4  ********************************************************************
      5  *   Date        Name        Description
      6  *   12/14/99    Madhu        Creation.
      7  *   01/12/2000  Madhu        updated for changed API
      8  ********************************************************************/
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/uchar.h"
     15 #include "intltest.h"
     16 #include "unicode/rbbi.h"
     17 #include "unicode/schriter.h"
     18 #include "rbbiapts.h"
     19 #include "rbbidata.h"
     20 #include "cstring.h"
     21 #include "ubrkimpl.h"
     22 #include "unicode/locid.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/utext.h"
     25 #include "cmemory.h"
     26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
     27 #include "unicode/filteredbrk.h"
     28 #include <stdio.h> // for sprintf
     29 #endif
     30 /**
     31  * API Test the RuleBasedBreakIterator class
     32  */
     33 
     34 
     // Calls dataerrln() with the current file, line and u_errorName(status)
     // when U_FAILURE(status) holds; otherwise does nothing.
     // The body is wrapped in do { ... } while (0) so that the macro expands to
     // exactly one statement and "TEST_ASSERT_SUCCESS(x);" remains valid inside
     // an unbraced if/else.  Note that `status` is evaluated more than once.
     #define TEST_ASSERT_SUCCESS(status) do { \
         if (U_FAILURE(status)) { \
             dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
         } \
     } while (0)
     37 
     // Calls errln() with the current file, line and the stringified expression
     // when `expr` evaluates to false; otherwise does nothing.
     // The body is wrapped in do { ... } while (0) so that the macro expands to
     // exactly one statement and "TEST_ASSERT(x);" remains valid inside an
     // unbraced if/else.  The text of `expr` is part of the emitted message.
     #define TEST_ASSERT(expr) do { \
         if (!(expr)) { \
             errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
         } \
     } while (0)
     40 
     41 void RBBIAPITest::TestCloneEquals()
     42 {
     43 
     44     UErrorCode status=U_ZERO_ERROR;
     45     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
     46     RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
     47     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
     48     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
     49     if(U_FAILURE(status)){
     50         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
     51         return;
     52     }
     53 
     54 
     55     UnicodeString testString="Testing word break iterators's clone() and equals()";
     56     bi1->setText(testString);
     57     bi2->setText(testString);
     58     biequal->setText(testString);
     59 
     60     bi3->setText("hello");
     61 
     62     logln((UnicodeString)"Testing equals()");
     63 
     64     logln((UnicodeString)"Testing == and !=");
     65     UBool b = (*bi1 != *biequal);
     66     b |= *bi1 == *bi2;
     67     b |= *bi1 == *bi3;
     68     if (b) {
     69         errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
     70     }
     71 
     72     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
     73         errln((UnicodeString)"ERROR:2 RBBI's == and != operator  failed.");
     74 
     75 
     76     // Quick test of RulesBasedBreakIterator assignment -
     77     // Check that
     78     //    two different iterators are !=
     79     //    they are == after assignment
     80     //    source and dest iterator produce the same next() after assignment.
     81     //    deleting one doesn't disable the other.
     82     logln("Testing assignment");
     83     RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
     84     if(U_FAILURE(status)){
     85         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
     86         return;
     87     }
     88 
     89     RuleBasedBreakIterator biDefault, biDefault2;
     90     if(U_FAILURE(status)){
     91         errln((UnicodeString)"FAIL : in construction of default iterator");
     92         return;
     93     }
     94     if (biDefault == *bix) {
     95         errln((UnicodeString)"ERROR: iterators should not compare ==");
     96         return;
     97     }
     98     if (biDefault != biDefault2) {
     99         errln((UnicodeString)"ERROR: iterators should compare ==");
    100         return;
    101     }
    102 
    103 
    104     UnicodeString   HelloString("Hello Kitty");
    105     bix->setText(HelloString);
    106     if (*bix == *bi2) {
    107         errln(UnicodeString("ERROR: strings should not be equal before assignment."));
    108     }
    109     *bix = *bi2;
    110     if (*bix != *bi2) {
    111         errln(UnicodeString("ERROR: strings should be equal before assignment."));
    112     }
    113 
    114     int bixnext = bix->next();
    115     int bi2next = bi2->next();
    116     if (! (bixnext == bi2next && bixnext == 7)) {
    117         errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
    118     }
    119     delete bix;
    120     if (bi2->next() != 8) {
    121         errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
    122     }
    123 
    124 
    125 
    126     logln((UnicodeString)"Testing clone()");
    127     RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
    128     RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
    129 
    130     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
    131       *bi1clone == *bi3 || *bi1clone == *bi2)
    132         errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
    133 
    134     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
    135        *bi2clone == *bi3 || *bi2clone != *bi2)
    136         errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
    137 
    138     if(bi1->getText() != bi1clone->getText()   ||
    139        bi2clone->getText() != bi2->getText()   ||
    140        *bi2clone == *bi1clone )
    141         errln((UnicodeString)"ERROR: RBBI's clone() method failed");
    142 
    143     delete bi1clone;
    144     delete bi2clone;
    145     delete bi1;
    146     delete bi3;
    147     delete bi2;
    148     delete biequal;
    149 }
    150 
    151 void RBBIAPITest::TestBoilerPlate()
    152 {
    153     UErrorCode status = U_ZERO_ERROR;
    154     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
    155     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
    156     if (U_FAILURE(status)) {
    157         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
    158         return;
    159     }
    160     if(*a!=*b){
    161         errln("Failed: boilerplate method operator!= does not return correct results");
    162     }
    163     // Japanese word break iterators are identical to root with
    164     // a dictionary-based break iterator
    165     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
    166     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
    167     if(c && d){
    168         if(*c!=*d){
    169             errln("Failed: boilerplate method operator== does not return correct results");
    170         }
    171     }else{
    172         errln("creation of break iterator failed");
    173     }
    174     delete a;
    175     delete b;
    176     delete c;
    177     delete d;
    178 }
    179 
    180 void RBBIAPITest::TestgetRules()
    181 {
    182     UErrorCode status=U_ZERO_ERROR;
    183 
    184     RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    185     RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
    186     if(U_FAILURE(status)){
    187         errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
    188         delete bi1;
    189         delete bi2;
    190         return;
    191     }
    192 
    193 
    194 
    195     logln((UnicodeString)"Testing toString()");
    196 
    197     bi1->setText((UnicodeString)"Hello there");
    198 
    199     RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
    200 
    201     UnicodeString temp=bi1->getRules();
    202     UnicodeString temp2=bi2->getRules();
    203     UnicodeString temp3=bi3->getRules();
    204     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
    205         errln((UnicodeString)"ERROR: error in getRules() method");
    206 
    207     delete bi1;
    208     delete bi2;
    209     delete bi3;
    210 }
    211 void RBBIAPITest::TestHashCode()
    212 {
    213     UErrorCode status=U_ZERO_ERROR;
    214     RuleBasedBreakIterator* bi1     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    215     RuleBasedBreakIterator* bi3     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    216     RuleBasedBreakIterator* bi2     = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
    217     if(U_FAILURE(status)){
    218         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
    219         delete bi1;
    220         delete bi2;
    221         delete bi3;
    222         return;
    223     }
    224 
    225 
    226     logln((UnicodeString)"Testing hashCode()");
    227 
    228     bi1->setText((UnicodeString)"Hash code");
    229     bi2->setText((UnicodeString)"Hash code");
    230     bi3->setText((UnicodeString)"Hash code");
    231 
    232     RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
    233     RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
    234 
    235     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
    236         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
    237         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
    238 
    239     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
    240         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
    241         errln((UnicodeString)"ERROR: different objects have same hashcodes");
    242 
    243     delete bi1clone;
    244     delete bi2clone;
    245     delete bi1;
    246     delete bi2;
    247     delete bi3;
    248 
    249 }
    250 void RBBIAPITest::TestGetSetAdoptText()
    251 {
    252     logln((UnicodeString)"Testing getText setText ");
    253     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
    254     UnicodeString str1="first string.";
    255     UnicodeString str2="Second string.";
    256     LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
    257     LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
    258     if(status.isFailure()){
    259         errcheckln(status, "Fail : in construction - %s", status.errorName());
    260             return;
    261     }
    262 
    263 
    264     CharacterIterator* text1= new StringCharacterIterator(str1);
    265     CharacterIterator* text1Clone = text1->clone();
    266     CharacterIterator* text2= new StringCharacterIterator(str2);
    267     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
    268 
    269     wordIter1->setText(str1);
    270     CharacterIterator *tci = &wordIter1->getText();
    271     UnicodeString      tstr;
    272     tci->getText(tstr);
    273     TEST_ASSERT(tstr == str1);
    274     if(wordIter1->current() != 0)
    275         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
    276 
    277     wordIter1->next(2);
    278 
    279     wordIter1->setText(str2);
    280     if(wordIter1->current() != 0)
    281         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
    282 
    283 
    284     charIter1->adoptText(text1Clone);
    285     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
    286     tci = &wordIter1->getText();
    287     tci->getText(tstr);
    288     TEST_ASSERT(tstr == str2);
    289     tci = &charIter1->getText();
    290     tci->getText(tstr);
    291     TEST_ASSERT(tstr == str1);
    292 
    293 
    294     LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
    295     rb->adoptText(text1);
    296     if(rb->getText() != *text1)
    297         errln((UnicodeString)"ERROR:1 error in adoptText ");
    298     rb->adoptText(text2);
    299     if(rb->getText() != *text2)
    300         errln((UnicodeString)"ERROR:2 error in adoptText ");
    301 
    302     // Adopt where iterator range is less than the entire orignal source string.
    303     //   (With the change of the break engine to working with UText internally,
    304     //    CharacterIterators starting at positions other than zero are not supported)
    305     rb->adoptText(text3);
    306     TEST_ASSERT(rb->preceding(2) == 0);
    307     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
    308     //if(rb->preceding(2) != 3) {
    309     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
    310     //}
    311     //if(rb->following(11) != BreakIterator::DONE) {
    312     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
    313     //}
    314 
    315     // UText API
    316     //
    317     //   Quick test to see if UText is working at all.
    318     //
    319     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
    320     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
    321     //                012345678901
    322 
    323     status.reset();
    324     LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
    325     wordIter1->setText(ut.getAlias(), status);
    326     TEST_ASSERT_SUCCESS(status);
    327 
    328     int32_t pos;
    329     pos = wordIter1->first();
    330     TEST_ASSERT(pos==0);
    331     pos = wordIter1->next();
    332     TEST_ASSERT(pos==5);
    333     pos = wordIter1->next();
    334     TEST_ASSERT(pos==6);
    335     pos = wordIter1->next();
    336     TEST_ASSERT(pos==11);
    337     pos = wordIter1->next();
    338     TEST_ASSERT(pos==UBRK_DONE);
    339 
    340     status.reset();
    341     LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
    342     TEST_ASSERT_SUCCESS(status);
    343     wordIter1->setText(ut2.getAlias(), status);
    344     TEST_ASSERT_SUCCESS(status);
    345 
    346     pos = wordIter1->first();
    347     TEST_ASSERT(pos==0);
    348     pos = wordIter1->next();
    349     TEST_ASSERT(pos==3);
    350     pos = wordIter1->next();
    351     TEST_ASSERT(pos==4);
    352 
    353     pos = wordIter1->last();
    354     TEST_ASSERT(pos==6);
    355     pos = wordIter1->previous();
    356     TEST_ASSERT(pos==4);
    357     pos = wordIter1->previous();
    358     TEST_ASSERT(pos==3);
    359     pos = wordIter1->previous();
    360     TEST_ASSERT(pos==0);
    361     pos = wordIter1->previous();
    362     TEST_ASSERT(pos==UBRK_DONE);
    363 
    364     status.reset();
    365     UnicodeString sEmpty;
    366     LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
    367     wordIter1->getUText(gut2.getAlias(), status);
    368     TEST_ASSERT_SUCCESS(status);
    369     status.reset();
    370 }
    371 
    372 
    373 void RBBIAPITest::TestIteration()
    374 {
    375     // This test just verifies that the API is present.
    376     // Testing for correct operation of the break rules happens elsewhere.
    377 
    378     UErrorCode status=U_ZERO_ERROR;
    379     RuleBasedBreakIterator* bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    380     if (U_FAILURE(status) || bi == NULL)  {
    381         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
    382     }
    383     delete bi;
    384 
    385     status=U_ZERO_ERROR;
    386     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
    387     if (U_FAILURE(status) || bi == NULL)  {
    388         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
    389     }
    390     delete bi;
    391 
    392     status=U_ZERO_ERROR;
    393     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
    394     if (U_FAILURE(status) || bi == NULL)  {
    395         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
    396     }
    397     delete bi;
    398 
    399     status=U_ZERO_ERROR;
    400     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
    401     if (U_FAILURE(status) || bi == NULL)  {
    402         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
    403     }
    404     delete bi;
    405 
    406     status=U_ZERO_ERROR;
    407     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
    408     if (U_FAILURE(status) || bi == NULL)  {
    409         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
    410     }
    411     delete bi;
    412 
    413     status=U_ZERO_ERROR;
    414     bi  = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
    415     if (U_FAILURE(status) || bi == NULL)  {
    416         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
    417         return;   // Skip the rest of these tests.
    418     }
    419 
    420 
    421     UnicodeString testString="0123456789";
    422     bi->setText(testString);
    423 
    424     int32_t i;
    425     i = bi->first();
    426     if (i != 0) {
    427         errln("Incorrect value from bi->first().  Expected 0, got %d.", i);
    428     }
    429 
    430     i = bi->last();
    431     if (i != 10) {
    432         errln("Incorrect value from bi->last().  Expected 10, got %d", i);
    433     }
    434 
    435     //
    436     // Previous
    437     //
    438     bi->last();
    439     i = bi->previous();
    440     if (i != 9) {
    441         errln("Incorrect value from bi->last() at line %d.  Expected 9, got %d", __LINE__, i);
    442     }
    443 
    444 
    445     bi->first();
    446     i = bi->previous();
    447     if (i != BreakIterator::DONE) {
    448         errln("Incorrect value from bi->previous() at line %d.  Expected DONE, got %d", __LINE__, i);
    449     }
    450 
    451     //
    452     // next()
    453     //
    454     bi->first();
    455     i = bi->next();
    456     if (i != 1) {
    457         errln("Incorrect value from bi->next() at line %d.  Expected 1, got %d", __LINE__, i);
    458     }
    459 
    460     bi->last();
    461     i = bi->next();
    462     if (i != BreakIterator::DONE) {
    463         errln("Incorrect value from bi->next() at line %d.  Expected DONE, got %d", __LINE__, i);
    464     }
    465 
    466 
    467     //
    468     //  current()
    469     //
    470     bi->first();
    471     i = bi->current();
    472     if (i != 0) {
    473         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
    474     }
    475 
    476     bi->next();
    477     i = bi->current();
    478     if (i != 1) {
    479         errln("Incorrect value from bi->previous() at line %d.  Expected 1, got %d", __LINE__, i);
    480     }
    481 
    482     bi->last();
    483     bi->next();
    484     i = bi->current();
    485     if (i != 10) {
    486         errln("Incorrect value from bi->previous() at line %d.  Expected 10, got %d", __LINE__, i);
    487     }
    488 
    489     bi->first();
    490     bi->previous();
    491     i = bi->current();
    492     if (i != 0) {
    493         errln("Incorrect value from bi->previous() at line %d.  Expected 0, got %d", __LINE__, i);
    494     }
    495 
    496 
    497     //
    498     // Following()
    499     //
    500     i = bi->following(4);
    501     if (i != 5) {
    502         errln("Incorrect value from bi->following() at line %d.  Expected 5, got %d", __LINE__, i);
    503     }
    504 
    505     i = bi->following(9);
    506     if (i != 10) {
    507         errln("Incorrect value from bi->following() at line %d.  Expected 10, got %d", __LINE__, i);
    508     }
    509 
    510     i = bi->following(10);
    511     if (i != BreakIterator::DONE) {
    512         errln("Incorrect value from bi->following() at line %d.  Expected DONE, got %d", __LINE__, i);
    513     }
    514 
    515 
    516     //
    517     // Preceding
    518     //
    519     i = bi->preceding(4);
    520     if (i != 3) {
    521         errln("Incorrect value from bi->preceding() at line %d.  Expected 3, got %d", __LINE__, i);
    522     }
    523 
    524     i = bi->preceding(10);
    525     if (i != 9) {
    526         errln("Incorrect value from bi->preceding() at line %d.  Expected 9, got %d", __LINE__, i);
    527     }
    528 
    529     i = bi->preceding(1);
    530     if (i != 0) {
    531         errln("Incorrect value from bi->preceding() at line %d.  Expected 0, got %d", __LINE__, i);
    532     }
    533 
    534     i = bi->preceding(0);
    535     if (i != BreakIterator::DONE) {
    536         errln("Incorrect value from bi->preceding() at line %d.  Expected DONE, got %d", __LINE__, i);
    537     }
    538 
    539 
    540     //
    541     // isBoundary()
    542     //
    543     bi->first();
    544     if (bi->isBoundary(3) != TRUE) {
    545         errln("Incorrect value from bi->isBoudary() at line %d.  Expected TRUE, got FALSE", __LINE__, i);
    546     }
    547     i = bi->current();
    548     if (i != 3) {
    549         errln("Incorrect value from bi->current() at line %d.  Expected 3, got %d", __LINE__, i);
    550     }
    551 
    552 
    553     if (bi->isBoundary(11) != FALSE) {
    554         errln("Incorrect value from bi->isBoudary() at line %d.  Expected FALSE, got TRUE", __LINE__, i);
    555     }
    556     i = bi->current();
    557     if (i != 10) {
    558         errln("Incorrect value from bi->current() at line %d.  Expected 10, got %d", __LINE__, i);
    559     }
    560 
    561     //
    562     // next(n)
    563     //
    564     bi->first();
    565     i = bi->next(4);
    566     if (i != 4) {
    567         errln("Incorrect value from bi->next() at line %d.  Expected 4, got %d", __LINE__, i);
    568     }
    569 
    570     i = bi->next(6);
    571     if (i != 10) {
    572         errln("Incorrect value from bi->next() at line %d.  Expected 10, got %d", __LINE__, i);
    573     }
    574 
    575     bi->first();
    576     i = bi->next(11);
    577     if (i != BreakIterator::DONE) {
    578         errln("Incorrect value from bi->next() at line %d.  Expected BreakIterator::DONE, got %d", __LINE__, i);
    579     }
    580 
    581     delete bi;
    582 
    583 }
    584 
    585 
    586 
    587 
    588 
    589 
    590 void RBBIAPITest::TestBuilder() {
    591      UnicodeString rulesString1 = "$Letters = [:L:];\n"
    592                                   "$Numbers = [:N:];\n"
    593                                   "$Letters+;\n"
    594                                   "$Numbers+;\n"
    595                                   "[^$Letters $Numbers];\n"
    596                                   "!.*;\n";
    597      UnicodeString testString1  = "abc123..abc";
    598                                 // 01234567890
    599      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
    600      UErrorCode status=U_ZERO_ERROR;
    601      UParseError    parseError;
    602 
    603      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
    604      if(U_FAILURE(status)) {
    605          dataerrln("Fail : in construction - %s", u_errorName(status));
    606      } else {
    607          bi->setText(testString1);
    608          doBoundaryTest(*bi, testString1, bounds1);
    609      }
    610      delete bi;
    611 }
    612 
    613 
    614 //
    615 //  TestQuoteGrouping
    616 //       Single quotes within rules imply a grouping, so that a modifier
    617 //       following the quoted text (* or +) applies to all of the quoted chars.
    618 //
    619 void RBBIAPITest::TestQuoteGrouping() {
    620      UnicodeString rulesString1 = "#Here comes the rule...\n"
    621                                   "'$@!'*;\n"   //  (\$\@\!)*
    622                                   ".;\n";
    623 
    624      UnicodeString testString1  = "$@!$@!X$@!!X";
    625                                 // 0123456789012
    626      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
    627      UErrorCode status=U_ZERO_ERROR;
    628      UParseError    parseError;
    629 
    630      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
    631      if(U_FAILURE(status)) {
    632          dataerrln("Fail : in construction - %s", u_errorName(status));
    633      } else {
    634          bi->setText(testString1);
    635          doBoundaryTest(*bi, testString1, bounds1);
    636      }
    637      delete bi;
    638 }
    639 
    640 //
    641 //  TestRuleStatus
    642 //      Test word break rule status constants.
    643 //
    644 void RBBIAPITest::TestRuleStatus() {
    645      UChar str[30];
    646      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
    647      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
    648      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
    649               // 012345678901234567  8      9    0
    650               //                     Katakana
    651                 str, 30);
    652      UnicodeString testString1(str);
    653      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
    654      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
    655                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
    656                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
    657 
    658      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
    659                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
    660                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
    661 
    662      UErrorCode status=U_ZERO_ERROR;
    663 
    664      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
    665      if(U_FAILURE(status)) {
    666          errcheckln(status, "Fail : in construction - %s", u_errorName(status));
    667      } else {
    668          bi->setText(testString1);
    669          // First test that the breaks are in the right spots.
    670          doBoundaryTest(*bi, testString1, bounds1);
    671 
    672          // Then go back and check tag values
    673          int32_t i = 0;
    674          int32_t pos, tag;
    675          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
    676              if (pos != bounds1[i]) {
    677                  errln("FAIL: unexpected word break at postion %d", pos);
    678                  break;
    679              }
    680              tag = bi->getRuleStatus();
    681              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
    682                  errln("FAIL: incorrect tag value %d at position %d", tag, pos);
    683                  break;
    684              }
    685 
    686              // Check that we get the same tag values from getRuleStatusVec()
    687              int32_t vec[10];
    688              int t = bi->getRuleStatusVec(vec, 10, status);
    689              TEST_ASSERT_SUCCESS(status);
    690              TEST_ASSERT(t==1);
    691              TEST_ASSERT(vec[0] == tag);
    692          }
    693      }
    694      delete bi;
    695 
    696      // Now test line break status.  This test mostly is to confirm that the status constants
    697      //                              are correctly declared in the header.
    698      testString1 =   "test line. \n";
    699      // break type    s    s     h
    700 
    701      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
    702      if(U_FAILURE(status)) {
    703          errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
    704      } else {
    705          int32_t i = 0;
    706          int32_t pos, tag;
    707          UBool   success;
    708 
    709          bi->setText(testString1);
    710          pos = bi->current();
    711          tag = bi->getRuleStatus();
    712          for (i=0; i<3; i++) {
    713              switch (i) {
    714              case 0:
    715                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
    716              case 1:
    717                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
    718              case 2:
    719                  success = pos==12 && tag==UBRK_LINE_HARD; break;
    720              default:
    721                  success = FALSE; break;
    722              }
    723              if (success == FALSE) {
    724                  errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
    725                      i, pos, tag);
    726                  break;
    727              }
    728              pos = bi->next();
    729              tag = bi->getRuleStatus();
    730          }
    731          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
    732              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
    733              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
    734              errln("UBRK_LINE_* constants from header are inconsistent.");
    735          }
    736      }
    737      delete bi;
    738 
    739 }
    740 
    741 
    742 //
    743 //  TestRuleStatusVec
    744 //      Test the vector form of  break rule status.
    745 //
    746 void RBBIAPITest::TestRuleStatusVec() {
    747     UnicodeString rulesString(   "[A-N]{100}; \n"
    748                                  "[a-w]{200}; \n"
    749                                  "[\\p{L}]{300}; \n"
    750                                  "[\\p{N}]{400}; \n"
    751                                  "[0-5]{500}; \n"
    752                                   "!.*;\n", -1, US_INV);
    753      UnicodeString testString1  = "Aapz5?";
    754      int32_t  statusVals[10];
    755      int32_t  numStatuses;
    756      int32_t  pos;
    757 
    758      UErrorCode status=U_ZERO_ERROR;
    759      UParseError    parseError;
    760 
    761      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
    762      if (U_FAILURE(status)) {
    763          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
    764      } else {
    765          bi->setText(testString1);
    766 
    767          // A
    768          pos = bi->next();
    769          TEST_ASSERT(pos==1);
    770          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    771          TEST_ASSERT_SUCCESS(status);
    772          TEST_ASSERT(numStatuses == 2);
    773          TEST_ASSERT(statusVals[0] == 100);
    774          TEST_ASSERT(statusVals[1] == 300);
    775 
    776          // a
    777          pos = bi->next();
    778          TEST_ASSERT(pos==2);
    779          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    780          TEST_ASSERT_SUCCESS(status);
    781          TEST_ASSERT(numStatuses == 2);
    782          TEST_ASSERT(statusVals[0] == 200);
    783          TEST_ASSERT(statusVals[1] == 300);
    784 
    785          // p
    786          pos = bi->next();
    787          TEST_ASSERT(pos==3);
    788          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    789          TEST_ASSERT_SUCCESS(status);
    790          TEST_ASSERT(numStatuses == 2);
    791          TEST_ASSERT(statusVals[0] == 200);
    792          TEST_ASSERT(statusVals[1] == 300);
    793 
    794          // z
    795          pos = bi->next();
    796          TEST_ASSERT(pos==4);
    797          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    798          TEST_ASSERT_SUCCESS(status);
    799          TEST_ASSERT(numStatuses == 1);
    800          TEST_ASSERT(statusVals[0] == 300);
    801 
    802          // 5
    803          pos = bi->next();
    804          TEST_ASSERT(pos==5);
    805          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    806          TEST_ASSERT_SUCCESS(status);
    807          TEST_ASSERT(numStatuses == 2);
    808          TEST_ASSERT(statusVals[0] == 400);
    809          TEST_ASSERT(statusVals[1] == 500);
    810 
    811          // ?
    812          pos = bi->next();
    813          TEST_ASSERT(pos==6);
    814          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
    815          TEST_ASSERT_SUCCESS(status);
    816          TEST_ASSERT(numStatuses == 1);
    817          TEST_ASSERT(statusVals[0] == 0);
    818 
    819          //
    820          //  Check buffer overflow error handling.   Char == A
    821          //
    822          bi->first();
    823          pos = bi->next();
    824          TEST_ASSERT(pos==1);
    825          memset(statusVals, -1, sizeof(statusVals));
    826          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
    827          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    828          TEST_ASSERT(numStatuses == 2);
    829          TEST_ASSERT(statusVals[0] == -1);
    830 
    831          status = U_ZERO_ERROR;
    832          memset(statusVals, -1, sizeof(statusVals));
    833          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
    834          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
    835          TEST_ASSERT(numStatuses == 2);
    836          TEST_ASSERT(statusVals[0] == 100);
    837          TEST_ASSERT(statusVals[1] == -1);
    838 
    839          status = U_ZERO_ERROR;
    840          memset(statusVals, -1, sizeof(statusVals));
    841          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
    842          TEST_ASSERT_SUCCESS(status);
    843          TEST_ASSERT(numStatuses == 2);
    844          TEST_ASSERT(statusVals[0] == 100);
    845          TEST_ASSERT(statusVals[1] == 300);
    846          TEST_ASSERT(statusVals[2] == -1);
    847      }
    848      delete bi;
    849 
    850 }
    851 
    852 //
    853 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
    854 //                               $variable reference
    855 void RBBIAPITest::TestBug2190() {
    856      UnicodeString rulesString1 = "$aaa = abcd;\n"
    857                                   "$bbb = $aaa;\n"
    858                                   "$bbb;\n";
    859      UnicodeString testString1  = "abcdabcd";
    860                                 // 01234567890
    861      int32_t bounds1[] = {0, 4, 8};
    862      UErrorCode status=U_ZERO_ERROR;
    863      UParseError    parseError;
    864 
    865      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
    866      if(U_FAILURE(status)) {
    867          dataerrln("Fail : in construction - %s", u_errorName(status));
    868      } else {
    869          bi->setText(testString1);
    870          doBoundaryTest(*bi, testString1, bounds1);
    871      }
    872      delete bi;
    873 }
    874 
    875 
    876 void RBBIAPITest::TestRegistration() {
    877 #if !UCONFIG_NO_SERVICE
    878     UErrorCode status = U_ZERO_ERROR;
    879     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
    880     // ok to not delete these if we exit because of error?
    881     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
    882     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
    883     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
    884 
    885     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
    886         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
    887 
    888         delete ja_word;
    889         delete ja_char;
    890         delete root_word;
    891         delete root_char;
    892 
    893         return;
    894     }
    895 
    896     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
    897     {
    898 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
    899         if (ja_word && *ja_word == *root_word) {
    900             errln("japan not different from root");
    901         }
    902 #endif
    903     }
    904 
    905     {
    906         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
    907         UBool fail = TRUE;
    908         if(result){
    909             fail = *result != *ja_word;
    910         }
    911         delete result;
    912         if (fail) {
    913             errln("bad result for xx_XX/word");
    914         }
    915     }
    916 
    917     {
    918         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
    919         UBool fail = TRUE;
    920         if(result){
    921             fail = *result != *ja_char;
    922         }
    923         delete result;
    924         if (fail) {
    925             errln("bad result for ja_JP/char");
    926         }
    927     }
    928 
    929     {
    930         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
    931         UBool fail = TRUE;
    932         if(result){
    933             fail = *result != *root_char;
    934         }
    935         delete result;
    936         if (fail) {
    937             errln("bad result for xx_XX/char");
    938         }
    939     }
    940 
    941     {
    942         StringEnumeration* avail = BreakIterator::getAvailableLocales();
    943         UBool found = FALSE;
    944         const UnicodeString* p;
    945         while ((p = avail->snext(status))) {
    946             if (p->compare("xx") == 0) {
    947                 found = TRUE;
    948                 break;
    949             }
    950         }
    951         delete avail;
    952         if (!found) {
    953             errln("did not find test locale");
    954         }
    955     }
    956 
    957     {
    958         UBool unreg = BreakIterator::unregister(key, status);
    959         if (!unreg) {
    960             errln("unable to unregister");
    961         }
    962     }
    963 
    964     {
    965         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
    966         BreakIterator* root = BreakIterator::createWordInstance("", status);
    967         UBool fail = TRUE;
    968         if(root){
    969           fail = *root != *result;
    970         }
    971         delete root;
    972         delete result;
    973         if (fail) {
    974             errln("did not get root break");
    975         }
    976     }
    977 
    978     {
    979         StringEnumeration* avail = BreakIterator::getAvailableLocales();
    980         UBool found = FALSE;
    981         const UnicodeString* p;
    982         while ((p = avail->snext(status))) {
    983             if (p->compare("xx") == 0) {
    984                 found = TRUE;
    985                 break;
    986             }
    987         }
    988         delete avail;
    989         if (found) {
    990             errln("found test locale");
    991         }
    992     }
    993 
    994     {
    995         int32_t count;
    996         UBool   foundLocale = FALSE;
    997         const Locale *avail = BreakIterator::getAvailableLocales(count);
    998         for (int i=0; i<count; i++) {
    999             if (avail[i] == Locale::getEnglish()) {
   1000                 foundLocale = TRUE;
   1001                 break;
   1002             }
   1003         }
   1004         if (foundLocale == FALSE) {
   1005             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
   1006         }
   1007     }
   1008 
   1009 
   1010     // ja_word was adopted by factory
   1011     delete ja_char;
   1012     delete root_word;
   1013     delete root_char;
   1014 #endif
   1015 }
   1016 
   1017 void RBBIAPITest::RoundtripRule(const char *dataFile) {
   1018     UErrorCode status = U_ZERO_ERROR;
   1019     UParseError parseError;
   1020     parseError.line = 0;
   1021     parseError.offset = 0;
   1022     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
   1023     uint32_t length;
   1024     const UChar *builtSource;
   1025     const uint8_t *rbbiRules;
   1026     const uint8_t *builtRules;
   1027 
   1028     if (U_FAILURE(status)) {
   1029         errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
   1030         return;
   1031     }
   1032 
   1033     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
   1034     builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
   1035     RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
   1036     if (U_FAILURE(status)) {
   1037         errln("createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
   1038                 u_errorName(status), parseError.line, parseError.offset);
   1039         return;
   1040     };
   1041     rbbiRules = brkItr->getBinaryRules(length);
   1042     logln("Comparing \"%s\" len=%d", dataFile, length);
   1043     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
   1044         errln("Built rules and rebuilt rules are different %s", dataFile);
   1045         return;
   1046     }
   1047     delete brkItr;
   1048 }
   1049 
   1050 void RBBIAPITest::TestRoundtripRules() {
   1051     RoundtripRule("word");
   1052     RoundtripRule("title");
   1053     RoundtripRule("sent");
   1054     RoundtripRule("line");
   1055     RoundtripRule("char");
   1056     if (!quick) {
   1057         RoundtripRule("word_POSIX");
   1058     }
   1059 }
   1060 
   1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
   1062 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
   1063 // This is just a sanity check, not a thorough test (e.g. we don't check that the
   1064 // first delete actually frees rulesCopy).
   1065 void RBBIAPITest::TestCreateFromRBBIData() {
   1066     // Get some handy RBBIData
   1067     const char *brkName = "word"; // or "sent", "line", "char", etc.
   1068     UErrorCode status = U_ZERO_ERROR;
   1069     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
   1070     if ( U_SUCCESS(status) ) {
   1071         const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
   1072         uint32_t length = builtRules->fLength;
   1073         RBBIWithProtectedFunctions * brkItr;
   1074 
   1075         // Try the memory-adopting constructor, need to copy the data first
   1076         RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
   1077         if ( rulesCopy ) {
   1078             uprv_memcpy( rulesCopy, builtRules, length );
   1079 
   1080             brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
   1081             if ( U_SUCCESS(status) ) {
   1082                 delete brkItr; // this should free rulesCopy
   1083             } else {
   1084                 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
   1085                 status = U_ZERO_ERROR;// reset for the next test
   1086                 uprv_free( rulesCopy );
   1087             }
   1088         }
   1089 
   1090         // Now try the non-adopting constructor
   1091         brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
   1092         if ( U_SUCCESS(status) ) {
   1093             delete brkItr; // this should NOT attempt to free builtRules
   1094             if (builtRules->fLength != length) { // sanity check
   1095                 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
   1096             }
   1097         } else {
   1098             errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
   1099         }
   1100     }
   1101 
   1102     // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
   1103     //
   1104     status = U_ZERO_ERROR;
   1105     RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
   1106     if (rb == NULL || U_FAILURE(status)) {
   1107         dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
   1108     } else {
   1109         uint32_t length;
   1110         const uint8_t *rules = rb->getBinaryRules(length);
   1111         RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
   1112         TEST_ASSERT_SUCCESS(status);
   1113         TEST_ASSERT(*rb == *rb2);
   1114         UnicodeString words = "one two three ";
   1115         rb2->setText(words);
   1116         int wordCounter = 0;
   1117         while (rb2->next() != UBRK_DONE) {
   1118             wordCounter++;
   1119         }
   1120         TEST_ASSERT(wordCounter == 6);
   1121 
   1122         status = U_ZERO_ERROR;
   1123         RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
   1124         TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
   1125 
   1126         delete rb;
   1127         delete rb2;
   1128         delete rb3;
   1129     }
   1130 }
   1131 
   1132 
   1133 void RBBIAPITest::TestRefreshInputText() {
   1134     /*
   1135      *  RefreshInput changes out the input of a Break Iterator without
   1136      *    changing anything else in the iterator's state.  Used with Java JNI,
   1137      *    when Java moves the underlying string storage.   This test
   1138      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
   1139      *    The right set of boundaries should still be found.
   1140      */
   1141     UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
   1142     UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
   1143     UErrorCode status = U_ZERO_ERROR;
   1144     UText ut1 = UTEXT_INITIALIZER;
   1145     UText ut2 = UTEXT_INITIALIZER;
   1146     RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
   1147     TEST_ASSERT_SUCCESS(status);
   1148 
   1149     utext_openUChars(&ut1, testStr, -1, &status);
   1150     TEST_ASSERT_SUCCESS(status);
   1151 
   1152     if (U_SUCCESS(status)) {
   1153         bi->setText(&ut1, status);
   1154         TEST_ASSERT_SUCCESS(status);
   1155 
   1156         /* Line boundaries will occur before each letter in the original string */
   1157         TEST_ASSERT(1 == bi->next());
   1158         TEST_ASSERT(3 == bi->next());
   1159 
   1160         /* Move the string, kill the original string.  */
   1161         u_strcpy(movedStr, testStr);
   1162         u_memset(testStr, 0x20, u_strlen(testStr));
   1163         utext_openUChars(&ut2, movedStr, -1, &status);
   1164         TEST_ASSERT_SUCCESS(status);
   1165         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
   1166         TEST_ASSERT_SUCCESS(status);
   1167         TEST_ASSERT(bi == returnedBI);
   1168 
   1169         /* Find the following matches, now working in the moved string. */
   1170         TEST_ASSERT(5 == bi->next());
   1171         TEST_ASSERT(7 == bi->next());
   1172         TEST_ASSERT(8 == bi->next());
   1173         TEST_ASSERT(UBRK_DONE == bi->next());
   1174 
   1175         utext_close(&ut1);
   1176         utext_close(&ut2);
   1177     }
   1178     delete bi;
   1179 
   1180 }
   1181 
   1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
   1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
   1184   static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
   1185   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
   1186 
   1187   int32_t *pos = new int32_t[ustr.length()];
   1188   int32_t posCount = 0;
   1189 
   1190   // calculate breaks up front, so we can print out
   1191   // sans any debugging
   1192   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
   1193     pos[posCount++] = n;
   1194     if(posCount>=ustr.length()) {
   1195       it.errln("brk count exceeds string length!");
   1196       return;
   1197     }
   1198   }
   1199   UnicodeString out;
   1200   out.append((UChar)CHSTR);
   1201   int32_t prev = 0;
   1202   for(int32_t i=0;i<posCount;i++) {
   1203     int32_t n=pos[i];
   1204     out.append(ustr.tempSubString(prev,n-prev));
   1205     out.append((UChar)PILCROW);
   1206     prev=n;
   1207   }
   1208   out.append(ustr.tempSubString(prev,ustr.length()-prev));
   1209   out.append((UChar)CHEND);
   1210   it.logln(out);
   1211 
   1212   out.remove();
   1213   for(int32_t i=0;i<posCount;i++) {
   1214     char tmp[100];
   1215     sprintf(tmp,"%d ",pos[i]);
   1216     out.append(UnicodeString(tmp));
   1217   }
   1218   it.logln(out);
   1219   delete [] pos;
   1220 }
   1221 #endif
   1222 
   1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
   1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
   1225   UErrorCode status = U_ZERO_ERROR;
   1226   LocalPointer<FilteredBreakIteratorBuilder> builder;
   1227   LocalPointer<BreakIterator> baseBI;
   1228   LocalPointer<BreakIterator> filteredBI;
   1229   LocalPointer<BreakIterator> frenchBI;
   1230 
   1231   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
   1232   const UnicodeString ABBR_MR("Mr.");
   1233   const UnicodeString ABBR_CAPT("Capt.");
   1234 
   1235   {
   1236     logln("Constructing empty builder\n");
   1237     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
   1238     TEST_ASSERT_SUCCESS(status);
   1239 
   1240     logln("Constructing base BI\n");
   1241     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1242     TEST_ASSERT_SUCCESS(status);
   1243 
   1244 	logln("Building new BI\n");
   1245     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1246     TEST_ASSERT_SUCCESS(status);
   1247 
   1248 	if (U_SUCCESS(status)) {
   1249         logln("Testing:");
   1250         filteredBI->setText(text);
   1251         TEST_ASSERT(20 == filteredBI->next()); // Mr.
   1252         TEST_ASSERT(84 == filteredBI->next()); // recovered.
   1253         TEST_ASSERT(90 == filteredBI->next()); // Capt.
   1254         TEST_ASSERT(181 == filteredBI->next()); // Mr.
   1255         TEST_ASSERT(278 == filteredBI->next()); // charge.
   1256         filteredBI->first();
   1257         prtbrks(filteredBI.getAlias(), text, *this);
   1258     }
   1259   }
   1260 
   1261   {
   1262     logln("Constructing empty builder\n");
   1263     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
   1264     TEST_ASSERT_SUCCESS(status);
   1265 
   1266     if (U_SUCCESS(status)) {
   1267         logln("Adding Mr. as an exception\n");
   1268         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
   1269         TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
   1270         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
   1271         TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
   1272         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
   1273         TEST_ASSERT_SUCCESS(status);
   1274 
   1275         logln("Constructing base BI\n");
   1276         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1277         TEST_ASSERT_SUCCESS(status);
   1278 
   1279         logln("Building new BI\n");
   1280         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1281         TEST_ASSERT_SUCCESS(status);
   1282 
   1283         logln("Testing:");
   1284         filteredBI->setText(text);
   1285         TEST_ASSERT(84 == filteredBI->next());
   1286         TEST_ASSERT(90 == filteredBI->next());// Capt.
   1287         TEST_ASSERT(278 == filteredBI->next());
   1288         filteredBI->first();
   1289         prtbrks(filteredBI.getAlias(), text, *this);
   1290     }
   1291   }
   1292 
   1293 
   1294   {
   1295     logln("Constructing empty builder\n");
   1296     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
   1297     TEST_ASSERT_SUCCESS(status);
   1298 
   1299     if (U_SUCCESS(status)) {
   1300         logln("Adding Mr. and Capt as an exception\n");
   1301         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
   1302         TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
   1303         TEST_ASSERT_SUCCESS(status);
   1304 
   1305         logln("Constructing base BI\n");
   1306         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1307         TEST_ASSERT_SUCCESS(status);
   1308 
   1309         logln("Building new BI\n");
   1310         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1311         TEST_ASSERT_SUCCESS(status);
   1312 
   1313         logln("Testing:");
   1314         filteredBI->setText(text);
   1315         TEST_ASSERT(84 == filteredBI->next());
   1316         TEST_ASSERT(278 == filteredBI->next());
   1317         filteredBI->first();
   1318         prtbrks(filteredBI.getAlias(), text, *this);
   1319     }
   1320   }
   1321 
   1322 
   1323   {
   1324     logln("Constructing English builder\n");
   1325     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
   1326     TEST_ASSERT_SUCCESS(status);
   1327 
   1328     logln("Constructing base BI\n");
   1329     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1330     TEST_ASSERT_SUCCESS(status);
   1331 
   1332     if (U_SUCCESS(status)) {
   1333         logln("unsuppressing 'Capt'");
   1334         TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
   1335 
   1336         logln("Building new BI\n");
   1337         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1338         TEST_ASSERT_SUCCESS(status);
   1339 
   1340         if(filteredBI.isValid()) {
   1341           logln("Testing:");
   1342           filteredBI->setText(text);
   1343           TEST_ASSERT(84 == filteredBI->next());
   1344           TEST_ASSERT(90 == filteredBI->next());
   1345           TEST_ASSERT(278 == filteredBI->next());
   1346           filteredBI->first();
   1347           prtbrks(filteredBI.getAlias(), text, *this);
   1348         }
   1349     }
   1350   }
   1351 
   1352 
   1353   {
   1354     logln("Constructing English builder\n");
   1355     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
   1356     TEST_ASSERT_SUCCESS(status);
   1357 
   1358     logln("Constructing base BI\n");
   1359     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
   1360     TEST_ASSERT_SUCCESS(status);
   1361 
   1362     if (U_SUCCESS(status)) {
   1363         logln("Building new BI\n");
   1364         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1365         TEST_ASSERT_SUCCESS(status);
   1366 
   1367         if(filteredBI.isValid()) {
   1368           logln("Testing:");
   1369           filteredBI->setText(text);
   1370           TEST_ASSERT(84 == filteredBI->next());
   1371           TEST_ASSERT(278 == filteredBI->next());
   1372           filteredBI->first();
   1373           prtbrks(filteredBI.getAlias(), text, *this);
   1374         }
   1375     }
   1376   }
   1377 
   1378   // reenable once french is in
   1379   {
   1380     logln("Constructing French builder");
   1381     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
   1382     TEST_ASSERT_SUCCESS(status);
   1383 
   1384     logln("Constructing base BI\n");
   1385     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
   1386     TEST_ASSERT_SUCCESS(status);
   1387 
   1388     if (U_SUCCESS(status)) {
   1389         logln("Building new BI\n");
   1390         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
   1391         TEST_ASSERT_SUCCESS(status);
   1392     }
   1393 
   1394     if(frenchBI.isValid()) {
   1395       logln("Testing:");
   1396       UnicodeString frText("C'est MM. Duval.");
   1397       frenchBI->setText(frText);
   1398       TEST_ASSERT(16 == frenchBI->next());
   1399       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
   1400       frenchBI->first();
   1401       prtbrks(frenchBI.getAlias(), frText, *this);
   1402       logln("Testing against English:");
   1403       filteredBI->setText(frText);
   1404       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
   1405       TEST_ASSERT(16 == filteredBI->next());
   1406       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
   1407       filteredBI->first();
   1408       prtbrks(filteredBI.getAlias(), frText, *this);
   1409 
   1410       // Verify ==
   1411       TEST_ASSERT_TRUE(*frenchBI   == *frenchBI);
   1412       TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
   1413       TEST_ASSERT_TRUE(*frenchBI   != *filteredBI);
   1414     } else {
   1415       dataerrln("French BI: not valid.");
   1416 	}
   1417   }
   1418 
   1419 #else
   1420   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
   1421 #endif
   1422 }
   1423 
   1424 //---------------------------------------------
   1425 // runIndexedTest
   1426 //---------------------------------------------
   1427 
   1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
   1429 {
   1430     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
   1431     switch (index) {
   1432      //   case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
   1433 #if !UCONFIG_NO_FILE_IO
   1434         case  0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
   1435         case  1: name = "TestgetRules"; if (exec) TestgetRules(); break;
   1436         case  2: name = "TestHashCode"; if (exec) TestHashCode(); break;
   1437         case  3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
   1438         case  4: name = "TestIteration"; if (exec) TestIteration(); break;
   1439 #else
   1440         case  0: case  1: case  2: case  3: case  4: name = "skip"; break;
   1441 #endif
   1442         case  5: name = "TestBuilder"; if (exec) TestBuilder(); break;
   1443         case  6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
   1444         case  7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
   1445         case  8: name = "TestBug2190"; if (exec) TestBug2190(); break;
   1446 #if !UCONFIG_NO_FILE_IO
   1447         case  9: name = "TestRegistration"; if (exec) TestRegistration(); break;
   1448         case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
   1449         case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
   1450         case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
   1451         case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
   1452 #else
   1453         case  9: case 10: case 11: case 12: case 13: name = "skip"; break;
   1454 #endif
   1455         case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
   1456 
   1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
   1458     case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
   1459 #else
   1460     case 15: name="skip"; break;
   1461 #endif
   1462         default: name = ""; break; // needed to end loop
   1463     }
   1464 }
   1465 
   1466 //---------------------------------------------
   1467 //Internal subroutines
   1468 //---------------------------------------------
   1469 
   1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
   1471      logln((UnicodeString)"testIsBoundary():");
   1472         int32_t p = 0;
   1473         UBool isB;
   1474         for (int32_t i = 0; i < text.length(); i++) {
   1475             isB = bi.isBoundary(i);
   1476             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
   1477 
   1478             if (i == boundaries[p]) {
   1479                 if (!isB)
   1480                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
   1481                 p++;
   1482             }
   1483             else {
   1484                 if (isB)
   1485                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
   1486             }
   1487         }
   1488 }
   1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
   1490     UnicodeString selected;
   1491     UnicodeString expected=CharsToUnicodeString(expectedString);
   1492 
   1493     if(gotoffset != expectedOffset)
   1494          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
   1495     if(start <= gotoffset){
   1496         testString.extractBetween(start, gotoffset, selected);
   1497     }
   1498     else{
   1499         testString.extractBetween(gotoffset, start, selected);
   1500     }
   1501     if(selected.compare(expected) != 0)
   1502          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
   1503     else
   1504         logln(prettify("****selected \"" + selected + "\""));
   1505 }
   1506 
   1507 //---------------------------------------------
   1508 //RBBIWithProtectedFunctions class functions
   1509 //---------------------------------------------
   1510 
// Forwards to the RuleBasedBreakIterator(RBBIDataHeader*, UErrorCode&) base
// constructor so that test code can reach it. TestCreateFromRBBIData uses this
// overload as the memory-adopting form.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
    : RuleBasedBreakIterator(data, status)
{
}
   1515 
// Forwards to the base constructor that takes const data plus
// RuleBasedBreakIterator::kDontAdopt. The unnamed EDontAdopt parameter only
// selects this overload; its value is not used.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
{
}
   1520 
   1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
   1522