1 /******************************************************************** 2 * Copyright (c) 1999-2014, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * Date Name Description 6 * 12/14/99 Madhu Creation. 7 * 01/12/2000 Madhu updated for changed API 8 ********************************************************************/ 9 10 #include "unicode/utypes.h" 11 12 #if !UCONFIG_NO_BREAK_ITERATION 13 14 #include "unicode/uchar.h" 15 #include "intltest.h" 16 #include "unicode/rbbi.h" 17 #include "unicode/schriter.h" 18 #include "rbbiapts.h" 19 #include "rbbidata.h" 20 #include "cstring.h" 21 #include "ubrkimpl.h" 22 #include "unicode/locid.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utext.h" 25 #include "cmemory.h" 26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 27 #include "unicode/filteredbrk.h" 28 #include <stdio.h> // for sprintf 29 #endif 30 /** 31 * API Test the RuleBasedBreakIterator class 32 */ 33 34 35 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 36 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 37 38 #define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ 39 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 40 41 void RBBIAPITest::TestCloneEquals() 42 { 43 44 UErrorCode status=U_ZERO_ERROR; 45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 49 if(U_FAILURE(status)){ 50 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 51 return; 52 } 53 54 55 UnicodeString testString="Testing word break iterators's clone() and equals()"; 56 bi1->setText(testString); 57 bi2->setText(testString); 58 biequal->setText(testString); 59 60 bi3->setText("hello"); 61 62 logln((UnicodeString)"Testing equals()"); 63 64 logln((UnicodeString)"Testing == and !="); 65 UBool b = (*bi1 != *biequal); 66 b |= *bi1 == *bi2; 67 b |= *bi1 == *bi3; 68 if (b) { 69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); 70 } 71 72 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); 74 75 76 // Quick test of RulesBasedBreakIterator assignment - 77 // Check that 78 // two different iterators are != 79 // they are == after assignment 80 // source and dest iterator produce the same next() after assignment. 81 // deleting one doesn't disable the other. 82 logln("Testing assignment"); 83 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 84 if(U_FAILURE(status)){ 85 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 86 return; 87 } 88 89 RuleBasedBreakIterator biDefault, biDefault2; 90 if(U_FAILURE(status)){ 91 errln((UnicodeString)"FAIL : in construction of default iterator"); 92 return; 93 } 94 if (biDefault == *bix) { 95 errln((UnicodeString)"ERROR: iterators should not compare =="); 96 return; 97 } 98 if (biDefault != biDefault2) { 99 errln((UnicodeString)"ERROR: iterators should compare =="); 100 return; 101 } 102 103 104 UnicodeString HelloString("Hello Kitty"); 105 bix->setText(HelloString); 106 if (*bix == *bi2) { 107 errln(UnicodeString("ERROR: strings should not be equal before assignment.")); 108 } 109 *bix = *bi2; 110 if (*bix != *bi2) { 111 errln(UnicodeString("ERROR: strings should be equal before assignment.")); 112 } 113 114 int bixnext = bix->next(); 115 int bi2next = bi2->next(); 116 if (! (bixnext == bi2next && bixnext == 7)) { 117 errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); 118 } 119 delete bix; 120 if (bi2->next() != 8) { 121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); 122 } 123 124 125 126 logln((UnicodeString)"Testing clone()"); 127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); 128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); 129 130 if(*bi1clone != *bi1 || *bi1clone != *biequal || 131 *bi1clone == *bi3 || *bi1clone == *bi2) 132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); 133 134 if(*bi2clone == *bi1 || *bi2clone == *biequal || 135 *bi2clone == *bi3 || *bi2clone != *bi2) 136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); 137 138 if(bi1->getText() != bi1clone->getText() || 139 bi2clone->getText() != bi2->getText() || 140 *bi2clone == *bi1clone ) 141 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); 142 143 delete bi1clone; 144 delete bi2clone; 145 delete bi1; 146 delete bi3; 147 delete bi2; 148 delete biequal; 149 } 150 151 void RBBIAPITest::TestBoilerPlate() 152 { 153 UErrorCode status = U_ZERO_ERROR; 154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 156 if (U_FAILURE(status)) { 157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 158 return; 159 } 160 if(*a!=*b){ 161 errln("Failed: boilerplate method operator!= does not return correct results"); 162 } 163 // Japanese word break iterators are identical to root with 164 // a dictionary-based break iterator 165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); 166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status); 167 if(c && d){ 168 if(*c!=*d){ 169 errln("Failed: boilerplate method operator== does not return correct results"); 170 } 171 }else{ 172 errln("creation of break iterator failed"); 173 } 174 delete a; 175 delete b; 176 delete c; 177 delete d; 178 } 179 180 void RBBIAPITest::TestgetRules() 181 { 182 UErrorCode status=U_ZERO_ERROR; 183 184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 186 if(U_FAILURE(status)){ 187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); 188 delete bi1; 189 delete bi2; 190 return; 191 } 192 193 194 195 logln((UnicodeString)"Testing toString()"); 196 197 bi1->setText((UnicodeString)"Hello there"); 198 199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); 200 201 UnicodeString temp=bi1->getRules(); 202 UnicodeString temp2=bi2->getRules(); 203 UnicodeString temp3=bi3->getRules(); 204 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 205 errln((UnicodeString)"ERROR: error in getRules() method"); 206 207 delete bi1; 208 delete bi2; 209 delete bi3; 210 } 211 void RBBIAPITest::TestHashCode() 212 { 213 UErrorCode status=U_ZERO_ERROR; 214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 217 if(U_FAILURE(status)){ 218 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 219 delete bi1; 220 delete bi2; 221 delete bi3; 222 return; 223 } 224 225 226 logln((UnicodeString)"Testing hashCode()"); 227 228 bi1->setText((UnicodeString)"Hash code"); 229 bi2->setText((UnicodeString)"Hash code"); 230 bi3->setText((UnicodeString)"Hash code"); 231 232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 234 235 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 236 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 237 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 238 239 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 240 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 241 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 242 243 delete bi1clone; 244 delete bi2clone; 245 delete bi1; 246 delete bi2; 247 delete bi3; 248 249 } 250 void RBBIAPITest::TestGetSetAdoptText() 251 { 252 logln((UnicodeString)"Testing getText setText "); 253 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); 254 UnicodeString str1="first string."; 255 UnicodeString str2="Second string."; 256 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); 257 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); 258 if(status.isFailure()){ 259 errcheckln(status, "Fail : in construction - %s", status.errorName()); 260 return; 261 } 262 263 264 CharacterIterator* text1= new StringCharacterIterator(str1); 265 CharacterIterator* text1Clone = text1->clone(); 266 CharacterIterator* text2= new StringCharacterIterator(str2); 267 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 268 269 wordIter1->setText(str1); 270 CharacterIterator *tci = &wordIter1->getText(); 271 UnicodeString tstr; 272 tci->getText(tstr); 273 TEST_ASSERT(tstr == str1); 274 if(wordIter1->current() != 0) 275 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 276 277 wordIter1->next(2); 278 279 wordIter1->setText(str2); 280 if(wordIter1->current() != 0) 281 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 282 283 284 charIter1->adoptText(text1Clone); 285 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 286 tci = &wordIter1->getText(); 287 tci->getText(tstr); 288 TEST_ASSERT(tstr == str2); 289 tci = &charIter1->getText(); 290 tci->getText(tstr); 291 TEST_ASSERT(tstr == str1); 292 293 294 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); 295 rb->adoptText(text1); 296 if(rb->getText() != *text1) 297 errln((UnicodeString)"ERROR:1 error in adoptText "); 298 rb->adoptText(text2); 299 if(rb->getText() != *text2) 300 errln((UnicodeString)"ERROR:2 error in adoptText "); 301 302 // Adopt where iterator range is less than the entire orignal source string. 303 // (With the change of the break engine to working with UText internally, 304 // CharacterIterators starting at positions other than zero are not supported) 305 rb->adoptText(text3); 306 TEST_ASSERT(rb->preceding(2) == 0); 307 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 308 //if(rb->preceding(2) != 3) { 309 // errln((UnicodeString)"ERROR:3 error in adoptText "); 310 //} 311 //if(rb->following(11) != BreakIterator::DONE) { 312 // errln((UnicodeString)"ERROR:4 error in adoptText "); 313 //} 314 315 // UText API 316 // 317 // Quick test to see if UText is working at all. 318 // 319 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 320 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 321 // 012345678901 322 323 status.reset(); 324 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); 325 wordIter1->setText(ut.getAlias(), status); 326 TEST_ASSERT_SUCCESS(status); 327 328 int32_t pos; 329 pos = wordIter1->first(); 330 TEST_ASSERT(pos==0); 331 pos = wordIter1->next(); 332 TEST_ASSERT(pos==5); 333 pos = wordIter1->next(); 334 TEST_ASSERT(pos==6); 335 pos = wordIter1->next(); 336 TEST_ASSERT(pos==11); 337 pos = wordIter1->next(); 338 TEST_ASSERT(pos==UBRK_DONE); 339 340 status.reset(); 341 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); 342 TEST_ASSERT_SUCCESS(status); 343 wordIter1->setText(ut2.getAlias(), status); 344 TEST_ASSERT_SUCCESS(status); 345 346 pos = wordIter1->first(); 347 TEST_ASSERT(pos==0); 348 pos = wordIter1->next(); 349 TEST_ASSERT(pos==3); 350 pos = wordIter1->next(); 351 TEST_ASSERT(pos==4); 352 353 pos = wordIter1->last(); 354 TEST_ASSERT(pos==6); 355 pos = wordIter1->previous(); 356 TEST_ASSERT(pos==4); 357 pos = wordIter1->previous(); 358 TEST_ASSERT(pos==3); 359 pos = wordIter1->previous(); 360 TEST_ASSERT(pos==0); 361 pos = wordIter1->previous(); 362 TEST_ASSERT(pos==UBRK_DONE); 363 364 status.reset(); 365 UnicodeString sEmpty; 366 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); 367 wordIter1->getUText(gut2.getAlias(), status); 368 TEST_ASSERT_SUCCESS(status); 369 status.reset(); 370 } 371 372 373 void RBBIAPITest::TestIteration() 374 { 375 // This test just verifies that the API is present. 376 // Testing for correct operation of the break rules happens elsewhere. 377 378 UErrorCode status=U_ZERO_ERROR; 379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 380 if (U_FAILURE(status) || bi == NULL) { 381 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 382 } 383 delete bi; 384 385 status=U_ZERO_ERROR; 386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 387 if (U_FAILURE(status) || bi == NULL) { 388 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); 389 } 390 delete bi; 391 392 status=U_ZERO_ERROR; 393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 394 if (U_FAILURE(status) || bi == NULL) { 395 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); 396 } 397 delete bi; 398 399 status=U_ZERO_ERROR; 400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 401 if (U_FAILURE(status) || bi == NULL) { 402 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 403 } 404 delete bi; 405 406 status=U_ZERO_ERROR; 407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 408 if (U_FAILURE(status) || bi == NULL) { 409 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); 410 } 411 delete bi; 412 413 status=U_ZERO_ERROR; 414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 415 if (U_FAILURE(status) || bi == NULL) { 416 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 417 return; // Skip the rest of these tests. 418 } 419 420 421 UnicodeString testString="0123456789"; 422 bi->setText(testString); 423 424 int32_t i; 425 i = bi->first(); 426 if (i != 0) { 427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); 428 } 429 430 i = bi->last(); 431 if (i != 10) { 432 errln("Incorrect value from bi->last(). Expected 10, got %d", i); 433 } 434 435 // 436 // Previous 437 // 438 bi->last(); 439 i = bi->previous(); 440 if (i != 9) { 441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); 442 } 443 444 445 bi->first(); 446 i = bi->previous(); 447 if (i != BreakIterator::DONE) { 448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); 449 } 450 451 // 452 // next() 453 // 454 bi->first(); 455 i = bi->next(); 456 if (i != 1) { 457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); 458 } 459 460 bi->last(); 461 i = bi->next(); 462 if (i != BreakIterator::DONE) { 463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); 464 } 465 466 467 // 468 // current() 469 // 470 bi->first(); 471 i = bi->current(); 472 if (i != 0) { 473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 474 } 475 476 bi->next(); 477 i = bi->current(); 478 if (i != 1) { 479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); 480 } 481 482 bi->last(); 483 bi->next(); 484 i = bi->current(); 485 if (i != 10) { 486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); 487 } 488 489 bi->first(); 490 bi->previous(); 491 i = bi->current(); 492 if (i != 0) { 493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 494 } 495 496 497 // 498 // Following() 499 // 500 i = bi->following(4); 501 if (i != 5) { 502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); 503 } 504 505 i = bi->following(9); 506 if (i != 10) { 507 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); 508 } 509 510 i = bi->following(10); 511 if (i != BreakIterator::DONE) { 512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); 513 } 514 515 516 // 517 // Preceding 518 // 519 i = bi->preceding(4); 520 if (i != 3) { 521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); 522 } 523 524 i = bi->preceding(10); 525 if (i != 9) { 526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); 527 } 528 529 i = bi->preceding(1); 530 if (i != 0) { 531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); 532 } 533 534 i = bi->preceding(0); 535 if (i != BreakIterator::DONE) { 536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); 537 } 538 539 540 // 541 // isBoundary() 542 // 543 bi->first(); 544 if (bi->isBoundary(3) != TRUE) { 545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); 546 } 547 i = bi->current(); 548 if (i != 3) { 549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); 550 } 551 552 553 if (bi->isBoundary(11) != FALSE) { 554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); 555 } 556 i = bi->current(); 557 if (i != 10) { 558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); 559 } 560 561 // 562 // next(n) 563 // 564 bi->first(); 565 i = bi->next(4); 566 if (i != 4) { 567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); 568 } 569 570 i = bi->next(6); 571 if (i != 10) { 572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); 573 } 574 575 bi->first(); 576 i = bi->next(11); 577 if (i != BreakIterator::DONE) { 578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); 579 } 580 581 delete bi; 582 583 } 584 585 586 587 588 589 590 void RBBIAPITest::TestBuilder() { 591 UnicodeString rulesString1 = "$Letters = [:L:];\n" 592 "$Numbers = [:N:];\n" 593 "$Letters+;\n" 594 "$Numbers+;\n" 595 "[^$Letters $Numbers];\n" 596 "!.*;\n"; 597 UnicodeString testString1 = "abc123..abc"; 598 // 01234567890 599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 600 UErrorCode status=U_ZERO_ERROR; 601 UParseError parseError; 602 603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 604 if(U_FAILURE(status)) { 605 dataerrln("Fail : in construction - %s", u_errorName(status)); 606 } else { 607 bi->setText(testString1); 608 doBoundaryTest(*bi, testString1, bounds1); 609 } 610 delete bi; 611 } 612 613 614 // 615 // TestQuoteGrouping 616 // Single quotes within rules imply a grouping, so that a modifier 617 // following the quoted text (* or +) applies to all of the quoted chars. 618 // 619 void RBBIAPITest::TestQuoteGrouping() { 620 UnicodeString rulesString1 = "#Here comes the rule...\n" 621 "'$@!'*;\n" // (\$\@\!)* 622 ".;\n"; 623 624 UnicodeString testString1 = "$@!$@!X$@!!X"; 625 // 0123456789012 626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 627 UErrorCode status=U_ZERO_ERROR; 628 UParseError parseError; 629 630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 631 if(U_FAILURE(status)) { 632 dataerrln("Fail : in construction - %s", u_errorName(status)); 633 } else { 634 bi->setText(testString1); 635 doBoundaryTest(*bi, testString1, bounds1); 636 } 637 delete bi; 638 } 639 640 // 641 // TestRuleStatus 642 // Test word break rule status constants. 643 // 644 void RBBIAPITest::TestRuleStatus() { 645 UChar str[30]; 646 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing 647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO 648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", 649 // 012345678901234567 8 9 0 650 // Katakana 651 str, 30); 652 UnicodeString testString1(str); 653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; 654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 656 UBRK_WORD_IDEO, UBRK_WORD_NONE}; 657 658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; 661 662 UErrorCode status=U_ZERO_ERROR; 663 664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); 665 if(U_FAILURE(status)) { 666 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 667 } else { 668 bi->setText(testString1); 669 // First test that the breaks are in the right spots. 670 doBoundaryTest(*bi, testString1, bounds1); 671 672 // Then go back and check tag values 673 int32_t i = 0; 674 int32_t pos, tag; 675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 676 if (pos != bounds1[i]) { 677 errln("FAIL: unexpected word break at postion %d", pos); 678 break; 679 } 680 tag = bi->getRuleStatus(); 681 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 682 errln("FAIL: incorrect tag value %d at position %d", tag, pos); 683 break; 684 } 685 686 // Check that we get the same tag values from getRuleStatusVec() 687 int32_t vec[10]; 688 int t = bi->getRuleStatusVec(vec, 10, status); 689 TEST_ASSERT_SUCCESS(status); 690 TEST_ASSERT(t==1); 691 TEST_ASSERT(vec[0] == tag); 692 } 693 } 694 delete bi; 695 696 // Now test line break status. This test mostly is to confirm that the status constants 697 // are correctly declared in the header. 698 testString1 = "test line. \n"; 699 // break type s s h 700 701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status); 702 if(U_FAILURE(status)) { 703 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); 704 } else { 705 int32_t i = 0; 706 int32_t pos, tag; 707 UBool success; 708 709 bi->setText(testString1); 710 pos = bi->current(); 711 tag = bi->getRuleStatus(); 712 for (i=0; i<3; i++) { 713 switch (i) { 714 case 0: 715 success = pos==0 && tag==UBRK_LINE_SOFT; break; 716 case 1: 717 success = pos==5 && tag==UBRK_LINE_SOFT; break; 718 case 2: 719 success = pos==12 && tag==UBRK_LINE_HARD; break; 720 default: 721 success = FALSE; break; 722 } 723 if (success == FALSE) { 724 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", 725 i, pos, tag); 726 break; 727 } 728 pos = bi->next(); 729 tag = bi->getRuleStatus(); 730 } 731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { 734 errln("UBRK_LINE_* constants from header are inconsistent."); 735 } 736 } 737 delete bi; 738 739 } 740 741 742 // 743 // TestRuleStatusVec 744 // Test the vector form of break rule status. 745 // 746 void RBBIAPITest::TestRuleStatusVec() { 747 UnicodeString rulesString( "[A-N]{100}; \n" 748 "[a-w]{200}; \n" 749 "[\\p{L}]{300}; \n" 750 "[\\p{N}]{400}; \n" 751 "[0-5]{500}; \n" 752 "!.*;\n", -1, US_INV); 753 UnicodeString testString1 = "Aapz5?"; 754 int32_t statusVals[10]; 755 int32_t numStatuses; 756 int32_t pos; 757 758 UErrorCode status=U_ZERO_ERROR; 759 UParseError parseError; 760 761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 762 if (U_FAILURE(status)) { 763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); 764 } else { 765 bi->setText(testString1); 766 767 // A 768 pos = bi->next(); 769 TEST_ASSERT(pos==1); 770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 771 TEST_ASSERT_SUCCESS(status); 772 TEST_ASSERT(numStatuses == 2); 773 TEST_ASSERT(statusVals[0] == 100); 774 TEST_ASSERT(statusVals[1] == 300); 775 776 // a 777 pos = bi->next(); 778 TEST_ASSERT(pos==2); 779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 780 TEST_ASSERT_SUCCESS(status); 781 TEST_ASSERT(numStatuses == 2); 782 TEST_ASSERT(statusVals[0] == 200); 783 TEST_ASSERT(statusVals[1] == 300); 784 785 // p 786 pos = bi->next(); 787 TEST_ASSERT(pos==3); 788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 789 TEST_ASSERT_SUCCESS(status); 790 TEST_ASSERT(numStatuses == 2); 791 TEST_ASSERT(statusVals[0] == 200); 792 TEST_ASSERT(statusVals[1] == 300); 793 794 // z 795 pos = bi->next(); 796 TEST_ASSERT(pos==4); 797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 798 TEST_ASSERT_SUCCESS(status); 799 TEST_ASSERT(numStatuses == 1); 800 TEST_ASSERT(statusVals[0] == 300); 801 802 // 5 803 pos = bi->next(); 804 TEST_ASSERT(pos==5); 805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 806 TEST_ASSERT_SUCCESS(status); 807 TEST_ASSERT(numStatuses == 2); 808 TEST_ASSERT(statusVals[0] == 400); 809 TEST_ASSERT(statusVals[1] == 500); 810 811 // ? 812 pos = bi->next(); 813 TEST_ASSERT(pos==6); 814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 815 TEST_ASSERT_SUCCESS(status); 816 TEST_ASSERT(numStatuses == 1); 817 TEST_ASSERT(statusVals[0] == 0); 818 819 // 820 // Check buffer overflow error handling. Char == A 821 // 822 bi->first(); 823 pos = bi->next(); 824 TEST_ASSERT(pos==1); 825 memset(statusVals, -1, sizeof(statusVals)); 826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 828 TEST_ASSERT(numStatuses == 2); 829 TEST_ASSERT(statusVals[0] == -1); 830 831 status = U_ZERO_ERROR; 832 memset(statusVals, -1, sizeof(statusVals)); 833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 835 TEST_ASSERT(numStatuses == 2); 836 TEST_ASSERT(statusVals[0] == 100); 837 TEST_ASSERT(statusVals[1] == -1); 838 839 status = U_ZERO_ERROR; 840 memset(statusVals, -1, sizeof(statusVals)); 841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 842 TEST_ASSERT_SUCCESS(status); 843 TEST_ASSERT(numStatuses == 2); 844 TEST_ASSERT(statusVals[0] == 100); 845 TEST_ASSERT(statusVals[1] == 300); 846 TEST_ASSERT(statusVals[2] == -1); 847 } 848 delete bi; 849 850 } 851 852 // 853 // Bug 2190 Regression test. Builder crash on rule consisting of only a 854 // $variable reference 855 void RBBIAPITest::TestBug2190() { 856 UnicodeString rulesString1 = "$aaa = abcd;\n" 857 "$bbb = $aaa;\n" 858 "$bbb;\n"; 859 UnicodeString testString1 = "abcdabcd"; 860 // 01234567890 861 int32_t bounds1[] = {0, 4, 8}; 862 UErrorCode status=U_ZERO_ERROR; 863 UParseError parseError; 864 865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 866 if(U_FAILURE(status)) { 867 dataerrln("Fail : in construction - %s", u_errorName(status)); 868 } else { 869 bi->setText(testString1); 870 doBoundaryTest(*bi, testString1, bounds1); 871 } 872 delete bi; 873 } 874 875 876 void RBBIAPITest::TestRegistration() { 877 #if !UCONFIG_NO_SERVICE 878 UErrorCode status = U_ZERO_ERROR; 879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 880 // ok to not delete these if we exit because of error? 881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 882 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 884 885 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { 886 dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); 887 888 delete ja_word; 889 delete ja_char; 890 delete root_word; 891 delete root_char; 892 893 return; 894 } 895 896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 897 { 898 #if 0 // With a dictionary based word breaking, ja_word is identical to root. 899 if (ja_word && *ja_word == *root_word) { 900 errln("japan not different from root"); 901 } 902 #endif 903 } 904 905 { 906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 907 UBool fail = TRUE; 908 if(result){ 909 fail = *result != *ja_word; 910 } 911 delete result; 912 if (fail) { 913 errln("bad result for xx_XX/word"); 914 } 915 } 916 917 { 918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 919 UBool fail = TRUE; 920 if(result){ 921 fail = *result != *ja_char; 922 } 923 delete result; 924 if (fail) { 925 errln("bad result for ja_JP/char"); 926 } 927 } 928 929 { 930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 931 UBool fail = TRUE; 932 if(result){ 933 fail = *result != *root_char; 934 } 935 delete result; 936 if (fail) { 937 errln("bad result for xx_XX/char"); 938 } 939 } 940 941 { 942 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 943 UBool found = FALSE; 944 const UnicodeString* p; 945 while ((p = avail->snext(status))) { 946 if (p->compare("xx") == 0) { 947 found = TRUE; 948 break; 949 } 950 } 951 delete avail; 952 if (!found) { 953 errln("did not find test locale"); 954 } 955 } 956 957 { 958 UBool unreg = BreakIterator::unregister(key, status); 959 if (!unreg) { 960 errln("unable to unregister"); 961 } 962 } 963 964 { 965 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 966 BreakIterator* root = BreakIterator::createWordInstance("", status); 967 UBool fail = TRUE; 968 if(root){ 969 fail = *root != *result; 970 } 971 delete root; 972 delete result; 973 if (fail) { 974 errln("did not get root break"); 975 } 976 } 977 978 { 979 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 980 UBool found = FALSE; 981 const UnicodeString* p; 982 while ((p = avail->snext(status))) { 983 if (p->compare("xx") == 0) { 984 found = TRUE; 985 break; 986 } 987 } 988 delete avail; 989 if (found) { 990 errln("found test locale"); 991 } 992 } 993 994 { 995 int32_t count; 996 UBool foundLocale = FALSE; 997 const Locale *avail = BreakIterator::getAvailableLocales(count); 998 for (int i=0; i<count; i++) { 999 if (avail[i] == Locale::getEnglish()) { 1000 foundLocale = TRUE; 1001 break; 1002 } 1003 } 1004 if (foundLocale == FALSE) { 1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 1006 } 1007 } 1008 1009 1010 // ja_word was adopted by factory 1011 delete ja_char; 1012 delete root_word; 1013 delete root_char; 1014 #endif 1015 } 1016 1017 void RBBIAPITest::RoundtripRule(const char *dataFile) { 1018 UErrorCode status = U_ZERO_ERROR; 1019 UParseError parseError; 1020 parseError.line = 0; 1021 parseError.offset = 0; 1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); 1023 uint32_t length; 1024 const UChar *builtSource; 1025 const uint8_t *rbbiRules; 1026 const uint8_t *builtRules; 1027 1028 if (U_FAILURE(status)) { 1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); 1030 return; 1031 } 1032 1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); 1034 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1036 if (U_FAILURE(status)) { 1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1038 u_errorName(status), parseError.line, parseError.offset); 1039 return; 1040 }; 1041 rbbiRules = brkItr->getBinaryRules(length); 1042 logln("Comparing \"%s\" len=%d", dataFile, length); 1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1044 errln("Built rules and rebuilt rules are different %s", dataFile); 1045 return; 1046 } 1047 delete brkItr; 1048 } 1049 1050 void RBBIAPITest::TestRoundtripRules() { 1051 RoundtripRule("word"); 1052 RoundtripRule("title"); 1053 RoundtripRule("sent"); 1054 RoundtripRule("line"); 1055 RoundtripRule("char"); 1056 if (!quick) { 1057 RoundtripRule("word_POSIX"); 1058 } 1059 } 1060 1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* 1062 // (these are protected so we access them via a local class RBBIWithProtectedFunctions). 1063 // This is just a sanity check, not a thorough test (e.g. we don't check that the 1064 // first delete actually frees rulesCopy). 1065 void RBBIAPITest::TestCreateFromRBBIData() { 1066 // Get some handy RBBIData 1067 const char *brkName = "word"; // or "sent", "line", "char", etc. 1068 UErrorCode status = U_ZERO_ERROR; 1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); 1070 if ( U_SUCCESS(status) ) { 1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); 1072 uint32_t length = builtRules->fLength; 1073 RBBIWithProtectedFunctions * brkItr; 1074 1075 // Try the memory-adopting constructor, need to copy the data first 1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); 1077 if ( rulesCopy ) { 1078 uprv_memcpy( rulesCopy, builtRules, length ); 1079 1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); 1081 if ( U_SUCCESS(status) ) { 1082 delete brkItr; // this should free rulesCopy 1083 } else { 1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1085 status = U_ZERO_ERROR;// reset for the next test 1086 uprv_free( rulesCopy ); 1087 } 1088 } 1089 1090 // Now try the non-adopting constructor 1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); 1092 if ( U_SUCCESS(status) ) { 1093 delete brkItr; // this should NOT attempt to free builtRules 1094 if (builtRules->fLength != length) { // sanity check 1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); 1096 } 1097 } else { 1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1099 } 1100 } 1101 1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) 1103 // 1104 status = U_ZERO_ERROR; 1105 RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 1106 if (rb == NULL || U_FAILURE(status)) { 1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status)); 1108 } else { 1109 uint32_t length; 1110 const uint8_t *rules = rb->getBinaryRules(length); 1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); 1112 TEST_ASSERT_SUCCESS(status); 1113 TEST_ASSERT(*rb == *rb2); 1114 UnicodeString words = "one two three "; 1115 rb2->setText(words); 1116 int wordCounter = 0; 1117 while (rb2->next() != UBRK_DONE) { 1118 wordCounter++; 1119 } 1120 TEST_ASSERT(wordCounter == 6); 1121 1122 status = U_ZERO_ERROR; 1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); 1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1125 1126 delete rb; 1127 delete rb2; 1128 delete rb3; 1129 } 1130 } 1131 1132 1133 void RBBIAPITest::TestRefreshInputText() { 1134 /* 1135 * RefreshInput changes out the input of a Break Iterator without 1136 * changing anything else in the iterator's state. Used with Java JNI, 1137 * when Java moves the underlying string storage. This test 1138 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. 1139 * The right set of boundaries should still be found. 1140 */ 1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ 1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; 1143 UErrorCode status = U_ZERO_ERROR; 1144 UText ut1 = UTEXT_INITIALIZER; 1145 UText ut2 = UTEXT_INITIALIZER; 1146 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 1147 TEST_ASSERT_SUCCESS(status); 1148 1149 utext_openUChars(&ut1, testStr, -1, &status); 1150 TEST_ASSERT_SUCCESS(status); 1151 1152 if (U_SUCCESS(status)) { 1153 bi->setText(&ut1, status); 1154 TEST_ASSERT_SUCCESS(status); 1155 1156 /* Line boundaries will occur before each letter in the original string */ 1157 TEST_ASSERT(1 == bi->next()); 1158 TEST_ASSERT(3 == bi->next()); 1159 1160 /* Move the string, kill the original string. */ 1161 u_strcpy(movedStr, testStr); 1162 u_memset(testStr, 0x20, u_strlen(testStr)); 1163 utext_openUChars(&ut2, movedStr, -1, &status); 1164 TEST_ASSERT_SUCCESS(status); 1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); 1166 TEST_ASSERT_SUCCESS(status); 1167 TEST_ASSERT(bi == returnedBI); 1168 1169 /* Find the following matches, now working in the moved string. */ 1170 TEST_ASSERT(5 == bi->next()); 1171 TEST_ASSERT(7 == bi->next()); 1172 TEST_ASSERT(8 == bi->next()); 1173 TEST_ASSERT(UBRK_DONE == bi->next()); 1174 1175 utext_close(&ut1); 1176 utext_close(&ut2); 1177 } 1178 delete bi; 1179 1180 } 1181 1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) { 1184 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets 1185 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); 1186 1187 int32_t *pos = new int32_t[ustr.length()]; 1188 int32_t posCount = 0; 1189 1190 // calculate breaks up front, so we can print out 1191 // sans any debugging 1192 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { 1193 pos[posCount++] = n; 1194 if(posCount>=ustr.length()) { 1195 it.errln("brk count exceeds string length!"); 1196 return; 1197 } 1198 } 1199 UnicodeString out; 1200 out.append((UChar)CHSTR); 1201 int32_t prev = 0; 1202 for(int32_t i=0;i<posCount;i++) { 1203 int32_t n=pos[i]; 1204 out.append(ustr.tempSubString(prev,n-prev)); 1205 out.append((UChar)PILCROW); 1206 prev=n; 1207 } 1208 out.append(ustr.tempSubString(prev,ustr.length()-prev)); 1209 out.append((UChar)CHEND); 1210 it.logln(out); 1211 1212 out.remove(); 1213 for(int32_t i=0;i<posCount;i++) { 1214 char tmp[100]; 1215 sprintf(tmp,"%d ",pos[i]); 1216 out.append(UnicodeString(tmp)); 1217 } 1218 it.logln(out); 1219 delete [] pos; 1220 } 1221 #endif 1222 1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() { 1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1225 UErrorCode status = U_ZERO_ERROR; 1226 LocalPointer<FilteredBreakIteratorBuilder> builder; 1227 LocalPointer<BreakIterator> baseBI; 1228 LocalPointer<BreakIterator> filteredBI; 1229 LocalPointer<BreakIterator> frenchBI; 1230 1231 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. 1232 const UnicodeString ABBR_MR("Mr."); 1233 const UnicodeString ABBR_CAPT("Capt."); 1234 1235 { 1236 logln("Constructing empty builder\n"); 1237 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1238 TEST_ASSERT_SUCCESS(status); 1239 1240 logln("Constructing base BI\n"); 1241 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1242 TEST_ASSERT_SUCCESS(status); 1243 1244 logln("Building new BI\n"); 1245 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1246 TEST_ASSERT_SUCCESS(status); 1247 1248 if (U_SUCCESS(status)) { 1249 logln("Testing:"); 1250 filteredBI->setText(text); 1251 TEST_ASSERT(20 == filteredBI->next()); // Mr. 1252 TEST_ASSERT(84 == filteredBI->next()); // recovered. 1253 TEST_ASSERT(90 == filteredBI->next()); // Capt. 1254 TEST_ASSERT(181 == filteredBI->next()); // Mr. 1255 TEST_ASSERT(278 == filteredBI->next()); // charge. 1256 filteredBI->first(); 1257 prtbrks(filteredBI.getAlias(), text, *this); 1258 } 1259 } 1260 1261 { 1262 logln("Constructing empty builder\n"); 1263 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1264 TEST_ASSERT_SUCCESS(status); 1265 1266 if (U_SUCCESS(status)) { 1267 logln("Adding Mr. as an exception\n"); 1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1269 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it 1270 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); 1271 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it 1272 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1273 TEST_ASSERT_SUCCESS(status); 1274 1275 logln("Constructing base BI\n"); 1276 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1277 TEST_ASSERT_SUCCESS(status); 1278 1279 logln("Building new BI\n"); 1280 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1281 TEST_ASSERT_SUCCESS(status); 1282 1283 logln("Testing:"); 1284 filteredBI->setText(text); 1285 TEST_ASSERT(84 == filteredBI->next()); 1286 TEST_ASSERT(90 == filteredBI->next());// Capt. 1287 TEST_ASSERT(278 == filteredBI->next()); 1288 filteredBI->first(); 1289 prtbrks(filteredBI.getAlias(), text, *this); 1290 } 1291 } 1292 1293 1294 { 1295 logln("Constructing empty builder\n"); 1296 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1297 TEST_ASSERT_SUCCESS(status); 1298 1299 if (U_SUCCESS(status)) { 1300 logln("Adding Mr. and Capt as an exception\n"); 1301 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1302 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); 1303 TEST_ASSERT_SUCCESS(status); 1304 1305 logln("Constructing base BI\n"); 1306 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1307 TEST_ASSERT_SUCCESS(status); 1308 1309 logln("Building new BI\n"); 1310 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1311 TEST_ASSERT_SUCCESS(status); 1312 1313 logln("Testing:"); 1314 filteredBI->setText(text); 1315 TEST_ASSERT(84 == filteredBI->next()); 1316 TEST_ASSERT(278 == filteredBI->next()); 1317 filteredBI->first(); 1318 prtbrks(filteredBI.getAlias(), text, *this); 1319 } 1320 } 1321 1322 1323 { 1324 logln("Constructing English builder\n"); 1325 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1326 TEST_ASSERT_SUCCESS(status); 1327 1328 logln("Constructing base BI\n"); 1329 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1330 TEST_ASSERT_SUCCESS(status); 1331 1332 if (U_SUCCESS(status)) { 1333 logln("unsuppressing 'Capt'"); 1334 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status)); 1335 1336 logln("Building new BI\n"); 1337 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1338 TEST_ASSERT_SUCCESS(status); 1339 1340 if(filteredBI.isValid()) { 1341 logln("Testing:"); 1342 filteredBI->setText(text); 1343 TEST_ASSERT(84 == filteredBI->next()); 1344 TEST_ASSERT(90 == filteredBI->next()); 1345 TEST_ASSERT(278 == filteredBI->next()); 1346 filteredBI->first(); 1347 prtbrks(filteredBI.getAlias(), text, *this); 1348 } 1349 } 1350 } 1351 1352 1353 { 1354 logln("Constructing English builder\n"); 1355 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1356 TEST_ASSERT_SUCCESS(status); 1357 1358 logln("Constructing base BI\n"); 1359 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1360 TEST_ASSERT_SUCCESS(status); 1361 1362 if (U_SUCCESS(status)) { 1363 logln("Building new BI\n"); 1364 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1365 TEST_ASSERT_SUCCESS(status); 1366 1367 if(filteredBI.isValid()) { 1368 logln("Testing:"); 1369 filteredBI->setText(text); 1370 TEST_ASSERT(84 == filteredBI->next()); 1371 TEST_ASSERT(278 == filteredBI->next()); 1372 filteredBI->first(); 1373 prtbrks(filteredBI.getAlias(), text, *this); 1374 } 1375 } 1376 } 1377 1378 // reenable once french is in 1379 { 1380 logln("Constructing French builder"); 1381 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status)); 1382 TEST_ASSERT_SUCCESS(status); 1383 1384 logln("Constructing base BI\n"); 1385 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status)); 1386 TEST_ASSERT_SUCCESS(status); 1387 1388 if (U_SUCCESS(status)) { 1389 logln("Building new BI\n"); 1390 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1391 TEST_ASSERT_SUCCESS(status); 1392 } 1393 1394 if(frenchBI.isValid()) { 1395 logln("Testing:"); 1396 UnicodeString frText("C'est MM. Duval."); 1397 frenchBI->setText(frText); 1398 TEST_ASSERT(16 == frenchBI->next()); 1399 TEST_ASSERT(BreakIterator::DONE == frenchBI->next()); 1400 frenchBI->first(); 1401 prtbrks(frenchBI.getAlias(), frText, *this); 1402 logln("Testing against English:"); 1403 filteredBI->setText(frText); 1404 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english. 1405 TEST_ASSERT(16 == filteredBI->next()); 1406 TEST_ASSERT(BreakIterator::DONE == filteredBI->next()); 1407 filteredBI->first(); 1408 prtbrks(filteredBI.getAlias(), frText, *this); 1409 1410 // Verify == 1411 TEST_ASSERT_TRUE(*frenchBI == *frenchBI); 1412 TEST_ASSERT_TRUE(*filteredBI != *frenchBI); 1413 TEST_ASSERT_TRUE(*frenchBI != *filteredBI); 1414 } else { 1415 dataerrln("French BI: not valid."); 1416 } 1417 } 1418 1419 #else 1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION"); 1421 #endif 1422 } 1423 1424 //--------------------------------------------- 1425 // runIndexedTest 1426 //--------------------------------------------- 1427 1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1429 { 1430 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1431 switch (index) { 1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; 1433 #if !UCONFIG_NO_FILE_IO 1434 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; 1435 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; 1436 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; 1437 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; 1438 case 4: name = "TestIteration"; if (exec) TestIteration(); break; 1439 #else 1440 case 0: case 1: case 2: case 3: case 4: name = "skip"; break; 1441 #endif 1442 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; 1443 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; 1444 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; 1445 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; 1446 #if !UCONFIG_NO_FILE_IO 1447 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; 1448 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; 1449 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; 1450 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; 1451 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; 1452 #else 1453 case 9: case 10: case 11: case 12: case 13: name = "skip"; break; 1454 #endif 1455 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; 1456 1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 1458 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break; 1459 #else 1460 case 15: name="skip"; break; 1461 #endif 1462 default: name = ""; break; // needed to end loop 1463 } 1464 } 1465 1466 //--------------------------------------------- 1467 //Internal subroutines 1468 //--------------------------------------------- 1469 1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1471 logln((UnicodeString)"testIsBoundary():"); 1472 int32_t p = 0; 1473 UBool isB; 1474 for (int32_t i = 0; i < text.length(); i++) { 1475 isB = bi.isBoundary(i); 1476 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1477 1478 if (i == boundaries[p]) { 1479 if (!isB) 1480 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1481 p++; 1482 } 1483 else { 1484 if (isB) 1485 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1486 } 1487 } 1488 } 1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1490 UnicodeString selected; 1491 UnicodeString expected=CharsToUnicodeString(expectedString); 1492 1493 if(gotoffset != expectedOffset) 1494 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1495 if(start <= gotoffset){ 1496 testString.extractBetween(start, gotoffset, selected); 1497 } 1498 else{ 1499 testString.extractBetween(gotoffset, start, selected); 1500 } 1501 if(selected.compare(expected) != 0) 1502 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1503 else 1504 logln(prettify("****selected \"" + selected + "\"")); 1505 } 1506 1507 //--------------------------------------------- 1508 //RBBIWithProtectedFunctions class functions 1509 //--------------------------------------------- 1510 1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) 1512 : RuleBasedBreakIterator(data, status) 1513 { 1514 } 1515 1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) 1517 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) 1518 { 1519 } 1520 1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1522