1 /******************************************************************** 2 * Copyright (c) 1999-2014, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * Date Name Description 6 * 12/14/99 Madhu Creation. 7 * 01/12/2000 Madhu updated for changed API 8 ********************************************************************/ 9 10 #include "unicode/utypes.h" 11 12 #if !UCONFIG_NO_BREAK_ITERATION 13 14 #include "unicode/uchar.h" 15 #include "intltest.h" 16 #include "unicode/rbbi.h" 17 #include "unicode/schriter.h" 18 #include "rbbiapts.h" 19 #include "rbbidata.h" 20 #include "cstring.h" 21 #include "ubrkimpl.h" 22 #include "unicode/locid.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utext.h" 25 #include "cmemory.h" 26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 27 #include "unicode/filteredbrk.h" 28 #include <stdio.h> // for sprintf 29 #endif 30 /** 31 * API Test the RuleBasedBreakIterator class 32 */ 33 34 35 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 36 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 37 38 #define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ 39 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 40 41 void RBBIAPITest::TestCloneEquals() 42 { 43 44 UErrorCode status=U_ZERO_ERROR; 45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 49 if(U_FAILURE(status)){ 50 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 51 return; 52 } 53 54 55 UnicodeString testString="Testing word break iterators's clone() and equals()"; 56 bi1->setText(testString); 57 bi2->setText(testString); 58 biequal->setText(testString); 59 60 bi3->setText("hello"); 61 62 logln((UnicodeString)"Testing equals()"); 63 64 logln((UnicodeString)"Testing == and !="); 65 UBool b = (*bi1 != *biequal); 66 b |= *bi1 == *bi2; 67 b |= *bi1 == *bi3; 68 if (b) { 69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); 70 } 71 72 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); 74 75 76 // Quick test of RulesBasedBreakIterator assignment - 77 // Check that 78 // two different iterators are != 79 // they are == after assignment 80 // source and dest iterator produce the same next() after assignment. 81 // deleting one doesn't disable the other. 82 logln("Testing assignment"); 83 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 84 if(U_FAILURE(status)){ 85 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 86 return; 87 } 88 89 RuleBasedBreakIterator biDefault, biDefault2; 90 if(U_FAILURE(status)){ 91 errln((UnicodeString)"FAIL : in construction of default iterator"); 92 return; 93 } 94 if (biDefault == *bix) { 95 errln((UnicodeString)"ERROR: iterators should not compare =="); 96 return; 97 } 98 if (biDefault != biDefault2) { 99 errln((UnicodeString)"ERROR: iterators should compare =="); 100 return; 101 } 102 103 104 UnicodeString HelloString("Hello Kitty"); 105 bix->setText(HelloString); 106 if (*bix == *bi2) { 107 errln(UnicodeString("ERROR: strings should not be equal before assignment.")); 108 } 109 *bix = *bi2; 110 if (*bix != *bi2) { 111 errln(UnicodeString("ERROR: strings should be equal before assignment.")); 112 } 113 114 int bixnext = bix->next(); 115 int bi2next = bi2->next(); 116 if (! (bixnext == bi2next && bixnext == 7)) { 117 errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); 118 } 119 delete bix; 120 if (bi2->next() != 8) { 121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); 122 } 123 124 125 126 logln((UnicodeString)"Testing clone()"); 127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); 128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); 129 130 if(*bi1clone != *bi1 || *bi1clone != *biequal || 131 *bi1clone == *bi3 || *bi1clone == *bi2) 132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); 133 134 if(*bi2clone == *bi1 || *bi2clone == *biequal || 135 *bi2clone == *bi3 || *bi2clone != *bi2) 136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); 137 138 if(bi1->getText() != bi1clone->getText() || 139 bi2clone->getText() != bi2->getText() || 140 *bi2clone == *bi1clone ) 141 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); 142 143 delete bi1clone; 144 delete bi2clone; 145 delete bi1; 146 delete bi3; 147 delete bi2; 148 delete biequal; 149 } 150 151 void RBBIAPITest::TestBoilerPlate() 152 { 153 UErrorCode status = U_ZERO_ERROR; 154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 156 if (U_FAILURE(status)) { 157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 158 return; 159 } 160 if(*a!=*b){ 161 errln("Failed: boilerplate method operator!= does not return correct results"); 162 } 163 // Japanese word break iterators are identical to root with 164 // a dictionary-based break iterator 165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); 166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status); 167 if(c && d){ 168 if(*c!=*d){ 169 errln("Failed: boilerplate method operator== does not return correct results"); 170 } 171 }else{ 172 errln("creation of break iterator failed"); 173 } 174 delete a; 175 delete b; 176 delete c; 177 delete d; 178 } 179 180 void RBBIAPITest::TestgetRules() 181 { 182 UErrorCode status=U_ZERO_ERROR; 183 184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 186 if(U_FAILURE(status)){ 187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); 188 delete bi1; 189 delete bi2; 190 return; 191 } 192 193 194 195 logln((UnicodeString)"Testing toString()"); 196 197 bi1->setText((UnicodeString)"Hello there"); 198 199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); 200 201 UnicodeString temp=bi1->getRules(); 202 UnicodeString temp2=bi2->getRules(); 203 UnicodeString temp3=bi3->getRules(); 204 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 205 errln((UnicodeString)"ERROR: error in getRules() method"); 206 207 delete bi1; 208 delete bi2; 209 delete bi3; 210 } 211 void RBBIAPITest::TestHashCode() 212 { 213 UErrorCode status=U_ZERO_ERROR; 214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 217 if(U_FAILURE(status)){ 218 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 219 delete bi1; 220 delete bi2; 221 delete bi3; 222 return; 223 } 224 225 226 logln((UnicodeString)"Testing hashCode()"); 227 228 bi1->setText((UnicodeString)"Hash code"); 229 bi2->setText((UnicodeString)"Hash code"); 230 bi3->setText((UnicodeString)"Hash code"); 231 232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 234 235 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 236 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 237 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 238 239 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 240 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 241 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 242 243 delete bi1clone; 244 delete bi2clone; 245 delete bi1; 246 delete bi2; 247 delete bi3; 248 249 } 250 void RBBIAPITest::TestGetSetAdoptText() 251 { 252 logln((UnicodeString)"Testing getText setText "); 253 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); 254 UnicodeString str1="first string."; 255 UnicodeString str2="Second string."; 256 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); 257 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); 258 if(status.isFailure()){ 259 errcheckln(status, "Fail : in construction - %s", status.errorName()); 260 return; 261 } 262 263 264 CharacterIterator* text1= new StringCharacterIterator(str1); 265 CharacterIterator* text1Clone = text1->clone(); 266 CharacterIterator* text2= new StringCharacterIterator(str2); 267 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 268 269 wordIter1->setText(str1); 270 CharacterIterator *tci = &wordIter1->getText(); 271 UnicodeString tstr; 272 tci->getText(tstr); 273 TEST_ASSERT(tstr == str1); 274 if(wordIter1->current() != 0) 275 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 276 277 wordIter1->next(2); 278 279 wordIter1->setText(str2); 280 if(wordIter1->current() != 0) 281 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 282 283 284 charIter1->adoptText(text1Clone); 285 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 286 tci = &wordIter1->getText(); 287 tci->getText(tstr); 288 TEST_ASSERT(tstr == str2); 289 tci = &charIter1->getText(); 290 tci->getText(tstr); 291 TEST_ASSERT(tstr == str1); 292 293 294 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); 295 rb->adoptText(text1); 296 if(rb->getText() != *text1) 297 errln((UnicodeString)"ERROR:1 error in adoptText "); 298 rb->adoptText(text2); 299 if(rb->getText() != *text2) 300 errln((UnicodeString)"ERROR:2 error in adoptText "); 301 302 // Adopt where iterator range is less than the entire orignal source string. 303 // (With the change of the break engine to working with UText internally, 304 // CharacterIterators starting at positions other than zero are not supported) 305 rb->adoptText(text3); 306 TEST_ASSERT(rb->preceding(2) == 0); 307 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 308 //if(rb->preceding(2) != 3) { 309 // errln((UnicodeString)"ERROR:3 error in adoptText "); 310 //} 311 //if(rb->following(11) != BreakIterator::DONE) { 312 // errln((UnicodeString)"ERROR:4 error in adoptText "); 313 //} 314 315 // UText API 316 // 317 // Quick test to see if UText is working at all. 318 // 319 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 320 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 321 // 012345678901 322 323 status.reset(); 324 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); 325 wordIter1->setText(ut.getAlias(), status); 326 TEST_ASSERT_SUCCESS(status); 327 328 int32_t pos; 329 pos = wordIter1->first(); 330 TEST_ASSERT(pos==0); 331 pos = wordIter1->next(); 332 TEST_ASSERT(pos==5); 333 pos = wordIter1->next(); 334 TEST_ASSERT(pos==6); 335 pos = wordIter1->next(); 336 TEST_ASSERT(pos==11); 337 pos = wordIter1->next(); 338 TEST_ASSERT(pos==UBRK_DONE); 339 340 status.reset(); 341 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); 342 TEST_ASSERT_SUCCESS(status); 343 wordIter1->setText(ut2.getAlias(), status); 344 TEST_ASSERT_SUCCESS(status); 345 346 pos = wordIter1->first(); 347 TEST_ASSERT(pos==0); 348 pos = wordIter1->next(); 349 TEST_ASSERT(pos==3); 350 pos = wordIter1->next(); 351 TEST_ASSERT(pos==4); 352 353 pos = wordIter1->last(); 354 TEST_ASSERT(pos==6); 355 pos = wordIter1->previous(); 356 TEST_ASSERT(pos==4); 357 pos = wordIter1->previous(); 358 TEST_ASSERT(pos==3); 359 pos = wordIter1->previous(); 360 TEST_ASSERT(pos==0); 361 pos = wordIter1->previous(); 362 TEST_ASSERT(pos==UBRK_DONE); 363 364 status.reset(); 365 UnicodeString sEmpty; 366 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); 367 wordIter1->getUText(gut2.getAlias(), status); 368 TEST_ASSERT_SUCCESS(status); 369 status.reset(); 370 } 371 372 373 void RBBIAPITest::TestIteration() 374 { 375 // This test just verifies that the API is present. 376 // Testing for correct operation of the break rules happens elsewhere. 377 378 UErrorCode status=U_ZERO_ERROR; 379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 380 if (U_FAILURE(status) || bi == NULL) { 381 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 382 } 383 delete bi; 384 385 status=U_ZERO_ERROR; 386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 387 if (U_FAILURE(status) || bi == NULL) { 388 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); 389 } 390 delete bi; 391 392 status=U_ZERO_ERROR; 393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 394 if (U_FAILURE(status) || bi == NULL) { 395 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); 396 } 397 delete bi; 398 399 status=U_ZERO_ERROR; 400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 401 if (U_FAILURE(status) || bi == NULL) { 402 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 403 } 404 delete bi; 405 406 status=U_ZERO_ERROR; 407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 408 if (U_FAILURE(status) || bi == NULL) { 409 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); 410 } 411 delete bi; 412 413 status=U_ZERO_ERROR; 414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 415 if (U_FAILURE(status) || bi == NULL) { 416 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 417 return; // Skip the rest of these tests. 418 } 419 420 421 UnicodeString testString="0123456789"; 422 bi->setText(testString); 423 424 int32_t i; 425 i = bi->first(); 426 if (i != 0) { 427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); 428 } 429 430 i = bi->last(); 431 if (i != 10) { 432 errln("Incorrect value from bi->last(). Expected 10, got %d", i); 433 } 434 435 // 436 // Previous 437 // 438 bi->last(); 439 i = bi->previous(); 440 if (i != 9) { 441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); 442 } 443 444 445 bi->first(); 446 i = bi->previous(); 447 if (i != BreakIterator::DONE) { 448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); 449 } 450 451 // 452 // next() 453 // 454 bi->first(); 455 i = bi->next(); 456 if (i != 1) { 457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); 458 } 459 460 bi->last(); 461 i = bi->next(); 462 if (i != BreakIterator::DONE) { 463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); 464 } 465 466 467 // 468 // current() 469 // 470 bi->first(); 471 i = bi->current(); 472 if (i != 0) { 473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 474 } 475 476 bi->next(); 477 i = bi->current(); 478 if (i != 1) { 479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); 480 } 481 482 bi->last(); 483 bi->next(); 484 i = bi->current(); 485 if (i != 10) { 486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); 487 } 488 489 bi->first(); 490 bi->previous(); 491 i = bi->current(); 492 if (i != 0) { 493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 494 } 495 496 497 // 498 // Following() 499 // 500 i = bi->following(4); 501 if (i != 5) { 502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); 503 } 504 505 i = bi->following(9); 506 if (i != 10) { 507 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); 508 } 509 510 i = bi->following(10); 511 if (i != BreakIterator::DONE) { 512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); 513 } 514 515 516 // 517 // Preceding 518 // 519 i = bi->preceding(4); 520 if (i != 3) { 521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); 522 } 523 524 i = bi->preceding(10); 525 if (i != 9) { 526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); 527 } 528 529 i = bi->preceding(1); 530 if (i != 0) { 531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); 532 } 533 534 i = bi->preceding(0); 535 if (i != BreakIterator::DONE) { 536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); 537 } 538 539 540 // 541 // isBoundary() 542 // 543 bi->first(); 544 if (bi->isBoundary(3) != TRUE) { 545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); 546 } 547 i = bi->current(); 548 if (i != 3) { 549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); 550 } 551 552 553 if (bi->isBoundary(11) != FALSE) { 554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); 555 } 556 i = bi->current(); 557 if (i != 10) { 558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); 559 } 560 561 // 562 // next(n) 563 // 564 bi->first(); 565 i = bi->next(4); 566 if (i != 4) { 567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); 568 } 569 570 i = bi->next(6); 571 if (i != 10) { 572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); 573 } 574 575 bi->first(); 576 i = bi->next(11); 577 if (i != BreakIterator::DONE) { 578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); 579 } 580 581 delete bi; 582 583 } 584 585 586 587 588 589 590 void RBBIAPITest::TestBuilder() { 591 UnicodeString rulesString1 = "$Letters = [:L:];\n" 592 "$Numbers = [:N:];\n" 593 "$Letters+;\n" 594 "$Numbers+;\n" 595 "[^$Letters $Numbers];\n" 596 "!.*;\n"; 597 UnicodeString testString1 = "abc123..abc"; 598 // 01234567890 599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 600 UErrorCode status=U_ZERO_ERROR; 601 UParseError parseError; 602 603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 604 if(U_FAILURE(status)) { 605 dataerrln("Fail : in construction - %s", u_errorName(status)); 606 } else { 607 bi->setText(testString1); 608 doBoundaryTest(*bi, testString1, bounds1); 609 } 610 delete bi; 611 } 612 613 614 // 615 // TestQuoteGrouping 616 // Single quotes within rules imply a grouping, so that a modifier 617 // following the quoted text (* or +) applies to all of the quoted chars. 618 // 619 void RBBIAPITest::TestQuoteGrouping() { 620 UnicodeString rulesString1 = "#Here comes the rule...\n" 621 "'$@!'*;\n" // (\$\@\!)* 622 ".;\n"; 623 624 UnicodeString testString1 = "$@!$@!X$@!!X"; 625 // 0123456789012 626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 627 UErrorCode status=U_ZERO_ERROR; 628 UParseError parseError; 629 630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 631 if(U_FAILURE(status)) { 632 dataerrln("Fail : in construction - %s", u_errorName(status)); 633 } else { 634 bi->setText(testString1); 635 doBoundaryTest(*bi, testString1, bounds1); 636 } 637 delete bi; 638 } 639 640 // 641 // TestRuleStatus 642 // Test word break rule status constants. 643 // 644 void RBBIAPITest::TestRuleStatus() { 645 UChar str[30]; 646 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing 647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO 648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", 649 // 012345678901234567 8 9 0 650 // Katakana 651 str, 30); 652 UnicodeString testString1(str); 653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; 654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 656 UBRK_WORD_IDEO, UBRK_WORD_NONE}; 657 658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; 661 662 UErrorCode status=U_ZERO_ERROR; 663 664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); 665 if(U_FAILURE(status)) { 666 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 667 } else { 668 bi->setText(testString1); 669 // First test that the breaks are in the right spots. 670 doBoundaryTest(*bi, testString1, bounds1); 671 672 // Then go back and check tag values 673 int32_t i = 0; 674 int32_t pos, tag; 675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 676 if (pos != bounds1[i]) { 677 errln("FAIL: unexpected word break at postion %d", pos); 678 break; 679 } 680 tag = bi->getRuleStatus(); 681 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 682 errln("FAIL: incorrect tag value %d at position %d", tag, pos); 683 break; 684 } 685 686 // Check that we get the same tag values from getRuleStatusVec() 687 int32_t vec[10]; 688 int t = bi->getRuleStatusVec(vec, 10, status); 689 TEST_ASSERT_SUCCESS(status); 690 TEST_ASSERT(t==1); 691 TEST_ASSERT(vec[0] == tag); 692 } 693 } 694 delete bi; 695 696 // Now test line break status. This test mostly is to confirm that the status constants 697 // are correctly declared in the header. 698 testString1 = "test line. \n"; 699 // break type s s h 700 701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status); 702 if(U_FAILURE(status)) { 703 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); 704 } else { 705 int32_t i = 0; 706 int32_t pos, tag; 707 UBool success; 708 709 bi->setText(testString1); 710 pos = bi->current(); 711 tag = bi->getRuleStatus(); 712 for (i=0; i<3; i++) { 713 switch (i) { 714 case 0: 715 success = pos==0 && tag==UBRK_LINE_SOFT; break; 716 case 1: 717 success = pos==5 && tag==UBRK_LINE_SOFT; break; 718 case 2: 719 success = pos==12 && tag==UBRK_LINE_HARD; break; 720 default: 721 success = FALSE; break; 722 } 723 if (success == FALSE) { 724 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", 725 i, pos, tag); 726 break; 727 } 728 pos = bi->next(); 729 tag = bi->getRuleStatus(); 730 } 731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { 734 errln("UBRK_LINE_* constants from header are inconsistent."); 735 } 736 } 737 delete bi; 738 739 } 740 741 742 // 743 // TestRuleStatusVec 744 // Test the vector form of break rule status. 745 // 746 void RBBIAPITest::TestRuleStatusVec() { 747 UnicodeString rulesString( "[A-N]{100}; \n" 748 "[a-w]{200}; \n" 749 "[\\p{L}]{300}; \n" 750 "[\\p{N}]{400}; \n" 751 "[0-5]{500}; \n" 752 "!.*;\n", -1, US_INV); 753 UnicodeString testString1 = "Aapz5?"; 754 int32_t statusVals[10]; 755 int32_t numStatuses; 756 int32_t pos; 757 758 UErrorCode status=U_ZERO_ERROR; 759 UParseError parseError; 760 761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 762 if (U_FAILURE(status)) { 763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); 764 } else { 765 bi->setText(testString1); 766 767 // A 768 pos = bi->next(); 769 TEST_ASSERT(pos==1); 770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 771 TEST_ASSERT_SUCCESS(status); 772 TEST_ASSERT(numStatuses == 2); 773 TEST_ASSERT(statusVals[0] == 100); 774 TEST_ASSERT(statusVals[1] == 300); 775 776 // a 777 pos = bi->next(); 778 TEST_ASSERT(pos==2); 779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 780 TEST_ASSERT_SUCCESS(status); 781 TEST_ASSERT(numStatuses == 2); 782 TEST_ASSERT(statusVals[0] == 200); 783 TEST_ASSERT(statusVals[1] == 300); 784 785 // p 786 pos = bi->next(); 787 TEST_ASSERT(pos==3); 788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 789 TEST_ASSERT_SUCCESS(status); 790 TEST_ASSERT(numStatuses == 2); 791 TEST_ASSERT(statusVals[0] == 200); 792 TEST_ASSERT(statusVals[1] == 300); 793 794 // z 795 pos = bi->next(); 796 TEST_ASSERT(pos==4); 797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 798 TEST_ASSERT_SUCCESS(status); 799 TEST_ASSERT(numStatuses == 1); 800 TEST_ASSERT(statusVals[0] == 300); 801 802 // 5 803 pos = bi->next(); 804 TEST_ASSERT(pos==5); 805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 806 TEST_ASSERT_SUCCESS(status); 807 TEST_ASSERT(numStatuses == 2); 808 TEST_ASSERT(statusVals[0] == 400); 809 TEST_ASSERT(statusVals[1] == 500); 810 811 // ? 812 pos = bi->next(); 813 TEST_ASSERT(pos==6); 814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 815 TEST_ASSERT_SUCCESS(status); 816 TEST_ASSERT(numStatuses == 1); 817 TEST_ASSERT(statusVals[0] == 0); 818 819 // 820 // Check buffer overflow error handling. Char == A 821 // 822 bi->first(); 823 pos = bi->next(); 824 TEST_ASSERT(pos==1); 825 memset(statusVals, -1, sizeof(statusVals)); 826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 828 TEST_ASSERT(numStatuses == 2); 829 TEST_ASSERT(statusVals[0] == -1); 830 831 status = U_ZERO_ERROR; 832 memset(statusVals, -1, sizeof(statusVals)); 833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 835 TEST_ASSERT(numStatuses == 2); 836 TEST_ASSERT(statusVals[0] == 100); 837 TEST_ASSERT(statusVals[1] == -1); 838 839 status = U_ZERO_ERROR; 840 memset(statusVals, -1, sizeof(statusVals)); 841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 842 TEST_ASSERT_SUCCESS(status); 843 TEST_ASSERT(numStatuses == 2); 844 TEST_ASSERT(statusVals[0] == 100); 845 TEST_ASSERT(statusVals[1] == 300); 846 TEST_ASSERT(statusVals[2] == -1); 847 } 848 delete bi; 849 850 } 851 852 // 853 // Bug 2190 Regression test. Builder crash on rule consisting of only a 854 // $variable reference 855 void RBBIAPITest::TestBug2190() { 856 UnicodeString rulesString1 = "$aaa = abcd;\n" 857 "$bbb = $aaa;\n" 858 "$bbb;\n"; 859 UnicodeString testString1 = "abcdabcd"; 860 // 01234567890 861 int32_t bounds1[] = {0, 4, 8}; 862 UErrorCode status=U_ZERO_ERROR; 863 UParseError parseError; 864 865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 866 if(U_FAILURE(status)) { 867 dataerrln("Fail : in construction - %s", u_errorName(status)); 868 } else { 869 bi->setText(testString1); 870 doBoundaryTest(*bi, testString1, bounds1); 871 } 872 delete bi; 873 } 874 875 876 void RBBIAPITest::TestRegistration() { 877 #if !UCONFIG_NO_SERVICE 878 UErrorCode status = U_ZERO_ERROR; 879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 880 // ok to not delete these if we exit because of error? 881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 882 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 884 885 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { 886 dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); 887 888 delete ja_word; 889 delete ja_char; 890 delete root_word; 891 delete root_char; 892 893 return; 894 } 895 896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 897 { 898 #if 0 // With a dictionary based word breaking, ja_word is identical to root. 899 if (ja_word && *ja_word == *root_word) { 900 errln("japan not different from root"); 901 } 902 #endif 903 } 904 905 { 906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 907 UBool fail = TRUE; 908 if(result){ 909 fail = *result != *ja_word; 910 } 911 delete result; 912 if (fail) { 913 errln("bad result for xx_XX/word"); 914 } 915 } 916 917 { 918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 919 UBool fail = TRUE; 920 if(result){ 921 fail = *result != *ja_char; 922 } 923 delete result; 924 if (fail) { 925 errln("bad result for ja_JP/char"); 926 } 927 } 928 929 { 930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 931 UBool fail = TRUE; 932 if(result){ 933 fail = *result != *root_char; 934 } 935 delete result; 936 if (fail) { 937 errln("bad result for xx_XX/char"); 938 } 939 } 940 941 { 942 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 943 UBool found = FALSE; 944 const UnicodeString* p; 945 while ((p = avail->snext(status))) { 946 if (p->compare("xx") == 0) { 947 found = TRUE; 948 break; 949 } 950 } 951 delete avail; 952 if (!found) { 953 errln("did not find test locale"); 954 } 955 } 956 957 { 958 UBool unreg = BreakIterator::unregister(key, status); 959 if (!unreg) { 960 errln("unable to unregister"); 961 } 962 } 963 964 { 965 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 966 BreakIterator* root = BreakIterator::createWordInstance("", status); 967 UBool fail = TRUE; 968 if(root){ 969 fail = *root != *result; 970 } 971 delete root; 972 delete result; 973 if (fail) { 974 errln("did not get root break"); 975 } 976 } 977 978 { 979 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 980 UBool found = FALSE; 981 const UnicodeString* p; 982 while ((p = avail->snext(status))) { 983 if (p->compare("xx") == 0) { 984 found = TRUE; 985 break; 986 } 987 } 988 delete avail; 989 if (found) { 990 errln("found test locale"); 991 } 992 } 993 994 { 995 int32_t count; 996 UBool foundLocale = FALSE; 997 const Locale *avail = BreakIterator::getAvailableLocales(count); 998 for (int i=0; i<count; i++) { 999 if (avail[i] == Locale::getEnglish()) { 1000 foundLocale = TRUE; 1001 break; 1002 } 1003 } 1004 if (foundLocale == FALSE) { 1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 1006 } 1007 } 1008 1009 1010 // ja_word was adopted by factory 1011 delete ja_char; 1012 delete root_word; 1013 delete root_char; 1014 #endif 1015 } 1016 1017 void RBBIAPITest::RoundtripRule(const char *dataFile) { 1018 UErrorCode status = U_ZERO_ERROR; 1019 UParseError parseError; 1020 parseError.line = 0; 1021 parseError.offset = 0; 1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); 1023 uint32_t length; 1024 const UChar *builtSource; 1025 const uint8_t *rbbiRules; 1026 const uint8_t *builtRules; 1027 1028 if (U_FAILURE(status)) { 1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); 1030 return; 1031 } 1032 1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); 1034 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1036 if (U_FAILURE(status)) { 1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1038 u_errorName(status), parseError.line, parseError.offset); 1039 return; 1040 }; 1041 rbbiRules = brkItr->getBinaryRules(length); 1042 logln("Comparing \"%s\" len=%d", dataFile, length); 1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1044 errln("Built rules and rebuilt rules are different %s", dataFile); 1045 return; 1046 } 1047 delete brkItr; 1048 } 1049 1050 void RBBIAPITest::TestRoundtripRules() { 1051 RoundtripRule("word"); 1052 RoundtripRule("title"); 1053 RoundtripRule("sent"); 1054 RoundtripRule("line"); 1055 RoundtripRule("char"); 1056 if (!quick) { 1057 RoundtripRule("word_POSIX"); 1058 } 1059 } 1060 1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* 1062 // (these are protected so we access them via a local class RBBIWithProtectedFunctions). 1063 // This is just a sanity check, not a thorough test (e.g. we don't check that the 1064 // first delete actually frees rulesCopy). 1065 void RBBIAPITest::TestCreateFromRBBIData() { 1066 // Get some handy RBBIData 1067 const char *brkName = "word"; // or "sent", "line", "char", etc. 1068 UErrorCode status = U_ZERO_ERROR; 1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); 1070 if ( U_SUCCESS(status) ) { 1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); 1072 uint32_t length = builtRules->fLength; 1073 RBBIWithProtectedFunctions * brkItr; 1074 1075 // Try the memory-adopting constructor, need to copy the data first 1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); 1077 if ( rulesCopy ) { 1078 uprv_memcpy( rulesCopy, builtRules, length ); 1079 1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); 1081 if ( U_SUCCESS(status) ) { 1082 delete brkItr; // this should free rulesCopy 1083 } else { 1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1085 status = U_ZERO_ERROR;// reset for the next test 1086 uprv_free( rulesCopy ); 1087 } 1088 } 1089 1090 // Now try the non-adopting constructor 1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); 1092 if ( U_SUCCESS(status) ) { 1093 delete brkItr; // this should NOT attempt to free builtRules 1094 if (builtRules->fLength != length) { // sanity check 1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); 1096 } 1097 } else { 1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1099 } 1100 } 1101 1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) 1103 // 1104 status = U_ZERO_ERROR; 1105 RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 1106 if (rb == NULL || U_FAILURE(status)) { 1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status)); 1108 } else { 1109 uint32_t length; 1110 const uint8_t *rules = rb->getBinaryRules(length); 1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); 1112 TEST_ASSERT_SUCCESS(status); 1113 TEST_ASSERT(*rb == *rb2); 1114 UnicodeString words = "one two three "; 1115 rb2->setText(words); 1116 int wordCounter = 0; 1117 while (rb2->next() != UBRK_DONE) { 1118 wordCounter++; 1119 } 1120 TEST_ASSERT(wordCounter == 6); 1121 1122 status = U_ZERO_ERROR; 1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); 1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1125 1126 delete rb; 1127 delete rb2; 1128 delete rb3; 1129 } 1130 } 1131 1132 1133 void RBBIAPITest::TestRefreshInputText() { 1134 /* 1135 * RefreshInput changes out the input of a Break Iterator without 1136 * changing anything else in the iterator's state. Used with Java JNI, 1137 * when Java moves the underlying string storage. This test 1138 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. 1139 * The right set of boundaries should still be found. 1140 */ 1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ 1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; 1143 UErrorCode status = U_ZERO_ERROR; 1144 UText ut1 = UTEXT_INITIALIZER; 1145 UText ut2 = UTEXT_INITIALIZER; 1146 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 1147 TEST_ASSERT_SUCCESS(status); 1148 1149 utext_openUChars(&ut1, testStr, -1, &status); 1150 TEST_ASSERT_SUCCESS(status); 1151 1152 if (U_SUCCESS(status)) { 1153 bi->setText(&ut1, status); 1154 TEST_ASSERT_SUCCESS(status); 1155 1156 /* Line boundaries will occur before each letter in the original string */ 1157 TEST_ASSERT(1 == bi->next()); 1158 TEST_ASSERT(3 == bi->next()); 1159 1160 /* Move the string, kill the original string. */ 1161 u_strcpy(movedStr, testStr); 1162 u_memset(testStr, 0x20, u_strlen(testStr)); 1163 utext_openUChars(&ut2, movedStr, -1, &status); 1164 TEST_ASSERT_SUCCESS(status); 1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); 1166 TEST_ASSERT_SUCCESS(status); 1167 TEST_ASSERT(bi == returnedBI); 1168 1169 /* Find the following matches, now working in the moved string. */ 1170 TEST_ASSERT(5 == bi->next()); 1171 TEST_ASSERT(7 == bi->next()); 1172 TEST_ASSERT(8 == bi->next()); 1173 TEST_ASSERT(UBRK_DONE == bi->next()); 1174 1175 utext_close(&ut1); 1176 utext_close(&ut2); 1177 } 1178 delete bi; 1179 1180 } 1181 1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) { 1184 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets 1185 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); 1186 1187 int32_t *pos = new int32_t[ustr.length()]; 1188 int32_t posCount = 0; 1189 1190 // calculate breaks up front, so we can print out 1191 // sans any debugging 1192 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { 1193 pos[posCount++] = n; 1194 if(posCount>=ustr.length()) { 1195 it.errln("brk count exceeds string length!"); 1196 return; 1197 } 1198 } 1199 UnicodeString out; 1200 out.append((UChar)CHSTR); 1201 int32_t prev = 0; 1202 for(int32_t i=0;i<posCount;i++) { 1203 int32_t n=pos[i]; 1204 out.append(ustr.tempSubString(prev,n-prev)); 1205 out.append((UChar)PILCROW); 1206 prev=n; 1207 } 1208 out.append(ustr.tempSubString(prev,ustr.length()-prev)); 1209 out.append((UChar)CHEND); 1210 it.logln(out); 1211 1212 out.remove(); 1213 for(int32_t i=0;i<posCount;i++) { 1214 char tmp[100]; 1215 sprintf(tmp,"%d ",pos[i]); 1216 out.append(UnicodeString(tmp)); 1217 } 1218 it.logln(out); 1219 delete [] pos; 1220 } 1221 #endif 1222 1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() { 1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1225 UErrorCode status = U_ZERO_ERROR; 1226 LocalPointer<FilteredBreakIteratorBuilder> builder; 1227 LocalPointer<BreakIterator> baseBI; 1228 LocalPointer<BreakIterator> filteredBI; 1229 1230 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. 1231 const UnicodeString ABBR_MR("Mr."); 1232 const UnicodeString ABBR_CAPT("Capt."); 1233 1234 { 1235 logln("Constructing empty builder\n"); 1236 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1237 TEST_ASSERT_SUCCESS(status); 1238 1239 logln("Constructing base BI\n"); 1240 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1241 TEST_ASSERT_SUCCESS(status); 1242 1243 logln("Building new BI\n"); 1244 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1245 TEST_ASSERT_SUCCESS(status); 1246 1247 logln("Testing:"); 1248 filteredBI->setText(text); 1249 TEST_ASSERT(20 == filteredBI->next()); // Mr. 1250 TEST_ASSERT(84 == filteredBI->next()); // recovered. 1251 TEST_ASSERT(90 == filteredBI->next()); // Capt. 1252 TEST_ASSERT(181 == filteredBI->next()); // Mr. 1253 TEST_ASSERT(278 == filteredBI->next()); // charge. 1254 filteredBI->first(); 1255 prtbrks(filteredBI.getAlias(), text, *this); 1256 } 1257 1258 { 1259 logln("Constructing empty builder\n"); 1260 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1261 TEST_ASSERT_SUCCESS(status); 1262 1263 logln("Adding Mr. as an exception\n"); 1264 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1265 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it 1266 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); 1267 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it 1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1269 TEST_ASSERT_SUCCESS(status); 1270 1271 logln("Constructing base BI\n"); 1272 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1273 TEST_ASSERT_SUCCESS(status); 1274 1275 logln("Building new BI\n"); 1276 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1277 TEST_ASSERT_SUCCESS(status); 1278 1279 logln("Testing:"); 1280 filteredBI->setText(text); 1281 TEST_ASSERT(84 == filteredBI->next()); 1282 TEST_ASSERT(90 == filteredBI->next());// Capt. 1283 TEST_ASSERT(278 == filteredBI->next()); 1284 filteredBI->first(); 1285 prtbrks(filteredBI.getAlias(), text, *this); 1286 } 1287 1288 1289 { 1290 logln("Constructing empty builder\n"); 1291 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1292 TEST_ASSERT_SUCCESS(status); 1293 1294 logln("Adding Mr. and Capt as an exception\n"); 1295 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1296 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); 1297 TEST_ASSERT_SUCCESS(status); 1298 1299 logln("Constructing base BI\n"); 1300 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1301 TEST_ASSERT_SUCCESS(status); 1302 1303 logln("Building new BI\n"); 1304 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1305 TEST_ASSERT_SUCCESS(status); 1306 1307 logln("Testing:"); 1308 filteredBI->setText(text); 1309 TEST_ASSERT(84 == filteredBI->next()); 1310 TEST_ASSERT(278 == filteredBI->next()); 1311 filteredBI->first(); 1312 prtbrks(filteredBI.getAlias(), text, *this); 1313 } 1314 1315 1316 { 1317 logln("Constructing English builder\n"); 1318 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1319 TEST_ASSERT_SUCCESS(status); 1320 1321 logln("Constructing base BI\n"); 1322 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1323 TEST_ASSERT_SUCCESS(status); 1324 1325 logln("unsuppressing 'Capt'"); 1326 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status)); 1327 1328 logln("Building new BI\n"); 1329 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1330 TEST_ASSERT_SUCCESS(status); 1331 1332 if(filteredBI.isValid()) { 1333 logln("Testing:"); 1334 filteredBI->setText(text); 1335 TEST_ASSERT(84 == filteredBI->next()); 1336 TEST_ASSERT(90 == filteredBI->next()); 1337 TEST_ASSERT(278 == filteredBI->next()); 1338 filteredBI->first(); 1339 prtbrks(filteredBI.getAlias(), text, *this); 1340 } 1341 } 1342 1343 1344 { 1345 logln("Constructing English builder\n"); 1346 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1347 TEST_ASSERT_SUCCESS(status); 1348 1349 logln("Constructing base BI\n"); 1350 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1351 TEST_ASSERT_SUCCESS(status); 1352 1353 logln("Building new BI\n"); 1354 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1355 TEST_ASSERT_SUCCESS(status); 1356 1357 if(filteredBI.isValid()) { 1358 logln("Testing:"); 1359 filteredBI->setText(text); 1360 TEST_ASSERT(84 == filteredBI->next()); 1361 TEST_ASSERT(278 == filteredBI->next()); 1362 filteredBI->first(); 1363 prtbrks(filteredBI.getAlias(), text, *this); 1364 } 1365 } 1366 1367 #if 0 1368 // reenable once french is in 1369 { 1370 logln("Constructing French builder"); 1371 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status)); 1372 TEST_ASSERT_SUCCESS(status); 1373 1374 logln("Constructing base BI\n"); 1375 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status)); 1376 TEST_ASSERT_SUCCESS(status); 1377 1378 logln("Building new BI\n"); 1379 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1380 TEST_ASSERT_SUCCESS(status); 1381 1382 if(filteredBI.isValid()) { 1383 logln("Testing:"); 1384 filteredBI->setText(text); 1385 TEST_ASSERT(20 == filteredBI->next()); 1386 TEST_ASSERT(84 == filteredBI->next()); 1387 filteredBI->first(); 1388 prtbrks(filteredBI.getAlias(), text, *this); 1389 } 1390 } 1391 #endif 1392 1393 #else 1394 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION"); 1395 #endif 1396 } 1397 1398 //--------------------------------------------- 1399 // runIndexedTest 1400 //--------------------------------------------- 1401 1402 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1403 { 1404 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1405 switch (index) { 1406 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; 1407 #if !UCONFIG_NO_FILE_IO 1408 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; 1409 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; 1410 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; 1411 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; 1412 case 4: name = "TestIteration"; if (exec) TestIteration(); break; 1413 #else 1414 case 0: case 1: case 2: case 3: case 4: name = "skip"; break; 1415 #endif 1416 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; 1417 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; 1418 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; 1419 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; 1420 #if !UCONFIG_NO_FILE_IO 1421 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; 1422 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; 1423 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; 1424 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; 1425 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; 1426 #else 1427 case 9: case 10: case 11: case 12: case 13: name = "skip"; break; 1428 #endif 1429 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; 1430 1431 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 1432 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break; 1433 #else 1434 case 15: name="skip"; break; 1435 #endif 1436 default: name = ""; break; // needed to end loop 1437 } 1438 } 1439 1440 //--------------------------------------------- 1441 //Internal subroutines 1442 //--------------------------------------------- 1443 1444 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1445 logln((UnicodeString)"testIsBoundary():"); 1446 int32_t p = 0; 1447 UBool isB; 1448 for (int32_t i = 0; i < text.length(); i++) { 1449 isB = bi.isBoundary(i); 1450 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1451 1452 if (i == boundaries[p]) { 1453 if (!isB) 1454 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1455 p++; 1456 } 1457 else { 1458 if (isB) 1459 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1460 } 1461 } 1462 } 1463 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1464 UnicodeString selected; 1465 UnicodeString expected=CharsToUnicodeString(expectedString); 1466 1467 if(gotoffset != expectedOffset) 1468 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1469 if(start <= gotoffset){ 1470 testString.extractBetween(start, gotoffset, selected); 1471 } 1472 else{ 1473 testString.extractBetween(gotoffset, start, selected); 1474 } 1475 if(selected.compare(expected) != 0) 1476 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1477 else 1478 logln(prettify("****selected \"" + selected + "\"")); 1479 } 1480 1481 //--------------------------------------------- 1482 //RBBIWithProtectedFunctions class functions 1483 //--------------------------------------------- 1484 1485 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) 1486 : RuleBasedBreakIterator(data, status) 1487 { 1488 } 1489 1490 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) 1491 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) 1492 { 1493 } 1494 1495 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1496