1 /******************************************************************** 2 * Copyright (c) 1999-2010, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * Date Name Description 6 * 12/14/99 Madhu Creation. 7 * 01/12/2000 Madhu updated for changed API 8 ********************************************************************/ 9 10 #include "unicode/utypes.h" 11 12 #if !UCONFIG_NO_BREAK_ITERATION 13 14 #include "unicode/uchar.h" 15 #include "intltest.h" 16 #include "unicode/rbbi.h" 17 #include "unicode/schriter.h" 18 #include "rbbiapts.h" 19 #include "rbbidata.h" 20 #include "cstring.h" 21 #include "ubrkimpl.h" 22 #include "unicode/ustring.h" 23 #include "unicode/utext.h" 24 #include "cmemory.h" 25 26 /** 27 * API Test the RuleBasedBreakIterator class 28 */ 29 30 31 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 32 errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 33 34 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 35 errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} 36 37 void RBBIAPITest::TestCloneEquals() 38 { 39 40 UErrorCode status=U_ZERO_ERROR; 41 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 42 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 43 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 44 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 45 if(U_FAILURE(status)){ 46 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 47 return; 48 } 49 50 51 UnicodeString testString="Testing word break iterators's clone() and equals()"; 52 bi1->setText(testString); 53 bi2->setText(testString); 54 biequal->setText(testString); 55 56 bi3->setText("hello"); 57 58 logln((UnicodeString)"Testing equals()"); 59 60 logln((UnicodeString)"Testing == and !="); 61 UBool b = (*bi1 != *biequal); 62 b |= *bi1 == *bi2; 63 b |= *bi1 == *bi3; 64 if (b) { 65 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); 66 } 67 68 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 69 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); 70 71 72 // Quick test of RulesBasedBreakIterator assignment - 73 // Check that 74 // two different iterators are != 75 // they are == after assignment 76 // source and dest iterator produce the same next() after assignment. 77 // deleting one doesn't disable the other. 78 logln("Testing assignment"); 79 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 80 if(U_FAILURE(status)){ 81 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 82 return; 83 } 84 85 RuleBasedBreakIterator biDefault, biDefault2; 86 if(U_FAILURE(status)){ 87 errln((UnicodeString)"FAIL : in construction of default iterator"); 88 return; 89 } 90 if (biDefault == *bix) { 91 errln((UnicodeString)"ERROR: iterators should not compare =="); 92 return; 93 } 94 if (biDefault != biDefault2) { 95 errln((UnicodeString)"ERROR: iterators should compare =="); 96 return; 97 } 98 99 100 UnicodeString HelloString("Hello Kitty"); 101 bix->setText(HelloString); 102 if (*bix == *bi2) { 103 errln(UnicodeString("ERROR: strings should not be equal before assignment.")); 104 } 105 *bix = *bi2; 106 if (*bix != *bi2) { 107 errln(UnicodeString("ERROR: strings should be equal before assignment.")); 108 } 109 110 int bixnext = bix->next(); 111 int bi2next = bi2->next(); 112 if (! (bixnext == bi2next && bixnext == 7)) { 113 errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); 114 } 115 delete bix; 116 if (bi2->next() != 8) { 117 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); 118 } 119 120 121 122 logln((UnicodeString)"Testing clone()"); 123 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); 124 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); 125 126 if(*bi1clone != *bi1 || *bi1clone != *biequal || 127 *bi1clone == *bi3 || *bi1clone == *bi2) 128 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); 129 130 if(*bi2clone == *bi1 || *bi2clone == *biequal || 131 *bi2clone == *bi3 || *bi2clone != *bi2) 132 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); 133 134 if(bi1->getText() != bi1clone->getText() || 135 bi2clone->getText() != bi2->getText() || 136 *bi2clone == *bi1clone ) 137 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); 138 139 delete bi1clone; 140 delete bi2clone; 141 delete bi1; 142 delete bi3; 143 delete bi2; 144 delete biequal; 145 } 146 147 void RBBIAPITest::TestBoilerPlate() 148 { 149 UErrorCode status = U_ZERO_ERROR; 150 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 151 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 152 if (U_FAILURE(status)) { 153 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 154 return; 155 } 156 if(*a!=*b){ 157 errln("Failed: boilerplate method operator!= does not return correct results"); 158 } 159 BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status); 160 if(a && c){ 161 if(*c==*a){ 162 errln("Failed: boilerplate method opertator== does not return correct results"); 163 } 164 }else{ 165 errln("creation of break iterator failed"); 166 } 167 delete a; 168 delete b; 169 delete c; 170 } 171 172 void RBBIAPITest::TestgetRules() 173 { 174 UErrorCode status=U_ZERO_ERROR; 175 176 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 177 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 178 if(U_FAILURE(status)){ 179 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); 180 delete bi1; 181 delete bi2; 182 return; 183 } 184 185 186 187 logln((UnicodeString)"Testing toString()"); 188 189 bi1->setText((UnicodeString)"Hello there"); 190 191 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); 192 193 UnicodeString temp=bi1->getRules(); 194 UnicodeString temp2=bi2->getRules(); 195 UnicodeString temp3=bi3->getRules(); 196 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 197 errln((UnicodeString)"ERROR: error in getRules() method"); 198 199 delete bi1; 200 delete bi2; 201 delete bi3; 202 } 203 void RBBIAPITest::TestHashCode() 204 { 205 UErrorCode status=U_ZERO_ERROR; 206 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 207 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 208 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 209 if(U_FAILURE(status)){ 210 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 211 delete bi1; 212 delete bi2; 213 delete bi3; 214 return; 215 } 216 217 218 logln((UnicodeString)"Testing hashCode()"); 219 220 bi1->setText((UnicodeString)"Hash code"); 221 bi2->setText((UnicodeString)"Hash code"); 222 bi3->setText((UnicodeString)"Hash code"); 223 224 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 225 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 226 227 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 228 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 229 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 230 231 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 232 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 233 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 234 235 delete bi1clone; 236 delete bi2clone; 237 delete bi1; 238 delete bi2; 239 delete bi3; 240 241 } 242 void RBBIAPITest::TestGetSetAdoptText() 243 { 244 logln((UnicodeString)"Testing getText setText "); 245 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); 246 UnicodeString str1="first string."; 247 UnicodeString str2="Second string."; 248 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); 249 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); 250 if(status.isFailure()){ 251 errcheckln(status, "Fail : in construction - %s", status.errorName()); 252 return; 253 } 254 255 256 CharacterIterator* text1= new StringCharacterIterator(str1); 257 CharacterIterator* text1Clone = text1->clone(); 258 CharacterIterator* text2= new StringCharacterIterator(str2); 259 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 260 261 wordIter1->setText(str1); 262 CharacterIterator *tci = &wordIter1->getText(); 263 UnicodeString tstr; 264 tci->getText(tstr); 265 TEST_ASSERT(tstr == str1); 266 if(wordIter1->current() != 0) 267 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 268 269 wordIter1->next(2); 270 271 wordIter1->setText(str2); 272 if(wordIter1->current() != 0) 273 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 274 275 276 charIter1->adoptText(text1Clone); 277 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 278 tci = &wordIter1->getText(); 279 tci->getText(tstr); 280 TEST_ASSERT(tstr == str2); 281 tci = &charIter1->getText(); 282 tci->getText(tstr); 283 TEST_ASSERT(tstr == str1); 284 285 286 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); 287 rb->adoptText(text1); 288 if(rb->getText() != *text1) 289 errln((UnicodeString)"ERROR:1 error in adoptText "); 290 rb->adoptText(text2); 291 if(rb->getText() != *text2) 292 errln((UnicodeString)"ERROR:2 error in adoptText "); 293 294 // Adopt where iterator range is less than the entire orignal source string. 295 // (With the change of the break engine to working with UText internally, 296 // CharacterIterators starting at positions other than zero are not supported) 297 rb->adoptText(text3); 298 TEST_ASSERT(rb->preceding(2) == 0); 299 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 300 //if(rb->preceding(2) != 3) { 301 // errln((UnicodeString)"ERROR:3 error in adoptText "); 302 //} 303 //if(rb->following(11) != BreakIterator::DONE) { 304 // errln((UnicodeString)"ERROR:4 error in adoptText "); 305 //} 306 307 // UText API 308 // 309 // Quick test to see if UText is working at all. 310 // 311 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 312 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 313 // 012345678901 314 315 status.reset(); 316 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); 317 wordIter1->setText(ut.getAlias(), status); 318 TEST_ASSERT_SUCCESS(status); 319 320 int32_t pos; 321 pos = wordIter1->first(); 322 TEST_ASSERT(pos==0); 323 pos = wordIter1->next(); 324 TEST_ASSERT(pos==5); 325 pos = wordIter1->next(); 326 TEST_ASSERT(pos==6); 327 pos = wordIter1->next(); 328 TEST_ASSERT(pos==11); 329 pos = wordIter1->next(); 330 TEST_ASSERT(pos==UBRK_DONE); 331 332 status.reset(); 333 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); 334 TEST_ASSERT_SUCCESS(status); 335 wordIter1->setText(ut2.getAlias(), status); 336 TEST_ASSERT_SUCCESS(status); 337 338 pos = wordIter1->first(); 339 TEST_ASSERT(pos==0); 340 pos = wordIter1->next(); 341 TEST_ASSERT(pos==3); 342 pos = wordIter1->next(); 343 TEST_ASSERT(pos==4); 344 345 pos = wordIter1->last(); 346 TEST_ASSERT(pos==6); 347 pos = wordIter1->previous(); 348 TEST_ASSERT(pos==4); 349 pos = wordIter1->previous(); 350 TEST_ASSERT(pos==3); 351 pos = wordIter1->previous(); 352 TEST_ASSERT(pos==0); 353 pos = wordIter1->previous(); 354 TEST_ASSERT(pos==UBRK_DONE); 355 356 status.reset(); 357 UnicodeString sEmpty; 358 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); 359 wordIter1->getUText(gut2.getAlias(), status); 360 TEST_ASSERT_SUCCESS(status); 361 status.reset(); 362 } 363 364 365 void RBBIAPITest::TestIteration() 366 { 367 // This test just verifies that the API is present. 368 // Testing for correct operation of the break rules happens elsewhere. 369 370 UErrorCode status=U_ZERO_ERROR; 371 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 372 if (U_FAILURE(status) || bi == NULL) { 373 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 374 } 375 delete bi; 376 377 status=U_ZERO_ERROR; 378 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 379 if (U_FAILURE(status) || bi == NULL) { 380 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); 381 } 382 delete bi; 383 384 status=U_ZERO_ERROR; 385 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 386 if (U_FAILURE(status) || bi == NULL) { 387 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); 388 } 389 delete bi; 390 391 status=U_ZERO_ERROR; 392 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 393 if (U_FAILURE(status) || bi == NULL) { 394 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 395 } 396 delete bi; 397 398 status=U_ZERO_ERROR; 399 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 400 if (U_FAILURE(status) || bi == NULL) { 401 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); 402 } 403 delete bi; 404 405 status=U_ZERO_ERROR; 406 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 407 if (U_FAILURE(status) || bi == NULL) { 408 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 409 return; // Skip the rest of these tests. 410 } 411 412 413 UnicodeString testString="0123456789"; 414 bi->setText(testString); 415 416 int32_t i; 417 i = bi->first(); 418 if (i != 0) { 419 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); 420 } 421 422 i = bi->last(); 423 if (i != 10) { 424 errln("Incorrect value from bi->last(). Expected 10, got %d", i); 425 } 426 427 // 428 // Previous 429 // 430 bi->last(); 431 i = bi->previous(); 432 if (i != 9) { 433 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); 434 } 435 436 437 bi->first(); 438 i = bi->previous(); 439 if (i != BreakIterator::DONE) { 440 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); 441 } 442 443 // 444 // next() 445 // 446 bi->first(); 447 i = bi->next(); 448 if (i != 1) { 449 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); 450 } 451 452 bi->last(); 453 i = bi->next(); 454 if (i != BreakIterator::DONE) { 455 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); 456 } 457 458 459 // 460 // current() 461 // 462 bi->first(); 463 i = bi->current(); 464 if (i != 0) { 465 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 466 } 467 468 bi->next(); 469 i = bi->current(); 470 if (i != 1) { 471 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); 472 } 473 474 bi->last(); 475 bi->next(); 476 i = bi->current(); 477 if (i != 10) { 478 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); 479 } 480 481 bi->first(); 482 bi->previous(); 483 i = bi->current(); 484 if (i != 0) { 485 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 486 } 487 488 489 // 490 // Following() 491 // 492 i = bi->following(4); 493 if (i != 5) { 494 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); 495 } 496 497 i = bi->following(9); 498 if (i != 10) { 499 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); 500 } 501 502 i = bi->following(10); 503 if (i != BreakIterator::DONE) { 504 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); 505 } 506 507 508 // 509 // Preceding 510 // 511 i = bi->preceding(4); 512 if (i != 3) { 513 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); 514 } 515 516 i = bi->preceding(10); 517 if (i != 9) { 518 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); 519 } 520 521 i = bi->preceding(1); 522 if (i != 0) { 523 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); 524 } 525 526 i = bi->preceding(0); 527 if (i != BreakIterator::DONE) { 528 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); 529 } 530 531 532 // 533 // isBoundary() 534 // 535 bi->first(); 536 if (bi->isBoundary(3) != TRUE) { 537 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); 538 } 539 i = bi->current(); 540 if (i != 3) { 541 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); 542 } 543 544 545 if (bi->isBoundary(11) != FALSE) { 546 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); 547 } 548 i = bi->current(); 549 if (i != 10) { 550 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); 551 } 552 553 // 554 // next(n) 555 // 556 bi->first(); 557 i = bi->next(4); 558 if (i != 4) { 559 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); 560 } 561 562 i = bi->next(6); 563 if (i != 10) { 564 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); 565 } 566 567 bi->first(); 568 i = bi->next(11); 569 if (i != BreakIterator::DONE) { 570 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); 571 } 572 573 delete bi; 574 575 } 576 577 578 579 580 581 582 void RBBIAPITest::TestBuilder() { 583 UnicodeString rulesString1 = "$Letters = [:L:];\n" 584 "$Numbers = [:N:];\n" 585 "$Letters+;\n" 586 "$Numbers+;\n" 587 "[^$Letters $Numbers];\n" 588 "!.*;\n"; 589 UnicodeString testString1 = "abc123..abc"; 590 // 01234567890 591 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 592 UErrorCode status=U_ZERO_ERROR; 593 UParseError parseError; 594 595 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 596 if(U_FAILURE(status)) { 597 dataerrln("Fail : in construction - %s", u_errorName(status)); 598 } else { 599 bi->setText(testString1); 600 doBoundaryTest(*bi, testString1, bounds1); 601 } 602 delete bi; 603 } 604 605 606 // 607 // TestQuoteGrouping 608 // Single quotes within rules imply a grouping, so that a modifier 609 // following the quoted text (* or +) applies to all of the quoted chars. 610 // 611 void RBBIAPITest::TestQuoteGrouping() { 612 UnicodeString rulesString1 = "#Here comes the rule...\n" 613 "'$@!'*;\n" // (\$\@\!)* 614 ".;\n"; 615 616 UnicodeString testString1 = "$@!$@!X$@!!X"; 617 // 0123456789012 618 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 619 UErrorCode status=U_ZERO_ERROR; 620 UParseError parseError; 621 622 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 623 if(U_FAILURE(status)) { 624 dataerrln("Fail : in construction - %s", u_errorName(status)); 625 } else { 626 bi->setText(testString1); 627 doBoundaryTest(*bi, testString1, bounds1); 628 } 629 delete bi; 630 } 631 632 // 633 // TestRuleStatus 634 // Test word break rule status constants. 635 // 636 void RBBIAPITest::TestRuleStatus() { 637 UChar str[30]; 638 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", 639 // 012345678901234567 8 9 0 1 2 3 4 5 6 640 // Ideographic Katakana Hiragana 641 str, 30); 642 UnicodeString testString1(str); 643 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; 644 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 645 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 646 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, 647 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA}; 648 649 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 650 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 651 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT, 652 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; 653 654 UErrorCode status=U_ZERO_ERROR; 655 656 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 657 if(U_FAILURE(status)) { 658 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 659 } else { 660 bi->setText(testString1); 661 // First test that the breaks are in the right spots. 662 doBoundaryTest(*bi, testString1, bounds1); 663 664 // Then go back and check tag values 665 int32_t i = 0; 666 int32_t pos, tag; 667 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 668 if (pos != bounds1[i]) { 669 errln("FAIL: unexpected word break at postion %d", pos); 670 break; 671 } 672 tag = bi->getRuleStatus(); 673 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 674 errln("FAIL: incorrect tag value %d at position %d", tag, pos); 675 break; 676 } 677 678 // Check that we get the same tag values from getRuleStatusVec() 679 int32_t vec[10]; 680 int t = bi->getRuleStatusVec(vec, 10, status); 681 TEST_ASSERT_SUCCESS(status); 682 TEST_ASSERT(t==1); 683 TEST_ASSERT(vec[0] == tag); 684 } 685 } 686 delete bi; 687 688 // Now test line break status. This test mostly is to confirm that the status constants 689 // are correctly declared in the header. 690 testString1 = "test line. \n"; 691 // break type s s h 692 693 bi = (RuleBasedBreakIterator *) 694 BreakIterator::createLineInstance(Locale::getEnglish(), status); 695 if(U_FAILURE(status)) { 696 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); 697 } else { 698 int32_t i = 0; 699 int32_t pos, tag; 700 UBool success; 701 702 bi->setText(testString1); 703 pos = bi->current(); 704 tag = bi->getRuleStatus(); 705 for (i=0; i<3; i++) { 706 switch (i) { 707 case 0: 708 success = pos==0 && tag==UBRK_LINE_SOFT; break; 709 case 1: 710 success = pos==5 && tag==UBRK_LINE_SOFT; break; 711 case 2: 712 success = pos==12 && tag==UBRK_LINE_HARD; break; 713 default: 714 success = FALSE; break; 715 } 716 if (success == FALSE) { 717 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", 718 i, pos, tag); 719 break; 720 } 721 pos = bi->next(); 722 tag = bi->getRuleStatus(); 723 } 724 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 725 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 726 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { 727 errln("UBRK_LINE_* constants from header are inconsistent."); 728 } 729 } 730 delete bi; 731 732 } 733 734 735 // 736 // TestRuleStatusVec 737 // Test the vector form of break rule status. 738 // 739 void RBBIAPITest::TestRuleStatusVec() { 740 UnicodeString rulesString( "[A-N]{100}; \n" 741 "[a-w]{200}; \n" 742 "[\\p{L}]{300}; \n" 743 "[\\p{N}]{400}; \n" 744 "[0-5]{500}; \n" 745 "!.*;\n", -1, US_INV); 746 UnicodeString testString1 = "Aapz5?"; 747 int32_t statusVals[10]; 748 int32_t numStatuses; 749 int32_t pos; 750 751 UErrorCode status=U_ZERO_ERROR; 752 UParseError parseError; 753 754 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 755 if (U_FAILURE(status)) { 756 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); 757 } else { 758 bi->setText(testString1); 759 760 // A 761 pos = bi->next(); 762 TEST_ASSERT(pos==1); 763 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 764 TEST_ASSERT_SUCCESS(status); 765 TEST_ASSERT(numStatuses == 2); 766 TEST_ASSERT(statusVals[0] == 100); 767 TEST_ASSERT(statusVals[1] == 300); 768 769 // a 770 pos = bi->next(); 771 TEST_ASSERT(pos==2); 772 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 773 TEST_ASSERT_SUCCESS(status); 774 TEST_ASSERT(numStatuses == 2); 775 TEST_ASSERT(statusVals[0] == 200); 776 TEST_ASSERT(statusVals[1] == 300); 777 778 // p 779 pos = bi->next(); 780 TEST_ASSERT(pos==3); 781 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 782 TEST_ASSERT_SUCCESS(status); 783 TEST_ASSERT(numStatuses == 2); 784 TEST_ASSERT(statusVals[0] == 200); 785 TEST_ASSERT(statusVals[1] == 300); 786 787 // z 788 pos = bi->next(); 789 TEST_ASSERT(pos==4); 790 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 791 TEST_ASSERT_SUCCESS(status); 792 TEST_ASSERT(numStatuses == 1); 793 TEST_ASSERT(statusVals[0] == 300); 794 795 // 5 796 pos = bi->next(); 797 TEST_ASSERT(pos==5); 798 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 799 TEST_ASSERT_SUCCESS(status); 800 TEST_ASSERT(numStatuses == 2); 801 TEST_ASSERT(statusVals[0] == 400); 802 TEST_ASSERT(statusVals[1] == 500); 803 804 // ? 805 pos = bi->next(); 806 TEST_ASSERT(pos==6); 807 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 808 TEST_ASSERT_SUCCESS(status); 809 TEST_ASSERT(numStatuses == 1); 810 TEST_ASSERT(statusVals[0] == 0); 811 812 // 813 // Check buffer overflow error handling. Char == A 814 // 815 bi->first(); 816 pos = bi->next(); 817 TEST_ASSERT(pos==1); 818 memset(statusVals, -1, sizeof(statusVals)); 819 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 820 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 821 TEST_ASSERT(numStatuses == 2); 822 TEST_ASSERT(statusVals[0] == -1); 823 824 status = U_ZERO_ERROR; 825 memset(statusVals, -1, sizeof(statusVals)); 826 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 828 TEST_ASSERT(numStatuses == 2); 829 TEST_ASSERT(statusVals[0] == 100); 830 TEST_ASSERT(statusVals[1] == -1); 831 832 status = U_ZERO_ERROR; 833 memset(statusVals, -1, sizeof(statusVals)); 834 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 835 TEST_ASSERT_SUCCESS(status); 836 TEST_ASSERT(numStatuses == 2); 837 TEST_ASSERT(statusVals[0] == 100); 838 TEST_ASSERT(statusVals[1] == 300); 839 TEST_ASSERT(statusVals[2] == -1); 840 } 841 delete bi; 842 843 } 844 845 // 846 // Bug 2190 Regression test. Builder crash on rule consisting of only a 847 // $variable reference 848 void RBBIAPITest::TestBug2190() { 849 UnicodeString rulesString1 = "$aaa = abcd;\n" 850 "$bbb = $aaa;\n" 851 "$bbb;\n"; 852 UnicodeString testString1 = "abcdabcd"; 853 // 01234567890 854 int32_t bounds1[] = {0, 4, 8}; 855 UErrorCode status=U_ZERO_ERROR; 856 UParseError parseError; 857 858 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 859 if(U_FAILURE(status)) { 860 dataerrln("Fail : in construction - %s", u_errorName(status)); 861 } else { 862 bi->setText(testString1); 863 doBoundaryTest(*bi, testString1, bounds1); 864 } 865 delete bi; 866 } 867 868 869 void RBBIAPITest::TestRegistration() { 870 #if !UCONFIG_NO_SERVICE 871 UErrorCode status = U_ZERO_ERROR; 872 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 873 874 // ok to not delete these if we exit because of error? 875 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 876 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 877 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 878 879 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { 880 dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); 881 delete ja_word; 882 delete ja_char; 883 delete root_word; 884 delete root_char; 885 886 return; 887 } 888 889 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 890 { 891 if (ja_word && *ja_word == *root_word) { 892 errln("japan not different from root"); 893 } 894 } 895 896 { 897 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 898 UBool fail = TRUE; 899 if(result){ 900 fail = *result != *ja_word; 901 } 902 delete result; 903 if (fail) { 904 errln("bad result for xx_XX/word"); 905 } 906 } 907 908 { 909 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 910 UBool fail = TRUE; 911 if(result){ 912 fail = *result != *ja_char; 913 } 914 delete result; 915 if (fail) { 916 errln("bad result for ja_JP/char"); 917 } 918 } 919 920 { 921 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 922 UBool fail = TRUE; 923 if(result){ 924 fail = *result != *root_char; 925 } 926 delete result; 927 if (fail) { 928 errln("bad result for xx_XX/char"); 929 } 930 } 931 932 { 933 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 934 UBool found = FALSE; 935 const UnicodeString* p; 936 while ((p = avail->snext(status))) { 937 if (p->compare("xx") == 0) { 938 found = TRUE; 939 break; 940 } 941 } 942 delete avail; 943 if (!found) { 944 errln("did not find test locale"); 945 } 946 } 947 948 { 949 UBool unreg = BreakIterator::unregister(key, status); 950 if (!unreg) { 951 errln("unable to unregister"); 952 } 953 } 954 955 { 956 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 957 BreakIterator* root = BreakIterator::createWordInstance("", status); 958 UBool fail = TRUE; 959 if(root){ 960 fail = *root != *result; 961 } 962 delete root; 963 delete result; 964 if (fail) { 965 errln("did not get root break"); 966 } 967 } 968 969 { 970 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 971 UBool found = FALSE; 972 const UnicodeString* p; 973 while ((p = avail->snext(status))) { 974 if (p->compare("xx") == 0) { 975 found = TRUE; 976 break; 977 } 978 } 979 delete avail; 980 if (found) { 981 errln("found test locale"); 982 } 983 } 984 985 { 986 int32_t count; 987 UBool foundLocale = FALSE; 988 const Locale *avail = BreakIterator::getAvailableLocales(count); 989 for (int i=0; i<count; i++) { 990 if (avail[i] == Locale::getEnglish()) { 991 foundLocale = TRUE; 992 break; 993 } 994 } 995 if (foundLocale == FALSE) { 996 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 997 } 998 } 999 1000 1001 // ja_word was adopted by factory 1002 delete ja_char; 1003 delete root_word; 1004 delete root_char; 1005 #endif 1006 } 1007 1008 void RBBIAPITest::RoundtripRule(const char *dataFile) { 1009 UErrorCode status = U_ZERO_ERROR; 1010 UParseError parseError; 1011 parseError.line = 0; 1012 parseError.offset = 0; 1013 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); 1014 uint32_t length; 1015 const UChar *builtSource; 1016 const uint8_t *rbbiRules; 1017 const uint8_t *builtRules; 1018 1019 if (U_FAILURE(status)) { 1020 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); 1021 return; 1022 } 1023 1024 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); 1025 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1026 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1027 if (U_FAILURE(status)) { 1028 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1029 u_errorName(status), parseError.line, parseError.offset); 1030 return; 1031 }; 1032 rbbiRules = brkItr->getBinaryRules(length); 1033 logln("Comparing \"%s\" len=%d", dataFile, length); 1034 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1035 errln("Built rules and rebuilt rules are different %s", dataFile); 1036 return; 1037 } 1038 delete brkItr; 1039 } 1040 1041 void RBBIAPITest::TestRoundtripRules() { 1042 RoundtripRule("word"); 1043 RoundtripRule("title"); 1044 RoundtripRule("sent"); 1045 RoundtripRule("line"); 1046 RoundtripRule("char"); 1047 if (!quick) { 1048 RoundtripRule("word_ja"); 1049 RoundtripRule("word_POSIX"); 1050 } 1051 } 1052 1053 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* 1054 // (these are protected so we access them via a local class RBBIWithProtectedFunctions). 1055 // This is just a sanity check, not a thorough test (e.g. we don't check that the 1056 // first delete actually frees rulesCopy). 1057 void RBBIAPITest::TestCreateFromRBBIData() { 1058 // Get some handy RBBIData 1059 const char *brkName = "word"; // or "sent", "line", "char", etc. 1060 UErrorCode status = U_ZERO_ERROR; 1061 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); 1062 if ( U_SUCCESS(status) ) { 1063 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); 1064 uint32_t length = builtRules->fLength; 1065 RBBIWithProtectedFunctions * brkItr; 1066 1067 // Try the memory-adopting constructor, need to copy the data first 1068 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); 1069 if ( rulesCopy ) { 1070 uprv_memcpy( rulesCopy, builtRules, length ); 1071 1072 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); 1073 if ( U_SUCCESS(status) ) { 1074 delete brkItr; // this should free rulesCopy 1075 } else { 1076 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1077 status = U_ZERO_ERROR;// reset for the next test 1078 uprv_free( rulesCopy ); 1079 } 1080 } 1081 1082 // Now try the non-adopting constructor 1083 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); 1084 if ( U_SUCCESS(status) ) { 1085 delete brkItr; // this should NOT attempt to free builtRules 1086 if (builtRules->fLength != length) { // sanity check 1087 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); 1088 } 1089 } else { 1090 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1091 } 1092 } 1093 } 1094 1095 //--------------------------------------------- 1096 // runIndexedTest 1097 //--------------------------------------------- 1098 1099 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1100 { 1101 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1102 switch (index) { 1103 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; 1104 #if !UCONFIG_NO_FILE_IO 1105 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; 1106 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; 1107 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; 1108 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; 1109 case 4: name = "TestIteration"; if (exec) TestIteration(); break; 1110 #else 1111 case 0: case 1: case 2: case 3: case 4: name = "skip"; break; 1112 #endif 1113 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; 1114 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; 1115 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; 1116 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; 1117 #if !UCONFIG_NO_FILE_IO 1118 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; 1119 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; 1120 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; 1121 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; 1122 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; 1123 #else 1124 case 9: case 10: case 11: case 12: case 13: name = "skip"; break; 1125 #endif 1126 1127 default: name = ""; break; // needed to end loop 1128 } 1129 } 1130 1131 //--------------------------------------------- 1132 //Internal subroutines 1133 //--------------------------------------------- 1134 1135 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1136 logln((UnicodeString)"testIsBoundary():"); 1137 int32_t p = 0; 1138 UBool isB; 1139 for (int32_t i = 0; i < text.length(); i++) { 1140 isB = bi.isBoundary(i); 1141 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1142 1143 if (i == boundaries[p]) { 1144 if (!isB) 1145 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1146 p++; 1147 } 1148 else { 1149 if (isB) 1150 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1151 } 1152 } 1153 } 1154 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1155 UnicodeString selected; 1156 UnicodeString expected=CharsToUnicodeString(expectedString); 1157 1158 if(gotoffset != expectedOffset) 1159 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1160 if(start <= gotoffset){ 1161 testString.extractBetween(start, gotoffset, selected); 1162 } 1163 else{ 1164 testString.extractBetween(gotoffset, start, selected); 1165 } 1166 if(selected.compare(expected) != 0) 1167 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1168 else 1169 logln(prettify("****selected \"" + selected + "\"")); 1170 } 1171 1172 //--------------------------------------------- 1173 //RBBIWithProtectedFunctions class functions 1174 //--------------------------------------------- 1175 1176 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) 1177 : RuleBasedBreakIterator(data, status) 1178 { 1179 } 1180 1181 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) 1182 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) 1183 { 1184 } 1185 1186 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1187