1 /* 2 ***************************************************************************** 3 * Copyright (C) 2001-2011, International Business Machines orporation 4 * and others. All Rights Reserved. 5 ****************************************************************************/ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_COLLATION 10 11 #include "srchtest.h" 12 #if !UCONFIG_NO_BREAK_ITERATION 13 #include "../cintltst/usrchdat.c" 14 #endif 15 #include "unicode/stsearch.h" 16 #include "unicode/ustring.h" 17 #include "unicode/schriter.h" 18 #include <string.h> 19 #include <stdio.h> 20 21 // private definitions ----------------------------------------------------- 22 23 #define CASE(id,test) \ 24 case id: \ 25 name = #test; \ 26 if (exec) { \ 27 logln(#test "---"); \ 28 logln((UnicodeString)""); \ 29 if(areBroken) { \ 30 dataerrln(__FILE__ " cannot test - failed to create collator."); \ 31 } else { \ 32 test(); \ 33 } \ 34 } \ 35 break; 36 37 // public contructors and destructors -------------------------------------- 38 39 StringSearchTest::StringSearchTest() 40 #if !UCONFIG_NO_BREAK_ITERATION 41 : 42 m_en_wordbreaker_(NULL), m_en_characterbreaker_(NULL) 43 #endif 44 { 45 #if !UCONFIG_NO_BREAK_ITERATION 46 UErrorCode status = U_ZERO_ERROR; 47 48 m_en_us_ = (RuleBasedCollator *)Collator::createInstance("en_US", status); 49 m_fr_fr_ = (RuleBasedCollator *)Collator::createInstance("fr_FR", status); 50 m_de_ = (RuleBasedCollator *)Collator::createInstance("de_DE", status); 51 m_es_ = (RuleBasedCollator *)Collator::createInstance("es_ES", status); 52 if(U_FAILURE(status)) { 53 delete m_en_us_; 54 delete m_fr_fr_; 55 delete m_de_; 56 delete m_es_; 57 m_en_us_ = 0; 58 m_fr_fr_ = 0; 59 m_de_ = 0; 60 m_es_ = 0; 61 errln("Collator creation failed with %s", u_errorName(status)); 62 return; 63 } 64 65 66 UnicodeString rules; 67 rules.setTo(((RuleBasedCollator *)m_de_)->getRules()); 68 UChar extrarules[128]; 69 u_unescape(EXTRACOLLATIONRULE, extrarules, 128); 70 rules.append(extrarules, u_strlen(extrarules)); 71 delete m_de_; 72 73 m_de_ = new RuleBasedCollator(rules, status); 74 75 rules.setTo(((RuleBasedCollator *)m_es_)->getRules()); 76 rules.append(extrarules, u_strlen(extrarules)); 77 78 delete m_es_; 79 80 m_es_ = new RuleBasedCollator(rules, status); 81 82 #if !UCONFIG_NO_BREAK_ITERATION 83 m_en_wordbreaker_ = BreakIterator::createWordInstance( 84 Locale::getEnglish(), status); 85 m_en_characterbreaker_ = BreakIterator::createCharacterInstance( 86 Locale::getEnglish(), status); 87 #endif 88 #endif 89 } 90 91 StringSearchTest::~StringSearchTest() 92 { 93 #if !UCONFIG_NO_BREAK_ITERATION 94 delete m_en_us_; 95 delete m_fr_fr_; 96 delete m_de_; 97 delete m_es_; 98 #if !UCONFIG_NO_BREAK_ITERATION 99 delete m_en_wordbreaker_; 100 delete m_en_characterbreaker_; 101 #endif 102 #endif 103 } 104 105 // public methods ---------------------------------------------------------- 106 107 void StringSearchTest::runIndexedTest(int32_t index, UBool exec, 108 const char* &name, char* ) 109 { 110 #if !UCONFIG_NO_BREAK_ITERATION 111 UBool areBroken = FALSE; 112 if (m_en_us_ == NULL && m_fr_fr_ == NULL && m_de_ == NULL && 113 m_es_ == NULL && m_en_wordbreaker_ == NULL && 114 m_en_characterbreaker_ == NULL && exec) { 115 areBroken = TRUE; 116 } 117 118 switch (index) { 119 #if !UCONFIG_NO_FILE_IO 120 CASE(0, TestOpenClose) 121 #endif 122 CASE(1, TestInitialization) 123 CASE(2, TestBasic) 124 CASE(3, TestNormExact) 125 CASE(4, TestStrength) 126 #if UCONFIG_NO_BREAK_ITERATION 127 case 5: 128 name = "TestBreakIterator"; 129 break; 130 #else 131 CASE(5, TestBreakIterator) 132 #endif 133 CASE(6, TestVariable) 134 CASE(7, TestOverlap) 135 CASE(8, TestCollator) 136 CASE(9, TestPattern) 137 CASE(10, TestText) 138 CASE(11, TestCompositeBoundaries) 139 CASE(12, TestGetSetOffset) 140 CASE(13, TestGetSetAttribute) 141 CASE(14, TestGetMatch) 142 CASE(15, TestSetMatch) 143 CASE(16, TestReset) 144 CASE(17, TestSupplementary) 145 CASE(18, TestContraction) 146 CASE(19, TestIgnorable) 147 CASE(20, TestCanonical) 148 CASE(21, TestNormCanonical) 149 CASE(22, TestStrengthCanonical) 150 #if UCONFIG_NO_BREAK_ITERATION 151 case 23: 152 name = "TestBreakIteratorCanonical"; 153 break; 154 #else 155 CASE(23, TestBreakIteratorCanonical) 156 #endif 157 CASE(24, TestVariableCanonical) 158 CASE(25, TestOverlapCanonical) 159 CASE(26, TestCollatorCanonical) 160 CASE(27, TestPatternCanonical) 161 CASE(28, TestTextCanonical) 162 CASE(29, TestCompositeBoundariesCanonical) 163 CASE(30, TestGetSetOffsetCanonical) 164 CASE(31, TestSupplementaryCanonical) 165 CASE(32, TestContractionCanonical) 166 CASE(33, TestUClassID) 167 CASE(34, TestSubclass) 168 CASE(35, TestCoverage) 169 CASE(36, TestDiacriticMatch) 170 default: name = ""; break; 171 } 172 #else 173 name=""; 174 #endif 175 } 176 177 #if !UCONFIG_NO_BREAK_ITERATION 178 // private methods ------------------------------------------------------ 179 180 RuleBasedCollator * StringSearchTest::getCollator(const char *collator) 181 { 182 if (collator == NULL) { 183 return m_en_us_; 184 } 185 if (strcmp(collator, "fr") == 0) { 186 return m_fr_fr_; 187 } 188 else if (strcmp(collator, "de") == 0) { 189 return m_de_; 190 } 191 else if (strcmp(collator, "es") == 0) { 192 return m_es_; 193 } 194 else { 195 return m_en_us_; 196 } 197 } 198 199 BreakIterator * StringSearchTest::getBreakIterator(const char *breaker) 200 { 201 #if UCONFIG_NO_BREAK_ITERATION 202 return NULL; 203 #else 204 if (breaker == NULL) { 205 return NULL; 206 } 207 if (strcmp(breaker, "wordbreaker") == 0) { 208 return m_en_wordbreaker_; 209 } 210 else { 211 return m_en_characterbreaker_; 212 } 213 #endif 214 } 215 216 char * StringSearchTest::toCharString(const UnicodeString &text) 217 { 218 static char result[1024]; 219 int index = 0; 220 int count = 0; 221 int length = text.length(); 222 223 for (; count < length; count ++) { 224 UChar ch = text[count]; 225 if (ch >= 0x20 && ch <= 0x7e) { 226 result[index ++] = (char)ch; 227 } 228 else { 229 sprintf(result+index, "\\u%04x", ch); 230 index += 6; /* \uxxxx */ 231 } 232 } 233 result[index] = 0; 234 235 return result; 236 } 237 238 Collator::ECollationStrength StringSearchTest::getECollationStrength( 239 const UCollationStrength &strength) const 240 { 241 switch (strength) 242 { 243 case UCOL_PRIMARY : 244 return Collator::PRIMARY; 245 case UCOL_SECONDARY : 246 return Collator::SECONDARY; 247 case UCOL_TERTIARY : 248 return Collator::TERTIARY; 249 default : 250 return Collator::IDENTICAL; 251 } 252 } 253 254 UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch, 255 const SearchData *search) 256 { 257 int32_t count = 0; 258 UErrorCode status = U_ZERO_ERROR; 259 int32_t matchindex = search->offset[count]; 260 UnicodeString matchtext; 261 int32_t matchlength; 262 263 strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, search->elemCompare, status); 264 if (U_FAILURE(status)) { 265 errln("Error setting USEARCH_ELEMENT_COMPARISON attribute %s", u_errorName(status)); 266 return FALSE; 267 } 268 269 if (strsrch->getMatchedStart() != USEARCH_DONE || 270 strsrch->getMatchedLength() != 0) { 271 errln("Error with the initialization of match start and length"); 272 } 273 274 // start of next matches 275 while (U_SUCCESS(status) && matchindex >= 0) { 276 matchlength = search->size[count]; 277 strsrch->next(status); 278 if (matchindex != strsrch->getMatchedStart() || 279 matchlength != strsrch->getMatchedLength()) { 280 char *str = toCharString(strsrch->getText()); 281 errln("Text: %s", str); 282 str = toCharString(strsrch->getPattern()); 283 errln("Pattern: %s", str); 284 errln("Error next match found at %d (len:%d); expected %d (len:%d)", 285 strsrch->getMatchedStart(), strsrch->getMatchedLength(), 286 matchindex, matchlength); 287 return FALSE; 288 } 289 count ++; 290 291 strsrch->getMatchedText(matchtext); 292 293 if (U_FAILURE(status) || 294 strsrch->getText().compareBetween(matchindex, 295 matchindex + matchlength, 296 matchtext, 0, 297 matchtext.length())) { 298 errln("Error getting next matched text"); 299 } 300 301 matchindex = search->offset[count]; 302 } 303 strsrch->next(status); 304 if (strsrch->getMatchedStart() != USEARCH_DONE || 305 strsrch->getMatchedLength() != 0) { 306 char *str = toCharString(strsrch->getText()); 307 errln("Text: %s", str); 308 str = toCharString(strsrch->getPattern()); 309 errln("Pattern: %s", str); 310 errln("Error next match found at %d (len:%d); expected <NO MATCH>", 311 strsrch->getMatchedStart(), strsrch->getMatchedLength()); 312 return FALSE; 313 } 314 315 // start of previous matches 316 count = count == 0 ? 0 : count - 1; 317 matchindex = search->offset[count]; 318 while (U_SUCCESS(status) && matchindex >= 0) { 319 matchlength = search->size[count]; 320 strsrch->previous(status); 321 if (matchindex != strsrch->getMatchedStart() || 322 matchlength != strsrch->getMatchedLength()) { 323 char *str = toCharString(strsrch->getText()); 324 errln("Text: %s", str); 325 str = toCharString(strsrch->getPattern()); 326 errln("Pattern: %s", str); 327 errln("Error previous match found at %d (len:%d); expected %d (len:%d)", 328 strsrch->getMatchedStart(), strsrch->getMatchedLength(), 329 matchindex, matchlength); 330 return FALSE; 331 } 332 333 strsrch->getMatchedText(matchtext); 334 335 if (U_FAILURE(status) || 336 strsrch->getText().compareBetween(matchindex, 337 matchindex + matchlength, 338 matchtext, 0, 339 matchtext.length())) { 340 errln("Error getting previous matched text"); 341 } 342 343 matchindex = count > 0 ? search->offset[count - 1] : -1; 344 count --; 345 } 346 strsrch->previous(status); 347 if (strsrch->getMatchedStart() != USEARCH_DONE || 348 strsrch->getMatchedLength() != 0) { 349 char *str = toCharString(strsrch->getText()); 350 errln("Text: %s", str); 351 str = toCharString(strsrch->getPattern()); 352 errln("Pattern: %s", str); 353 errln("Error previous match found at %d (len:%d); expected <NO MATCH>", 354 strsrch->getMatchedStart(), strsrch->getMatchedLength()); 355 return FALSE; 356 } 357 358 int32_t nextStart; 359 UBool isOverlap = (strsrch->getAttribute(USEARCH_OVERLAP) == USEARCH_ON); 360 361 // start of following matches 362 count = 0; 363 matchindex = search->offset[count]; 364 nextStart = 0; 365 366 while (TRUE) { 367 strsrch->following(nextStart, status); 368 369 if (matchindex < 0) { 370 if (strsrch->getMatchedStart() != USEARCH_DONE || 371 strsrch->getMatchedLength() != 0) { 372 char *str = toCharString(strsrch->getText()); 373 errln("Text: %s", str); 374 str = toCharString(strsrch->getPattern()); 375 errln("Pattern: %s", str); 376 errln("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>", 377 nextStart, isOverlap, 378 strsrch->getMatchedStart(), strsrch->getMatchedLength()); 379 return FALSE; 380 } 381 // no more matches 382 break; 383 } 384 385 matchlength = search->size[count]; 386 if (strsrch->getMatchedStart() != matchindex 387 || strsrch->getMatchedLength() != matchlength 388 || U_FAILURE(status)) { 389 char *str = toCharString(strsrch->getText()); 390 errln("Text: %s\n", str); 391 str = toCharString(strsrch->getPattern()); 392 errln("Pattern: %s\n", str); 393 errln("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n", 394 nextStart, isOverlap, 395 strsrch->getMatchedStart(), strsrch->getMatchedLength(), 396 matchindex, matchlength); 397 return FALSE; 398 } 399 400 if (isOverlap || strsrch->getMatchedLength() == 0) { 401 nextStart = strsrch->getMatchedStart() + 1; 402 } else { 403 nextStart = strsrch->getMatchedStart() + strsrch->getMatchedLength(); 404 } 405 406 count++; 407 matchindex = search->offset[count]; 408 } 409 410 // start preceding matches 411 count = -1; // last non-negative offset index, could be -1 if no match 412 while (search->offset[count + 1] >= 0) { 413 count++; 414 } 415 nextStart = strsrch->getText().length(); 416 417 while (TRUE) { 418 strsrch->preceding(nextStart, status); 419 420 if (count < 0) { 421 if (strsrch->getMatchedStart() != USEARCH_DONE || strsrch->getMatchedLength() != 0) { 422 char *str = toCharString(strsrch->getText()); 423 errln("Text: %s\n", str); 424 str = toCharString(strsrch->getPattern()); 425 errln("Pattern: %s\n", str); 426 errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n", 427 nextStart, isOverlap, 428 strsrch->getMatchedStart(), 429 strsrch->getMatchedLength()); 430 return FALSE; 431 } 432 // no more matches 433 break; 434 } 435 436 matchindex = search->offset[count]; 437 matchlength = search->size[count]; 438 if (strsrch->getMatchedStart() != matchindex 439 || strsrch->getMatchedLength() != matchlength 440 || U_FAILURE(status)) { 441 char *str = toCharString(strsrch->getText()); 442 errln("Text: %s\n", str); 443 str = toCharString(strsrch->getPattern()); 444 errln("Pattern: %s\n", str); 445 errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n", 446 nextStart, isOverlap, 447 strsrch->getMatchedStart(), strsrch->getMatchedLength(), 448 matchindex, matchlength); 449 return FALSE; 450 } 451 452 nextStart = matchindex; 453 count--; 454 } 455 456 strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, status); 457 return TRUE; 458 } 459 460 UBool StringSearchTest::assertEqual(const SearchData *search) 461 { 462 UErrorCode status = U_ZERO_ERROR; 463 464 Collator *collator = getCollator(search->collator); 465 BreakIterator *breaker = getBreakIterator(search->breaker); 466 StringSearch *strsrch, *strsrch2; 467 UChar temp[128]; 468 469 #if UCONFIG_NO_BREAK_ITERATION 470 if(search->breaker) { 471 return TRUE; /* skip test */ 472 } 473 #endif 474 u_unescape(search->text, temp, 128); 475 UnicodeString text; 476 text.setTo(temp); 477 u_unescape(search->pattern, temp, 128); 478 UnicodeString pattern; 479 pattern.setTo(temp); 480 481 #if !UCONFIG_NO_BREAK_ITERATION 482 if (breaker != NULL) { 483 breaker->setText(text); 484 } 485 #endif 486 collator->setStrength(getECollationStrength(search->strength)); 487 strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator, 488 breaker, status); 489 if (U_FAILURE(status)) { 490 errln("Error opening string search %s", u_errorName(status)); 491 return FALSE; 492 } 493 494 if (!assertEqualWithStringSearch(strsrch, search)) { 495 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 496 delete strsrch; 497 return FALSE; 498 } 499 500 501 strsrch2 = strsrch->clone(); 502 if( strsrch2 == strsrch || *strsrch2 != *strsrch || 503 !assertEqualWithStringSearch(strsrch2, search) 504 ) { 505 infoln("failure with StringSearch.clone()"); 506 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 507 delete strsrch; 508 delete strsrch2; 509 return FALSE; 510 } 511 delete strsrch2; 512 513 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 514 delete strsrch; 515 return TRUE; 516 } 517 518 UBool StringSearchTest::assertCanonicalEqual(const SearchData *search) 519 { 520 UErrorCode status = U_ZERO_ERROR; 521 Collator *collator = getCollator(search->collator); 522 BreakIterator *breaker = getBreakIterator(search->breaker); 523 StringSearch *strsrch; 524 UChar temp[128]; 525 UBool result = TRUE; 526 527 #if UCONFIG_NO_BREAK_ITERATION 528 if(search->breaker) { 529 return TRUE; /* skip test */ 530 } 531 #endif 532 533 u_unescape(search->text, temp, 128); 534 UnicodeString text; 535 text.setTo(temp); 536 u_unescape(search->pattern, temp, 128); 537 UnicodeString pattern; 538 pattern.setTo(temp); 539 540 #if !UCONFIG_NO_BREAK_ITERATION 541 if (breaker != NULL) { 542 breaker->setText(text); 543 } 544 #endif 545 collator->setStrength(getECollationStrength(search->strength)); 546 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 547 strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator, 548 breaker, status); 549 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 550 if (U_FAILURE(status)) { 551 errln("Error opening string search %s", u_errorName(status)); 552 result = FALSE; 553 goto bail; 554 } 555 556 if (!assertEqualWithStringSearch(strsrch, search)) { 557 result = FALSE; 558 goto bail; 559 } 560 561 bail: 562 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 563 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 564 delete strsrch; 565 566 return result; 567 } 568 569 UBool StringSearchTest::assertEqualWithAttribute(const SearchData *search, 570 USearchAttributeValue canonical, 571 USearchAttributeValue overlap) 572 { 573 UErrorCode status = U_ZERO_ERROR; 574 Collator *collator = getCollator(search->collator); 575 BreakIterator *breaker = getBreakIterator(search->breaker); 576 StringSearch *strsrch; 577 UChar temp[128]; 578 579 580 #if UCONFIG_NO_BREAK_ITERATION 581 if(search->breaker) { 582 return TRUE; /* skip test */ 583 } 584 #endif 585 586 u_unescape(search->text, temp, 128); 587 UnicodeString text; 588 text.setTo(temp); 589 u_unescape(search->pattern, temp, 128); 590 UnicodeString pattern; 591 pattern.setTo(temp); 592 593 #if !UCONFIG_NO_BREAK_ITERATION 594 if (breaker != NULL) { 595 breaker->setText(text); 596 } 597 #endif 598 collator->setStrength(getECollationStrength(search->strength)); 599 strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator, 600 breaker, status); 601 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, canonical, status); 602 strsrch->setAttribute(USEARCH_OVERLAP, overlap, status); 603 604 if (U_FAILURE(status)) { 605 errln("Error opening string search %s", u_errorName(status)); 606 return FALSE; 607 } 608 609 if (!assertEqualWithStringSearch(strsrch, search)) { 610 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 611 delete strsrch; 612 return FALSE; 613 } 614 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 615 delete strsrch; 616 return TRUE; 617 } 618 619 void StringSearchTest::TestOpenClose() 620 { 621 UErrorCode status = U_ZERO_ERROR; 622 StringSearch *result; 623 BreakIterator *breakiter = m_en_wordbreaker_; 624 UnicodeString pattern; 625 UnicodeString text; 626 UnicodeString temp("a"); 627 StringCharacterIterator chariter(text); 628 629 /* testing null arguments */ 630 result = new StringSearch(pattern, text, NULL, NULL, status); 631 if (U_SUCCESS(status)) { 632 errln("Error: NULL arguments should produce an error"); 633 } 634 delete result; 635 636 chariter.setText(text); 637 status = U_ZERO_ERROR; 638 result = new StringSearch(pattern, chariter, NULL, NULL, status); 639 if (U_SUCCESS(status)) { 640 errln("Error: NULL arguments should produce an error"); 641 } 642 delete result; 643 644 text.append(0, 0x1); 645 status = U_ZERO_ERROR; 646 result = new StringSearch(pattern, text, NULL, NULL, status); 647 if (U_SUCCESS(status)) { 648 errln("Error: Empty pattern should produce an error"); 649 } 650 delete result; 651 652 chariter.setText(text); 653 status = U_ZERO_ERROR; 654 result = new StringSearch(pattern, chariter, NULL, NULL, status); 655 if (U_SUCCESS(status)) { 656 errln("Error: Empty pattern should produce an error"); 657 } 658 delete result; 659 660 text.remove(); 661 pattern.append(temp); 662 status = U_ZERO_ERROR; 663 result = new StringSearch(pattern, text, NULL, NULL, status); 664 if (U_SUCCESS(status)) { 665 errln("Error: Empty text should produce an error"); 666 } 667 delete result; 668 669 chariter.setText(text); 670 status = U_ZERO_ERROR; 671 result = new StringSearch(pattern, chariter, NULL, NULL, status); 672 if (U_SUCCESS(status)) { 673 errln("Error: Empty text should produce an error"); 674 } 675 delete result; 676 677 text.append(temp); 678 status = U_ZERO_ERROR; 679 result = new StringSearch(pattern, text, NULL, NULL, status); 680 if (U_SUCCESS(status)) { 681 errln("Error: NULL arguments should produce an error"); 682 } 683 delete result; 684 685 chariter.setText(text); 686 status = U_ZERO_ERROR; 687 result = new StringSearch(pattern, chariter, NULL, NULL, status); 688 if (U_SUCCESS(status)) { 689 errln("Error: NULL arguments should produce an error"); 690 } 691 delete result; 692 693 status = U_ZERO_ERROR; 694 result = new StringSearch(pattern, text, m_en_us_, NULL, status); 695 if (U_FAILURE(status)) { 696 errln("Error: NULL break iterator is valid for opening search"); 697 } 698 delete result; 699 700 status = U_ZERO_ERROR; 701 result = new StringSearch(pattern, chariter, m_en_us_, NULL, status); 702 if (U_FAILURE(status)) { 703 errln("Error: NULL break iterator is valid for opening search"); 704 } 705 delete result; 706 707 status = U_ZERO_ERROR; 708 result = new StringSearch(pattern, text, Locale::getEnglish(), NULL, status); 709 if (U_FAILURE(status) || result == NULL) { 710 errln("Error: NULL break iterator is valid for opening search"); 711 } 712 delete result; 713 714 status = U_ZERO_ERROR; 715 result = new StringSearch(pattern, chariter, Locale::getEnglish(), NULL, status); 716 if (U_FAILURE(status)) { 717 errln("Error: NULL break iterator is valid for opening search"); 718 } 719 delete result; 720 721 status = U_ZERO_ERROR; 722 result = new StringSearch(pattern, text, m_en_us_, breakiter, status); 723 if (U_FAILURE(status)) { 724 errln("Error: Break iterator is valid for opening search"); 725 } 726 delete result; 727 728 status = U_ZERO_ERROR; 729 result = new StringSearch(pattern, chariter, m_en_us_, NULL, status); 730 if (U_FAILURE(status)) { 731 errln("Error: Break iterator is valid for opening search"); 732 } 733 delete result; 734 } 735 736 void StringSearchTest::TestInitialization() 737 { 738 UErrorCode status = U_ZERO_ERROR; 739 UnicodeString pattern; 740 UnicodeString text; 741 UnicodeString temp("a"); 742 StringSearch *result; 743 int count; 744 745 /* simple test on the pattern ce construction */ 746 pattern.append(temp); 747 pattern.append(temp); 748 text.append(temp); 749 text.append(temp); 750 text.append(temp); 751 result = new StringSearch(pattern, text, m_en_us_, NULL, status); 752 if (U_FAILURE(status)) { 753 errln("Error opening search %s", u_errorName(status)); 754 } 755 StringSearch *copy = new StringSearch(*result); 756 if (*(copy->getCollator()) != *(result->getCollator()) || 757 copy->getBreakIterator() != result->getBreakIterator() || 758 copy->getMatchedLength() != result->getMatchedLength() || 759 copy->getMatchedStart() != result->getMatchedStart() || 760 copy->getOffset() != result->getOffset() || 761 copy->getPattern() != result->getPattern() || 762 copy->getText() != result->getText() || 763 *(copy) != *(result)) 764 { 765 errln("Error copying StringSearch"); 766 } 767 delete copy; 768 769 copy = (StringSearch *)result->safeClone(); 770 if (*(copy->getCollator()) != *(result->getCollator()) || 771 copy->getBreakIterator() != result->getBreakIterator() || 772 copy->getMatchedLength() != result->getMatchedLength() || 773 copy->getMatchedStart() != result->getMatchedStart() || 774 copy->getOffset() != result->getOffset() || 775 copy->getPattern() != result->getPattern() || 776 copy->getText() != result->getText() || 777 *(copy) != *(result)) { 778 errln("Error copying StringSearch"); 779 } 780 delete result; 781 782 /* testing if an extremely large pattern will fail the initialization */ 783 for (count = 0; count < 512; count ++) { 784 pattern.append(temp); 785 } 786 result = new StringSearch(pattern, text, m_en_us_, NULL, status); 787 if (*result != *result) { 788 errln("Error: string search object expected to match itself"); 789 } 790 if (*result == *copy) { 791 errln("Error: string search objects are not expected to match"); 792 } 793 *copy = *result; 794 if (*(copy->getCollator()) != *(result->getCollator()) || 795 copy->getBreakIterator() != result->getBreakIterator() || 796 copy->getMatchedLength() != result->getMatchedLength() || 797 copy->getMatchedStart() != result->getMatchedStart() || 798 copy->getOffset() != result->getOffset() || 799 copy->getPattern() != result->getPattern() || 800 copy->getText() != result->getText() || 801 *(copy) != *(result)) { 802 errln("Error copying StringSearch"); 803 } 804 if (U_FAILURE(status)) { 805 errln("Error opening search %s", u_errorName(status)); 806 } 807 delete result; 808 delete copy; 809 } 810 811 void StringSearchTest::TestBasic() 812 { 813 int count = 0; 814 while (BASIC[count].text != NULL) { 815 //printf("count %d", count); 816 if (!assertEqual(&BASIC[count])) { 817 infoln("Error at test number %d", count); 818 } 819 count ++; 820 } 821 } 822 823 void StringSearchTest::TestNormExact() 824 { 825 int count = 0; 826 UErrorCode status = U_ZERO_ERROR; 827 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 828 if (U_FAILURE(status)) { 829 errln("Error setting collation normalization %s", 830 u_errorName(status)); 831 } 832 while (BASIC[count].text != NULL) { 833 if (!assertEqual(&BASIC[count])) { 834 infoln("Error at test number %d", count); 835 } 836 count ++; 837 } 838 count = 0; 839 while (NORMEXACT[count].text != NULL) { 840 if (!assertEqual(&NORMEXACT[count])) { 841 infoln("Error at test number %d", count); 842 } 843 count ++; 844 } 845 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 846 count = 0; 847 while (NONNORMEXACT[count].text != NULL) { 848 if (!assertEqual(&NONNORMEXACT[count])) { 849 infoln("Error at test number %d", count); 850 } 851 count ++; 852 } 853 } 854 855 void StringSearchTest::TestStrength() 856 { 857 int count = 0; 858 while (STRENGTH[count].text != NULL) { 859 if (!assertEqual(&STRENGTH[count])) { 860 infoln("Error at test number %d", count); 861 } 862 count ++; 863 } 864 } 865 866 #if !UCONFIG_NO_BREAK_ITERATION 867 868 void StringSearchTest::TestBreakIterator() 869 { 870 UChar temp[128]; 871 u_unescape(BREAKITERATOREXACT[0].text, temp, 128); 872 UnicodeString text; 873 text.setTo(temp, u_strlen(temp)); 874 u_unescape(BREAKITERATOREXACT[0].pattern, temp, 128); 875 UnicodeString pattern; 876 pattern.setTo(temp, u_strlen(temp)); 877 878 UErrorCode status = U_ZERO_ERROR; 879 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 880 status); 881 if (U_FAILURE(status)) { 882 errln("Error opening string search %s", u_errorName(status)); 883 } 884 885 strsrch->setBreakIterator(NULL, status); 886 if (U_FAILURE(status) || strsrch->getBreakIterator() != NULL) { 887 errln("Error usearch_getBreakIterator returned wrong object"); 888 } 889 890 strsrch->setBreakIterator(m_en_characterbreaker_, status); 891 if (U_FAILURE(status) || 892 strsrch->getBreakIterator() != m_en_characterbreaker_) { 893 errln("Error usearch_getBreakIterator returned wrong object"); 894 } 895 896 strsrch->setBreakIterator(m_en_wordbreaker_, status); 897 if (U_FAILURE(status) || 898 strsrch->getBreakIterator() != m_en_wordbreaker_) { 899 errln("Error usearch_getBreakIterator returned wrong object"); 900 } 901 902 delete strsrch; 903 904 int count = 0; 905 while (count < 4) { 906 // special purposes for tests numbers 0-3 907 const SearchData *search = &(BREAKITERATOREXACT[count]); 908 RuleBasedCollator *collator = getCollator(search->collator); 909 BreakIterator *breaker = getBreakIterator(search->breaker); 910 StringSearch *strsrch; 911 912 u_unescape(search->text, temp, 128); 913 text.setTo(temp, u_strlen(temp)); 914 u_unescape(search->pattern, temp, 128); 915 pattern.setTo(temp, u_strlen(temp)); 916 if (breaker != NULL) { 917 breaker->setText(text); 918 } 919 collator->setStrength(getECollationStrength(search->strength)); 920 921 strsrch = new StringSearch(pattern, text, collator, breaker, status); 922 if (U_FAILURE(status) || 923 strsrch->getBreakIterator() != breaker) { 924 errln("Error setting break iterator"); 925 if (strsrch != NULL) { 926 delete strsrch; 927 } 928 } 929 if (!assertEqualWithStringSearch(strsrch, search)) { 930 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 931 delete strsrch; 932 } 933 search = &(BREAKITERATOREXACT[count + 1]); 934 breaker = getBreakIterator(search->breaker); 935 if (breaker != NULL) { 936 breaker->setText(text); 937 } 938 strsrch->setBreakIterator(breaker, status); 939 if (U_FAILURE(status) || 940 strsrch->getBreakIterator() != breaker) { 941 errln("Error setting break iterator"); 942 delete strsrch; 943 } 944 strsrch->reset(); 945 if (!assertEqualWithStringSearch(strsrch, search)) { 946 infoln("Error at test number %d", count); 947 } 948 delete strsrch; 949 count += 2; 950 } 951 count = 0; 952 while (BREAKITERATOREXACT[count].text != NULL) { 953 if (!assertEqual(&BREAKITERATOREXACT[count])) { 954 infoln("Error at test number %d", count); 955 } 956 count ++; 957 } 958 } 959 960 #endif 961 962 void StringSearchTest::TestVariable() 963 { 964 int count = 0; 965 UErrorCode status = U_ZERO_ERROR; 966 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 967 if (U_FAILURE(status)) { 968 errln("Error setting collation alternate attribute %s", 969 u_errorName(status)); 970 } 971 while (VARIABLE[count].text != NULL) { 972 logln("variable %d", count); 973 if (!assertEqual(&VARIABLE[count])) { 974 infoln("Error at test number %d", count); 975 } 976 count ++; 977 } 978 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, 979 status); 980 } 981 982 void StringSearchTest::TestOverlap() 983 { 984 int count = 0; 985 while (OVERLAP[count].text != NULL) { 986 if (!assertEqualWithAttribute(&OVERLAP[count], USEARCH_OFF, 987 USEARCH_ON)) { 988 errln("Error at overlap test number %d", count); 989 } 990 count ++; 991 } 992 count = 0; 993 while (NONOVERLAP[count].text != NULL) { 994 if (!assertEqual(&NONOVERLAP[count])) { 995 errln("Error at non overlap test number %d", count); 996 } 997 count ++; 998 } 999 1000 count = 0; 1001 while (count < 1) { 1002 const SearchData *search = &(OVERLAP[count]); 1003 UChar temp[128]; 1004 u_unescape(search->text, temp, 128); 1005 UnicodeString text; 1006 text.setTo(temp, u_strlen(temp)); 1007 u_unescape(search->pattern, temp, 128); 1008 UnicodeString pattern; 1009 pattern.setTo(temp, u_strlen(temp)); 1010 1011 RuleBasedCollator *collator = getCollator(search->collator); 1012 UErrorCode status = U_ZERO_ERROR; 1013 StringSearch *strsrch = new StringSearch(pattern, text, 1014 collator, NULL, 1015 status); 1016 1017 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); 1018 if (U_FAILURE(status) || 1019 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { 1020 errln("Error setting overlap option"); 1021 } 1022 if (!assertEqualWithStringSearch(strsrch, search)) { 1023 delete strsrch; 1024 return; 1025 } 1026 1027 search = &(NONOVERLAP[count]); 1028 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status); 1029 if (U_FAILURE(status) || 1030 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { 1031 errln("Error setting overlap option"); 1032 } 1033 strsrch->reset(); 1034 if (!assertEqualWithStringSearch(strsrch, search)) { 1035 delete strsrch; 1036 errln("Error at test number %d", count); 1037 } 1038 1039 count ++; 1040 delete strsrch; 1041 } 1042 } 1043 1044 void StringSearchTest::TestCollator() 1045 { 1046 // test collator that thinks "o" and "p" are the same thing 1047 UChar temp[128]; 1048 u_unescape(COLLATOR[0].text, temp, 128); 1049 UnicodeString text; 1050 text.setTo(temp, u_strlen(temp)); 1051 u_unescape(COLLATOR[0].pattern, temp, 128); 1052 UnicodeString pattern; 1053 pattern.setTo(temp, u_strlen(temp)); 1054 1055 UErrorCode status = U_ZERO_ERROR; 1056 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1057 status); 1058 if (U_FAILURE(status)) { 1059 errln("Error opening string search %s", u_errorName(status)); 1060 delete strsrch; 1061 return; 1062 } 1063 if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) { 1064 delete strsrch; 1065 return; 1066 } 1067 1068 u_unescape(TESTCOLLATORRULE, temp, 128); 1069 UnicodeString rules; 1070 rules.setTo(temp, u_strlen(temp)); 1071 RuleBasedCollator *tailored = new RuleBasedCollator(rules, status); 1072 tailored->setStrength(getECollationStrength(COLLATOR[1].strength)); 1073 1074 if (U_FAILURE(status)) { 1075 errln("Error opening rule based collator %s", u_errorName(status)); 1076 delete strsrch; 1077 delete tailored; 1078 return; 1079 } 1080 1081 strsrch->setCollator(tailored, status); 1082 if (U_FAILURE(status) || (*strsrch->getCollator()) != (*tailored)) { 1083 errln("Error setting rule based collator"); 1084 delete strsrch; 1085 delete tailored; 1086 } 1087 strsrch->reset(); 1088 if (!assertEqualWithStringSearch(strsrch, &COLLATOR[1])) { 1089 delete strsrch; 1090 delete tailored; 1091 return; 1092 } 1093 1094 strsrch->setCollator(m_en_us_, status); 1095 strsrch->reset(); 1096 if (U_FAILURE(status) || (*strsrch->getCollator()) != (*m_en_us_)) { 1097 errln("Error setting rule based collator"); 1098 delete strsrch; 1099 delete tailored; 1100 } 1101 if (!assertEqualWithStringSearch(strsrch, &COLLATOR[0])) { 1102 errln("Error searching collator test"); 1103 } 1104 delete strsrch; 1105 delete tailored; 1106 } 1107 1108 void StringSearchTest::TestPattern() 1109 { 1110 1111 UChar temp[512]; 1112 int templength; 1113 u_unescape(PATTERN[0].text, temp, 512); 1114 UnicodeString text; 1115 text.setTo(temp, u_strlen(temp)); 1116 u_unescape(PATTERN[0].pattern, temp, 512); 1117 UnicodeString pattern; 1118 pattern.setTo(temp, u_strlen(temp)); 1119 1120 m_en_us_->setStrength(getECollationStrength(PATTERN[0].strength)); 1121 UErrorCode status = U_ZERO_ERROR; 1122 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1123 status); 1124 1125 if (U_FAILURE(status)) { 1126 errln("Error opening string search %s", u_errorName(status)); 1127 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1128 if (strsrch != NULL) { 1129 delete strsrch; 1130 } 1131 return; 1132 } 1133 if (strsrch->getPattern() != pattern) { 1134 errln("Error setting pattern"); 1135 } 1136 if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) { 1137 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1138 if (strsrch != NULL) { 1139 delete strsrch; 1140 } 1141 return; 1142 } 1143 1144 u_unescape(PATTERN[1].pattern, temp, 512); 1145 pattern.setTo(temp, u_strlen(temp)); 1146 strsrch->setPattern(pattern, status); 1147 if (pattern != strsrch->getPattern()) { 1148 errln("Error setting pattern"); 1149 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1150 if (strsrch != NULL) { 1151 delete strsrch; 1152 } 1153 return; 1154 } 1155 strsrch->reset(); 1156 if (U_FAILURE(status)) { 1157 errln("Error setting pattern %s", u_errorName(status)); 1158 } 1159 if (!assertEqualWithStringSearch(strsrch, &PATTERN[1])) { 1160 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1161 if (strsrch != NULL) { 1162 delete strsrch; 1163 } 1164 return; 1165 } 1166 1167 u_unescape(PATTERN[0].pattern, temp, 512); 1168 pattern.setTo(temp, u_strlen(temp)); 1169 strsrch->setPattern(pattern, status); 1170 if (pattern != strsrch->getPattern()) { 1171 errln("Error setting pattern"); 1172 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1173 if (strsrch != NULL) { 1174 delete strsrch; 1175 } 1176 return; 1177 } 1178 strsrch->reset(); 1179 if (U_FAILURE(status)) { 1180 errln("Error setting pattern %s", u_errorName(status)); 1181 } 1182 if (!assertEqualWithStringSearch(strsrch, &PATTERN[0])) { 1183 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1184 if (strsrch != NULL) { 1185 delete strsrch; 1186 } 1187 return; 1188 } 1189 /* enormous pattern size to see if this crashes */ 1190 for (templength = 0; templength != 512; templength ++) { 1191 temp[templength] = 0x61; 1192 } 1193 temp[511] = 0; 1194 pattern.setTo(temp, 511); 1195 strsrch->setPattern(pattern, status); 1196 if (U_FAILURE(status)) { 1197 errln("Error setting pattern with size 512, %s", u_errorName(status)); 1198 } 1199 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 1200 if (strsrch != NULL) { 1201 delete strsrch; 1202 } 1203 } 1204 1205 void StringSearchTest::TestText() 1206 { 1207 UChar temp[128]; 1208 u_unescape(TEXT[0].text, temp, 128); 1209 UnicodeString text; 1210 text.setTo(temp, u_strlen(temp)); 1211 u_unescape(TEXT[0].pattern, temp, 128); 1212 UnicodeString pattern; 1213 pattern.setTo(temp, u_strlen(temp)); 1214 1215 UErrorCode status = U_ZERO_ERROR; 1216 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1217 status); 1218 if (U_FAILURE(status)) { 1219 errln("Error opening string search %s", u_errorName(status)); 1220 return; 1221 } 1222 if (text != strsrch->getText()) { 1223 errln("Error setting text"); 1224 } 1225 if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) { 1226 delete strsrch; 1227 return; 1228 } 1229 1230 u_unescape(TEXT[1].text, temp, 128); 1231 text.setTo(temp, u_strlen(temp)); 1232 strsrch->setText(text, status); 1233 if (text != strsrch->getText()) { 1234 errln("Error setting text"); 1235 delete strsrch; 1236 return; 1237 } 1238 if (U_FAILURE(status)) { 1239 errln("Error setting text %s", u_errorName(status)); 1240 } 1241 if (!assertEqualWithStringSearch(strsrch, &TEXT[1])) { 1242 delete strsrch; 1243 return; 1244 } 1245 1246 u_unescape(TEXT[0].text, temp, 128); 1247 text.setTo(temp, u_strlen(temp)); 1248 StringCharacterIterator chariter(text); 1249 strsrch->setText(chariter, status); 1250 if (text != strsrch->getText()) { 1251 errln("Error setting text"); 1252 delete strsrch; 1253 return; 1254 } 1255 if (U_FAILURE(status)) { 1256 errln("Error setting pattern %s", u_errorName(status)); 1257 } 1258 if (!assertEqualWithStringSearch(strsrch, &TEXT[0])) { 1259 errln("Error searching within set text"); 1260 } 1261 delete strsrch; 1262 } 1263 1264 void StringSearchTest::TestCompositeBoundaries() 1265 { 1266 int count = 0; 1267 while (COMPOSITEBOUNDARIES[count].text != NULL) { 1268 logln("composite %d", count); 1269 if (!assertEqual(&COMPOSITEBOUNDARIES[count])) { 1270 errln("Error at test number %d", count); 1271 } 1272 count ++; 1273 } 1274 } 1275 1276 void StringSearchTest::TestGetSetOffset() 1277 { 1278 UErrorCode status = U_ZERO_ERROR; 1279 UnicodeString pattern("1234567890123456"); 1280 UnicodeString text("12345678901234567890123456789012"); 1281 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, 1282 NULL, status); 1283 /* testing out of bounds error */ 1284 strsrch->setOffset(-1, status); 1285 if (U_SUCCESS(status)) { 1286 errln("Error expecting set offset error"); 1287 } 1288 strsrch->setOffset(128, status); 1289 if (U_SUCCESS(status)) { 1290 errln("Error expecting set offset error"); 1291 } 1292 int index = 0; 1293 while (BASIC[index].text != NULL) { 1294 UErrorCode status = U_ZERO_ERROR; 1295 SearchData search = BASIC[index ++]; 1296 UChar temp[128]; 1297 1298 u_unescape(search.text, temp, 128); 1299 text.setTo(temp, u_strlen(temp)); 1300 u_unescape(search.pattern, temp, 128); 1301 pattern.setTo(temp, u_strlen(temp)); 1302 strsrch->setText(text, status); 1303 strsrch->setPattern(pattern, status); 1304 strsrch->getCollator()->setStrength(getECollationStrength( 1305 search.strength)); 1306 strsrch->reset(); 1307 1308 int count = 0; 1309 int32_t matchindex = search.offset[count]; 1310 while (U_SUCCESS(status) && matchindex >= 0) { 1311 int32_t matchlength = search.size[count]; 1312 strsrch->next(status); 1313 if (matchindex != strsrch->getMatchedStart() || 1314 matchlength != strsrch->getMatchedLength()) { 1315 char *str = toCharString(strsrch->getText()); 1316 errln("Text: %s", str); 1317 str = toCharString(strsrch->getPattern()); 1318 errln("Pattern: %s", str); 1319 errln("Error match found at %d %d", 1320 strsrch->getMatchedStart(), 1321 strsrch->getMatchedLength()); 1322 return; 1323 } 1324 matchindex = search.offset[count + 1] == -1 ? -1 : 1325 search.offset[count + 2]; 1326 if (search.offset[count + 1] != -1) { 1327 strsrch->setOffset(search.offset[count + 1] + 1, status); 1328 if (strsrch->getOffset() != search.offset[count + 1] + 1) { 1329 errln("Error setting offset\n"); 1330 return; 1331 } 1332 } 1333 1334 count += 2; 1335 } 1336 strsrch->next(status); 1337 if (strsrch->getMatchedStart() != USEARCH_DONE) { 1338 char *str = toCharString(strsrch->getText()); 1339 errln("Text: %s", str); 1340 str = toCharString(strsrch->getPattern()); 1341 errln("Pattern: %s", str); 1342 errln("Error match found at %d %d", 1343 strsrch->getMatchedStart(), 1344 strsrch->getMatchedLength()); 1345 return; 1346 } 1347 } 1348 strsrch->getCollator()->setStrength(getECollationStrength( 1349 UCOL_TERTIARY)); 1350 delete strsrch; 1351 } 1352 1353 void StringSearchTest::TestGetSetAttribute() 1354 { 1355 UErrorCode status = U_ZERO_ERROR; 1356 UnicodeString pattern("pattern"); 1357 UnicodeString text("text"); 1358 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1359 status); 1360 if (U_FAILURE(status)) { 1361 errln("Error opening search %s", u_errorName(status)); 1362 return; 1363 } 1364 1365 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_DEFAULT, status); 1366 if (U_FAILURE(status) || 1367 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { 1368 errln("Error setting overlap to the default"); 1369 } 1370 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); 1371 if (U_FAILURE(status) || 1372 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { 1373 errln("Error setting overlap true"); 1374 } 1375 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status); 1376 if (U_FAILURE(status) || 1377 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { 1378 errln("Error setting overlap false"); 1379 } 1380 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ATTRIBUTE_VALUE_COUNT, 1381 status); 1382 if (U_SUCCESS(status)) { 1383 errln("Error setting overlap to illegal value"); 1384 } 1385 status = U_ZERO_ERROR; 1386 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT, status); 1387 if (U_FAILURE(status) || 1388 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) { 1389 errln("Error setting canonical match to the default"); 1390 } 1391 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1392 if (U_FAILURE(status) || 1393 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_ON) { 1394 errln("Error setting canonical match true"); 1395 } 1396 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_OFF, status); 1397 if (U_FAILURE(status) || 1398 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF) { 1399 errln("Error setting canonical match false"); 1400 } 1401 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, 1402 USEARCH_ATTRIBUTE_VALUE_COUNT, status); 1403 if (U_SUCCESS(status)) { 1404 errln("Error setting canonical match to illegal value"); 1405 } 1406 status = U_ZERO_ERROR; 1407 strsrch->setAttribute(USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT, status); 1408 if (U_SUCCESS(status)) { 1409 errln("Error setting illegal attribute success"); 1410 } 1411 1412 delete strsrch; 1413 } 1414 1415 void StringSearchTest::TestGetMatch() 1416 { 1417 UChar temp[128]; 1418 SearchData search = MATCH[0]; 1419 u_unescape(search.text, temp, 128); 1420 UnicodeString text; 1421 text.setTo(temp, u_strlen(temp)); 1422 u_unescape(search.pattern, temp, 128); 1423 UnicodeString pattern; 1424 pattern.setTo(temp, u_strlen(temp)); 1425 1426 UErrorCode status = U_ZERO_ERROR; 1427 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1428 status); 1429 if (U_FAILURE(status)) { 1430 errln("Error opening string search %s", u_errorName(status)); 1431 if (strsrch != NULL) { 1432 delete strsrch; 1433 } 1434 return; 1435 } 1436 1437 int count = 0; 1438 int32_t matchindex = search.offset[count]; 1439 UnicodeString matchtext; 1440 while (U_SUCCESS(status) && matchindex >= 0) { 1441 int32_t matchlength = search.size[count]; 1442 strsrch->next(status); 1443 if (matchindex != strsrch->getMatchedStart() || 1444 matchlength != strsrch->getMatchedLength()) { 1445 char *str = toCharString(strsrch->getText()); 1446 errln("Text: %s", str); 1447 str = toCharString(strsrch->getPattern()); 1448 errln("Pattern: %s", str); 1449 errln("Error match found at %d %d", strsrch->getMatchedStart(), 1450 strsrch->getMatchedLength()); 1451 return; 1452 } 1453 count ++; 1454 1455 status = U_ZERO_ERROR; 1456 strsrch->getMatchedText(matchtext); 1457 if (matchtext.length() != matchlength || U_FAILURE(status)){ 1458 errln("Error getting match text"); 1459 } 1460 matchindex = search.offset[count]; 1461 } 1462 status = U_ZERO_ERROR; 1463 strsrch->next(status); 1464 if (strsrch->getMatchedStart() != USEARCH_DONE || 1465 strsrch->getMatchedLength() != 0) { 1466 errln("Error end of match not found"); 1467 } 1468 status = U_ZERO_ERROR; 1469 strsrch->getMatchedText(matchtext); 1470 if (matchtext.length() != 0) { 1471 errln("Error getting null matches"); 1472 } 1473 delete strsrch; 1474 } 1475 1476 void StringSearchTest::TestSetMatch() 1477 { 1478 int count = 0; 1479 while (MATCH[count].text != NULL) { 1480 SearchData search = MATCH[count]; 1481 UChar temp[128]; 1482 UErrorCode status = U_ZERO_ERROR; 1483 u_unescape(search.text, temp, 128); 1484 UnicodeString text; 1485 text.setTo(temp, u_strlen(temp)); 1486 u_unescape(search.pattern, temp, 128); 1487 UnicodeString pattern; 1488 pattern.setTo(temp, u_strlen(temp)); 1489 1490 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, 1491 NULL, status); 1492 if (U_FAILURE(status)) { 1493 errln("Error opening string search %s", u_errorName(status)); 1494 if (strsrch != NULL) { 1495 delete strsrch; 1496 } 1497 return; 1498 } 1499 1500 int size = 0; 1501 while (search.offset[size] != -1) { 1502 size ++; 1503 } 1504 1505 if (strsrch->first(status) != search.offset[0] || U_FAILURE(status)) { 1506 errln("Error getting first match"); 1507 } 1508 if (strsrch->last(status) != search.offset[size -1] || 1509 U_FAILURE(status)) { 1510 errln("Error getting last match"); 1511 } 1512 1513 int index = 0; 1514 while (index < size) { 1515 if (index + 2 < size) { 1516 if (strsrch->following(search.offset[index + 2] - 1, status) 1517 != search.offset[index + 2] || U_FAILURE(status)) { 1518 errln("Error getting following match at index %d", 1519 search.offset[index + 2] - 1); 1520 } 1521 } 1522 if (index + 1 < size) { 1523 if (strsrch->preceding(search.offset[index + 1] + 1524 search.size[index + 1] + 1, 1525 status) != search.offset[index + 1] || 1526 U_FAILURE(status)) { 1527 errln("Error getting preceeding match at index %d", 1528 search.offset[index + 1] + 1); 1529 } 1530 } 1531 index += 2; 1532 } 1533 status = U_ZERO_ERROR; 1534 if (strsrch->following(text.length(), status) != USEARCH_DONE) { 1535 errln("Error expecting out of bounds match"); 1536 } 1537 if (strsrch->preceding(0, status) != USEARCH_DONE) { 1538 errln("Error expecting out of bounds match"); 1539 } 1540 count ++; 1541 delete strsrch; 1542 } 1543 } 1544 1545 void StringSearchTest::TestReset() 1546 { 1547 UErrorCode status = U_ZERO_ERROR; 1548 UnicodeString text("fish fish"); 1549 UnicodeString pattern("s"); 1550 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1551 status); 1552 if (U_FAILURE(status)) { 1553 errln("Error opening string search %s", u_errorName(status)); 1554 if (strsrch != NULL) { 1555 delete strsrch; 1556 } 1557 return; 1558 } 1559 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); 1560 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1561 strsrch->setOffset(9, status); 1562 if (U_FAILURE(status)) { 1563 errln("Error setting attributes and offsets"); 1564 } 1565 else { 1566 strsrch->reset(); 1567 if (strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF || 1568 strsrch->getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF || 1569 strsrch->getOffset() != 0 || strsrch->getMatchedLength() != 0 || 1570 strsrch->getMatchedStart() != USEARCH_DONE) { 1571 errln("Error resetting string search"); 1572 } 1573 strsrch->previous(status); 1574 if (strsrch->getMatchedStart() != 7 || 1575 strsrch->getMatchedLength() != 1) { 1576 errln("Error resetting string search\n"); 1577 } 1578 } 1579 delete strsrch; 1580 } 1581 1582 void StringSearchTest::TestSupplementary() 1583 { 1584 int count = 0; 1585 while (SUPPLEMENTARY[count].text != NULL) { 1586 if (!assertEqual(&SUPPLEMENTARY[count])) { 1587 errln("Error at test number %d", count); 1588 } 1589 count ++; 1590 } 1591 } 1592 1593 void StringSearchTest::TestContraction() 1594 { 1595 UChar temp[128]; 1596 UErrorCode status = U_ZERO_ERROR; 1597 1598 u_unescape(CONTRACTIONRULE, temp, 128); 1599 UnicodeString rules; 1600 rules.setTo(temp, u_strlen(temp)); 1601 RuleBasedCollator *collator = new RuleBasedCollator(rules, 1602 getECollationStrength(UCOL_TERTIARY), UCOL_ON, status); 1603 if (U_FAILURE(status)) { 1604 errln("Error opening collator %s", u_errorName(status)); 1605 } 1606 UnicodeString text("text"); 1607 UnicodeString pattern("pattern"); 1608 StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL, 1609 status); 1610 if (U_FAILURE(status)) { 1611 errln("Error opening string search %s", u_errorName(status)); 1612 } 1613 1614 int count = 0; 1615 while (CONTRACTION[count].text != NULL) { 1616 u_unescape(CONTRACTION[count].text, temp, 128); 1617 text.setTo(temp, u_strlen(temp)); 1618 u_unescape(CONTRACTION[count].pattern, temp, 128); 1619 pattern.setTo(temp, u_strlen(temp)); 1620 strsrch->setText(text, status); 1621 strsrch->setPattern(pattern, status); 1622 if (!assertEqualWithStringSearch(strsrch, &CONTRACTION[count])) { 1623 errln("Error at test number %d", count); 1624 } 1625 count ++; 1626 } 1627 delete strsrch; 1628 delete collator; 1629 } 1630 1631 void StringSearchTest::TestIgnorable() 1632 { 1633 UChar temp[128]; 1634 u_unescape(IGNORABLERULE, temp, 128); 1635 UnicodeString rules; 1636 rules.setTo(temp, u_strlen(temp)); 1637 UErrorCode status = U_ZERO_ERROR; 1638 int count = 0; 1639 RuleBasedCollator *collator = new RuleBasedCollator(rules, 1640 getECollationStrength(IGNORABLE[count].strength), 1641 UCOL_ON, status); 1642 if (U_FAILURE(status)) { 1643 errln("Error opening collator %s", u_errorName(status)); 1644 return; 1645 } 1646 UnicodeString pattern("pattern"); 1647 UnicodeString text("text"); 1648 StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL, 1649 status); 1650 if (U_FAILURE(status)) { 1651 errln("Error opening string search %s", u_errorName(status)); 1652 delete collator; 1653 return; 1654 } 1655 1656 while (IGNORABLE[count].text != NULL) { 1657 u_unescape(IGNORABLE[count].text, temp, 128); 1658 text.setTo(temp, u_strlen(temp)); 1659 u_unescape(IGNORABLE[count].pattern, temp, 128); 1660 pattern.setTo(temp, u_strlen(temp)); 1661 strsrch->setText(text, status); 1662 strsrch->setPattern(pattern, status); 1663 if (!assertEqualWithStringSearch(strsrch, &IGNORABLE[count])) { 1664 errln("Error at test number %d", count); 1665 } 1666 count ++; 1667 } 1668 delete strsrch; 1669 delete collator; 1670 } 1671 1672 void StringSearchTest::TestDiacriticMatch() 1673 { 1674 UChar temp[128]; 1675 UErrorCode status = U_ZERO_ERROR; 1676 int count = 0; 1677 RuleBasedCollator* coll = NULL; 1678 StringSearch *strsrch = NULL; 1679 1680 UnicodeString pattern("pattern"); 1681 UnicodeString text("text"); 1682 1683 const SearchData *search; 1684 1685 search = &(DIACRITICMATCH[count]); 1686 while (search->text != NULL) { 1687 coll = getCollator(search->collator); 1688 coll->setStrength(getECollationStrength(search->strength)); 1689 strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status); 1690 if (U_FAILURE(status)) { 1691 errln("Error opening string search %s", u_errorName(status)); 1692 return; 1693 } 1694 u_unescape(search->text, temp, 128); 1695 text.setTo(temp, u_strlen(temp)); 1696 u_unescape(search->pattern, temp, 128); 1697 pattern.setTo(temp, u_strlen(temp)); 1698 strsrch->setText(text, status); 1699 strsrch->setPattern(pattern, status); 1700 if (!assertEqualWithStringSearch(strsrch, search)) { 1701 errln("Error at test number %d", count); 1702 } 1703 search = &(DIACRITICMATCH[++count]); 1704 delete strsrch; 1705 } 1706 1707 } 1708 1709 void StringSearchTest::TestCanonical() 1710 { 1711 int count = 0; 1712 while (BASICCANONICAL[count].text != NULL) { 1713 if (!assertCanonicalEqual(&BASICCANONICAL[count])) { 1714 errln("Error at test number %d", count); 1715 } 1716 count ++; 1717 } 1718 } 1719 1720 void StringSearchTest::TestNormCanonical() 1721 { 1722 UErrorCode status = U_ZERO_ERROR; 1723 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 1724 int count = 0; 1725 while (NORMCANONICAL[count].text != NULL) { 1726 if (!assertCanonicalEqual(&NORMCANONICAL[count])) { 1727 errln("Error at test number %d", count); 1728 } 1729 count ++; 1730 } 1731 m_en_us_->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 1732 } 1733 1734 void StringSearchTest::TestStrengthCanonical() 1735 { 1736 int count = 0; 1737 while (STRENGTHCANONICAL[count].text != NULL) { 1738 if (!assertCanonicalEqual(&STRENGTHCANONICAL[count])) { 1739 errln("Error at test number %d", count); 1740 } 1741 count ++; 1742 } 1743 } 1744 1745 #if !UCONFIG_NO_BREAK_ITERATION 1746 1747 void StringSearchTest::TestBreakIteratorCanonical() 1748 { 1749 UErrorCode status = U_ZERO_ERROR; 1750 int count = 0; 1751 1752 while (count < 4) { 1753 // special purposes for tests numbers 0-3 1754 UChar temp[128]; 1755 const SearchData *search = &(BREAKITERATORCANONICAL[count]); 1756 1757 u_unescape(search->text, temp, 128); 1758 UnicodeString text; 1759 text.setTo(temp, u_strlen(temp)); 1760 u_unescape(search->pattern, temp, 128); 1761 UnicodeString pattern; 1762 pattern.setTo(temp, u_strlen(temp)); 1763 RuleBasedCollator *collator = getCollator(search->collator); 1764 collator->setStrength(getECollationStrength(search->strength)); 1765 1766 BreakIterator *breaker = getBreakIterator(search->breaker); 1767 StringSearch *strsrch = new StringSearch(pattern, text, collator, 1768 breaker, status); 1769 if (U_FAILURE(status)) { 1770 errln("Error creating string search data"); 1771 return; 1772 } 1773 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1774 if (U_FAILURE(status) || 1775 strsrch->getBreakIterator() != breaker) { 1776 errln("Error setting break iterator"); 1777 delete strsrch; 1778 return; 1779 } 1780 if (!assertEqualWithStringSearch(strsrch, search)) { 1781 collator->setStrength(getECollationStrength(UCOL_TERTIARY)); 1782 delete strsrch; 1783 return; 1784 } 1785 search = &(BREAKITERATOREXACT[count + 1]); 1786 breaker = getBreakIterator(search->breaker); 1787 if (breaker == NULL) { 1788 errln("Error creating BreakIterator"); 1789 return; 1790 } 1791 breaker->setText(strsrch->getText()); 1792 strsrch->setBreakIterator(breaker, status); 1793 if (U_FAILURE(status) || strsrch->getBreakIterator() != breaker) { 1794 errln("Error setting break iterator"); 1795 delete strsrch; 1796 return; 1797 } 1798 strsrch->reset(); 1799 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1800 if (!assertEqualWithStringSearch(strsrch, search)) { 1801 errln("Error at test number %d", count); 1802 return; 1803 } 1804 delete strsrch; 1805 count += 2; 1806 } 1807 count = 0; 1808 while (BREAKITERATORCANONICAL[count].text != NULL) { 1809 if (!assertEqual(&BREAKITERATORCANONICAL[count])) { 1810 errln("Error at test number %d", count); 1811 return; 1812 } 1813 count ++; 1814 } 1815 } 1816 1817 #endif 1818 1819 void StringSearchTest::TestVariableCanonical() 1820 { 1821 int count = 0; 1822 UErrorCode status = U_ZERO_ERROR; 1823 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 1824 if (U_FAILURE(status)) { 1825 errln("Error setting collation alternate attribute %s", 1826 u_errorName(status)); 1827 } 1828 while (VARIABLE[count].text != NULL) { 1829 logln("variable %d", count); 1830 if (!assertCanonicalEqual(&VARIABLE[count])) { 1831 errln("Error at test number %d", count); 1832 } 1833 count ++; 1834 } 1835 m_en_us_->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, 1836 status); 1837 } 1838 1839 void StringSearchTest::TestOverlapCanonical() 1840 { 1841 int count = 0; 1842 while (OVERLAPCANONICAL[count].text != NULL) { 1843 if (!assertEqualWithAttribute(&OVERLAPCANONICAL[count], USEARCH_ON, 1844 USEARCH_ON)) { 1845 errln("Error at overlap test number %d", count); 1846 } 1847 count ++; 1848 } 1849 count = 0; 1850 while (NONOVERLAP[count].text != NULL) { 1851 if (!assertCanonicalEqual(&NONOVERLAPCANONICAL[count])) { 1852 errln("Error at non overlap test number %d", count); 1853 } 1854 count ++; 1855 } 1856 1857 count = 0; 1858 while (count < 1) { 1859 UChar temp[128]; 1860 const SearchData *search = &(OVERLAPCANONICAL[count]); 1861 UErrorCode status = U_ZERO_ERROR; 1862 1863 u_unescape(search->text, temp, 128); 1864 UnicodeString text; 1865 text.setTo(temp, u_strlen(temp)); 1866 u_unescape(search->pattern, temp, 128); 1867 UnicodeString pattern; 1868 pattern.setTo(temp, u_strlen(temp)); 1869 RuleBasedCollator *collator = getCollator(search->collator); 1870 StringSearch *strsrch = new StringSearch(pattern, text, collator, 1871 NULL, status); 1872 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1873 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); 1874 if (U_FAILURE(status) || 1875 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { 1876 errln("Error setting overlap option"); 1877 } 1878 if (!assertEqualWithStringSearch(strsrch, search)) { 1879 delete strsrch; 1880 return; 1881 } 1882 search = &(NONOVERLAPCANONICAL[count]); 1883 strsrch->setAttribute(USEARCH_OVERLAP, USEARCH_OFF, status); 1884 if (U_FAILURE(status) || 1885 strsrch->getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { 1886 errln("Error setting overlap option"); 1887 } 1888 strsrch->reset(); 1889 if (!assertEqualWithStringSearch(strsrch, search)) { 1890 delete strsrch; 1891 errln("Error at test number %d", count); 1892 } 1893 1894 count ++; 1895 delete strsrch; 1896 } 1897 } 1898 1899 void StringSearchTest::TestCollatorCanonical() 1900 { 1901 /* test collator that thinks "o" and "p" are the same thing */ 1902 UChar temp[128]; 1903 u_unescape(COLLATORCANONICAL[0].text, temp, 128); 1904 UnicodeString text; 1905 text.setTo(temp, u_strlen(temp)); 1906 u_unescape(COLLATORCANONICAL[0].pattern, temp, 128); 1907 UnicodeString pattern; 1908 pattern.setTo(temp, u_strlen(temp)); 1909 1910 UErrorCode status = U_ZERO_ERROR; 1911 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, 1912 NULL, status); 1913 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1914 if (U_FAILURE(status)) { 1915 errln("Error opening string search %s", u_errorName(status)); 1916 } 1917 if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) { 1918 delete strsrch; 1919 return; 1920 } 1921 1922 u_unescape(TESTCOLLATORRULE, temp, 128); 1923 UnicodeString rules; 1924 rules.setTo(temp, u_strlen(temp)); 1925 RuleBasedCollator *tailored = new RuleBasedCollator(rules, 1926 getECollationStrength(COLLATORCANONICAL[1].strength), 1927 UCOL_ON, status); 1928 1929 if (U_FAILURE(status)) { 1930 errln("Error opening rule based collator %s", u_errorName(status)); 1931 } 1932 1933 strsrch->setCollator(tailored, status); 1934 if (U_FAILURE(status) || *(strsrch->getCollator()) != *tailored) { 1935 errln("Error setting rule based collator"); 1936 } 1937 strsrch->reset(); 1938 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1939 if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[1])) { 1940 delete strsrch; 1941 if (tailored != NULL) { 1942 delete tailored; 1943 } 1944 1945 return; 1946 } 1947 1948 strsrch->setCollator(m_en_us_, status); 1949 strsrch->reset(); 1950 if (U_FAILURE(status) || *(strsrch->getCollator()) != *m_en_us_) { 1951 errln("Error setting rule based collator"); 1952 } 1953 if (!assertEqualWithStringSearch(strsrch, &COLLATORCANONICAL[0])) { 1954 } 1955 delete strsrch; 1956 if (tailored != NULL) { 1957 delete tailored; 1958 } 1959 } 1960 1961 void StringSearchTest::TestPatternCanonical() 1962 { 1963 1964 UChar temp[128]; 1965 1966 u_unescape(PATTERNCANONICAL[0].text, temp, 128); 1967 UnicodeString text; 1968 text.setTo(temp, u_strlen(temp)); 1969 u_unescape(PATTERNCANONICAL[0].pattern, temp, 128); 1970 UnicodeString pattern; 1971 pattern.setTo(temp, u_strlen(temp)); 1972 1973 m_en_us_->setStrength( 1974 getECollationStrength(PATTERNCANONICAL[0].strength)); 1975 1976 UErrorCode status = U_ZERO_ERROR; 1977 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 1978 status); 1979 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 1980 if (U_FAILURE(status)) { 1981 errln("Error opening string search %s", u_errorName(status)); 1982 goto ENDTESTPATTERN; 1983 } 1984 if (pattern != strsrch->getPattern()) { 1985 errln("Error setting pattern"); 1986 } 1987 if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) { 1988 goto ENDTESTPATTERN; 1989 } 1990 1991 u_unescape(PATTERNCANONICAL[1].pattern, temp, 128); 1992 pattern.setTo(temp, u_strlen(temp)); 1993 strsrch->setPattern(pattern, status); 1994 if (pattern != strsrch->getPattern()) { 1995 errln("Error setting pattern"); 1996 goto ENDTESTPATTERN; 1997 } 1998 strsrch->reset(); 1999 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 2000 if (U_FAILURE(status)) { 2001 errln("Error setting pattern %s", u_errorName(status)); 2002 } 2003 if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[1])) { 2004 goto ENDTESTPATTERN; 2005 } 2006 2007 u_unescape(PATTERNCANONICAL[0].pattern, temp, 128); 2008 pattern.setTo(temp, u_strlen(temp)); 2009 strsrch->setPattern(pattern, status); 2010 if (pattern != strsrch->getPattern()) { 2011 errln("Error setting pattern"); 2012 goto ENDTESTPATTERN; 2013 } 2014 strsrch->reset(); 2015 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 2016 if (U_FAILURE(status)) { 2017 errln("Error setting pattern %s", u_errorName(status)); 2018 } 2019 if (!assertEqualWithStringSearch(strsrch, &PATTERNCANONICAL[0])) { 2020 goto ENDTESTPATTERN; 2021 } 2022 ENDTESTPATTERN: 2023 m_en_us_->setStrength(getECollationStrength(UCOL_TERTIARY)); 2024 if (strsrch != NULL) { 2025 delete strsrch; 2026 } 2027 } 2028 2029 void StringSearchTest::TestTextCanonical() 2030 { 2031 UChar temp[128]; 2032 u_unescape(TEXTCANONICAL[0].text, temp, 128); 2033 UnicodeString text; 2034 text.setTo(temp, u_strlen(temp)); 2035 u_unescape(TEXTCANONICAL[0].pattern, temp, 128); 2036 UnicodeString pattern; 2037 pattern.setTo(temp, u_strlen(temp)); 2038 2039 UErrorCode status = U_ZERO_ERROR; 2040 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 2041 status); 2042 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 2043 2044 if (U_FAILURE(status)) { 2045 errln("Error opening string search %s", u_errorName(status)); 2046 goto ENDTESTPATTERN; 2047 } 2048 if (text != strsrch->getText()) { 2049 errln("Error setting text"); 2050 } 2051 if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) { 2052 goto ENDTESTPATTERN; 2053 } 2054 2055 u_unescape(TEXTCANONICAL[1].text, temp, 128); 2056 text.setTo(temp, u_strlen(temp)); 2057 strsrch->setText(text, status); 2058 if (text != strsrch->getText()) { 2059 errln("Error setting text"); 2060 goto ENDTESTPATTERN; 2061 } 2062 if (U_FAILURE(status)) { 2063 errln("Error setting text %s", u_errorName(status)); 2064 } 2065 if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[1])) { 2066 goto ENDTESTPATTERN; 2067 } 2068 2069 u_unescape(TEXTCANONICAL[0].text, temp, 128); 2070 text.setTo(temp, u_strlen(temp)); 2071 strsrch->setText(text, status); 2072 if (text != strsrch->getText()) { 2073 errln("Error setting text"); 2074 goto ENDTESTPATTERN; 2075 } 2076 if (U_FAILURE(status)) { 2077 errln("Error setting pattern %s", u_errorName(status)); 2078 } 2079 if (!assertEqualWithStringSearch(strsrch, &TEXTCANONICAL[0])) { 2080 goto ENDTESTPATTERN; 2081 } 2082 ENDTESTPATTERN: 2083 if (strsrch != NULL) { 2084 delete strsrch; 2085 } 2086 } 2087 2088 void StringSearchTest::TestCompositeBoundariesCanonical() 2089 { 2090 int count = 0; 2091 while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) { 2092 logln("composite %d", count); 2093 if (!assertCanonicalEqual(&COMPOSITEBOUNDARIESCANONICAL[count])) { 2094 errln("Error at test number %d", count); 2095 } 2096 count ++; 2097 } 2098 } 2099 2100 void StringSearchTest::TestGetSetOffsetCanonical() 2101 { 2102 2103 UErrorCode status = U_ZERO_ERROR; 2104 UnicodeString text("text"); 2105 UnicodeString pattern("pattern"); 2106 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 2107 status); 2108 Collator *collator = strsrch->getCollator(); 2109 2110 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 2111 2112 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 2113 /* testing out of bounds error */ 2114 strsrch->setOffset(-1, status); 2115 if (U_SUCCESS(status)) { 2116 errln("Error expecting set offset error"); 2117 } 2118 strsrch->setOffset(128, status); 2119 if (U_SUCCESS(status)) { 2120 errln("Error expecting set offset error"); 2121 } 2122 int index = 0; 2123 UChar temp[128]; 2124 while (BASICCANONICAL[index].text != NULL) { 2125 SearchData search = BASICCANONICAL[index ++]; 2126 if (BASICCANONICAL[index].text == NULL) { 2127 /* skip the last one */ 2128 break; 2129 } 2130 2131 u_unescape(search.text, temp, 128); 2132 text.setTo(temp, u_strlen(temp)); 2133 u_unescape(search.pattern, temp, 128); 2134 pattern.setTo(temp, u_strlen(temp)); 2135 2136 UErrorCode status = U_ZERO_ERROR; 2137 strsrch->setText(text, status); 2138 2139 strsrch->setPattern(pattern, status); 2140 2141 int count = 0; 2142 int32_t matchindex = search.offset[count]; 2143 while (U_SUCCESS(status) && matchindex >= 0) { 2144 int32_t matchlength = search.size[count]; 2145 strsrch->next(status); 2146 if (matchindex != strsrch->getMatchedStart() || 2147 matchlength != strsrch->getMatchedLength()) { 2148 char *str = toCharString(strsrch->getText()); 2149 errln("Text: %s", str); 2150 str = toCharString(strsrch->getPattern()); 2151 errln("Pattern: %s", str); 2152 errln("Error match found at %d %d", 2153 strsrch->getMatchedStart(), 2154 strsrch->getMatchedLength()); 2155 goto bail; 2156 } 2157 matchindex = search.offset[count + 1] == -1 ? -1 : 2158 search.offset[count + 2]; 2159 if (search.offset[count + 1] != -1) { 2160 strsrch->setOffset(search.offset[count + 1] + 1, status); 2161 if (strsrch->getOffset() != search.offset[count + 1] + 1) { 2162 errln("Error setting offset"); 2163 goto bail; 2164 } 2165 } 2166 2167 count += 2; 2168 } 2169 strsrch->next(status); 2170 if (strsrch->getMatchedStart() != USEARCH_DONE) { 2171 char *str = toCharString(strsrch->getText()); 2172 errln("Text: %s", str); 2173 str = toCharString(strsrch->getPattern()); 2174 errln("Pattern: %s", str); 2175 errln("Error match found at %d %d", strsrch->getMatchedStart(), 2176 strsrch->getMatchedLength()); 2177 goto bail; 2178 } 2179 } 2180 2181 bail: 2182 collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 2183 delete strsrch; 2184 } 2185 2186 void StringSearchTest::TestSupplementaryCanonical() 2187 { 2188 int count = 0; 2189 while (SUPPLEMENTARYCANONICAL[count].text != NULL) { 2190 if (!assertCanonicalEqual(&SUPPLEMENTARYCANONICAL[count])) { 2191 errln("Error at test number %d", count); 2192 } 2193 count ++; 2194 } 2195 } 2196 2197 void StringSearchTest::TestContractionCanonical() 2198 { 2199 UChar temp[128]; 2200 2201 u_unescape(CONTRACTIONRULE, temp, 128); 2202 UnicodeString rules; 2203 rules.setTo(temp, u_strlen(temp)); 2204 2205 UErrorCode status = U_ZERO_ERROR; 2206 RuleBasedCollator *collator = new RuleBasedCollator(rules, 2207 getECollationStrength(UCOL_TERTIARY), UCOL_ON, status); 2208 if (U_FAILURE(status)) { 2209 errln("Error opening collator %s", u_errorName(status)); 2210 } 2211 UnicodeString text("text"); 2212 UnicodeString pattern("pattern"); 2213 StringSearch *strsrch = new StringSearch(pattern, text, collator, NULL, 2214 status); 2215 strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status); 2216 if (U_FAILURE(status)) { 2217 errln("Error opening string search %s", u_errorName(status)); 2218 } 2219 2220 int count = 0; 2221 while (CONTRACTIONCANONICAL[count].text != NULL) { 2222 u_unescape(CONTRACTIONCANONICAL[count].text, temp, 128); 2223 text.setTo(temp, u_strlen(temp)); 2224 u_unescape(CONTRACTIONCANONICAL[count].pattern, temp, 128); 2225 pattern.setTo(temp, u_strlen(temp)); 2226 strsrch->setText(text, status); 2227 strsrch->setPattern(pattern, status); 2228 if (!assertEqualWithStringSearch(strsrch, 2229 &CONTRACTIONCANONICAL[count])) { 2230 errln("Error at test number %d", count); 2231 } 2232 count ++; 2233 } 2234 delete strsrch; 2235 delete collator; 2236 } 2237 2238 void StringSearchTest::TestUClassID() 2239 { 2240 char id = *((char *)StringSearch::getStaticClassID()); 2241 if (id != 0) { 2242 errln("Static class id for StringSearch should be 0"); 2243 } 2244 UErrorCode status = U_ZERO_ERROR; 2245 UnicodeString text("text"); 2246 UnicodeString pattern("pattern"); 2247 StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL, 2248 status); 2249 id = *((char *)strsrch->getDynamicClassID()); 2250 if (id != 0) { 2251 errln("Dynamic class id for StringSearch should be 0"); 2252 } 2253 delete strsrch; 2254 } 2255 2256 class TestSearch : public SearchIterator 2257 { 2258 public: 2259 TestSearch(const TestSearch &obj); 2260 TestSearch(const UnicodeString &text, 2261 BreakIterator *breakiter, 2262 const UnicodeString &pattern); 2263 ~TestSearch(); 2264 2265 void setOffset(int32_t position, UErrorCode &status); 2266 int32_t getOffset() const; 2267 SearchIterator* safeClone() const; 2268 2269 2270 /** 2271 * ICU "poor man's RTTI", returns a UClassID for the actual class. 2272 * 2273 * @draft ICU 2.2 2274 */ 2275 virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); } 2276 2277 /** 2278 * ICU "poor man's RTTI", returns a UClassID for this class. 2279 * 2280 * @draft ICU 2.2 2281 */ 2282 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; } 2283 2284 UBool operator!=(const TestSearch &that) const; 2285 2286 UnicodeString m_pattern_; 2287 2288 protected: 2289 int32_t handleNext(int32_t position, UErrorCode &status); 2290 int32_t handlePrev(int32_t position, UErrorCode &status); 2291 TestSearch & operator=(const TestSearch &that); 2292 2293 private: 2294 2295 /** 2296 * The address of this static class variable serves as this class's ID 2297 * for ICU "poor man's RTTI". 2298 */ 2299 static const char fgClassID; 2300 uint32_t m_offset_; 2301 }; 2302 2303 const char TestSearch::fgClassID=0; 2304 2305 TestSearch::TestSearch(const TestSearch &obj) : SearchIterator(obj) 2306 { 2307 m_offset_ = obj.m_offset_; 2308 m_pattern_ = obj.m_pattern_; 2309 } 2310 2311 TestSearch::TestSearch(const UnicodeString &text, 2312 BreakIterator *breakiter, 2313 const UnicodeString &pattern) : SearchIterator() 2314 { 2315 m_breakiterator_ = breakiter; 2316 m_pattern_ = pattern; 2317 m_text_ = text; 2318 m_offset_ = 0; 2319 m_pattern_ = pattern; 2320 } 2321 2322 TestSearch::~TestSearch() 2323 { 2324 } 2325 2326 2327 void TestSearch::setOffset(int32_t position, UErrorCode &status) 2328 { 2329 if (position >= 0 && position <= m_text_.length()) { 2330 m_offset_ = position; 2331 } 2332 else { 2333 status = U_INDEX_OUTOFBOUNDS_ERROR; 2334 } 2335 } 2336 2337 int32_t TestSearch::getOffset() const 2338 { 2339 return m_offset_; 2340 } 2341 2342 SearchIterator * TestSearch::safeClone() const 2343 { 2344 return new TestSearch(m_text_, m_breakiterator_, m_pattern_); 2345 } 2346 2347 UBool TestSearch::operator!=(const TestSearch &that) const 2348 { 2349 if (SearchIterator::operator !=(that)) { 2350 return FALSE; 2351 } 2352 return m_offset_ != that.m_offset_ || m_pattern_ != that.m_pattern_; 2353 } 2354 2355 int32_t TestSearch::handleNext(int32_t start, UErrorCode &status) 2356 { 2357 if(U_SUCCESS(status)) { 2358 int match = m_text_.indexOf(m_pattern_, start); 2359 if (match < 0) { 2360 m_offset_ = m_text_.length(); 2361 setMatchStart(m_offset_); 2362 setMatchLength(0); 2363 return USEARCH_DONE; 2364 } 2365 setMatchStart(match); 2366 m_offset_ = match; 2367 setMatchLength(m_pattern_.length()); 2368 return match; 2369 } else { 2370 return USEARCH_DONE; 2371 } 2372 } 2373 2374 int32_t TestSearch::handlePrev(int32_t start, UErrorCode &status) 2375 { 2376 if(U_SUCCESS(status)) { 2377 int match = m_text_.lastIndexOf(m_pattern_, 0, start); 2378 if (match < 0) { 2379 m_offset_ = 0; 2380 setMatchStart(m_offset_); 2381 setMatchLength(0); 2382 return USEARCH_DONE; 2383 } 2384 setMatchStart(match); 2385 m_offset_ = match; 2386 setMatchLength(m_pattern_.length()); 2387 return match; 2388 } else { 2389 return USEARCH_DONE; 2390 } 2391 } 2392 2393 TestSearch & TestSearch::operator=(const TestSearch &that) 2394 { 2395 SearchIterator::operator=(that); 2396 m_offset_ = that.m_offset_; 2397 m_pattern_ = that.m_pattern_; 2398 return *this; 2399 } 2400 2401 void StringSearchTest::TestSubclass() 2402 { 2403 UnicodeString text("abc abcd abc"); 2404 UnicodeString pattern("abc"); 2405 TestSearch search(text, NULL, pattern); 2406 TestSearch search2(search); 2407 int expected[] = {0, 4, 9}; 2408 UErrorCode status = U_ZERO_ERROR; 2409 int i; 2410 StringCharacterIterator chariter(text); 2411 2412 search.setText(text, status); 2413 if (search.getText() != search2.getText()) { 2414 errln("Error setting text"); 2415 } 2416 2417 search.setText(chariter, status); 2418 if (search.getText() != search2.getText()) { 2419 errln("Error setting text"); 2420 } 2421 2422 search.reset(); 2423 // comparing constructors 2424 2425 for (i = 0; i < (int)(sizeof(expected) / sizeof(expected[0])); i ++) { 2426 if (search.next(status) != expected[i]) { 2427 errln("Error getting next match"); 2428 } 2429 if (search.getMatchedLength() != search.m_pattern_.length()) { 2430 errln("Error getting next match length"); 2431 } 2432 } 2433 if (search.next(status) != USEARCH_DONE) { 2434 errln("Error should have reached the end of the iteration"); 2435 } 2436 for (i = sizeof(expected) / sizeof(expected[0]) - 1; i >= 0; i --) { 2437 if (search.previous(status) != expected[i]) { 2438 errln("Error getting previous match"); 2439 } 2440 if (search.getMatchedLength() != search.m_pattern_.length()) { 2441 errln("Error getting previous match length"); 2442 } 2443 } 2444 if (search.previous(status) != USEARCH_DONE) { 2445 errln("Error should have reached the start of the iteration"); 2446 } 2447 } 2448 2449 class StubSearchIterator:public SearchIterator{ 2450 public: 2451 StubSearchIterator(){} 2452 virtual void setOffset(int32_t , UErrorCode &) {}; 2453 virtual int32_t getOffset(void) const {return 0;}; 2454 virtual SearchIterator* safeClone(void) const {return NULL;}; 2455 virtual int32_t handleNext(int32_t , UErrorCode &){return 0;}; 2456 virtual int32_t handlePrev(int32_t , UErrorCode &) {return 0;}; 2457 virtual UClassID getDynamicClassID() const { 2458 static char classID = 0; 2459 return (UClassID)&classID; 2460 } 2461 }; 2462 2463 void StringSearchTest::TestCoverage(){ 2464 StubSearchIterator stub1, stub2; 2465 UErrorCode status = U_ZERO_ERROR; 2466 2467 if (stub1 != stub2){ 2468 errln("new StubSearchIterator should be equal"); 2469 } 2470 2471 stub2.setText(UnicodeString("ABC"), status); 2472 if (U_FAILURE(status)) { 2473 errln("Error: SearchIterator::SetText"); 2474 } 2475 2476 stub1 = stub2; 2477 if (stub1 != stub2){ 2478 errln("SearchIterator::operator = assigned object should be equal"); 2479 } 2480 } 2481 2482 #endif /* !UCONFIG_NO_BREAK_ITERATION */ 2483 2484 #endif /* #if !UCONFIG_NO_COLLATION */ 2485