1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File CITERTST.C 9 * 10 * Modification History: 11 * Date Name Description 12 * Madhu Katragadda Ported for C API 13 * 02/19/01 synwee Modified test case for new collation iterator 14 *********************************************************************************/ 15 /* 16 * Collation Iterator tests. 17 * (Let me reiterate my position...) 18 */ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_COLLATION 23 24 #include "unicode/ucol.h" 25 #include "unicode/ucoleitr.h" 26 #include "unicode/uloc.h" 27 #include "unicode/uchar.h" 28 #include "unicode/ustring.h" 29 #include "unicode/putil.h" 30 #include "callcoll.h" 31 #include "cmemory.h" 32 #include "cintltst.h" 33 #include "citertst.h" 34 #include "ccolltst.h" 35 #include "filestrm.h" 36 #include "cstring.h" 37 #include "ucol_imp.h" 38 #include "uparse.h" 39 #include <stdio.h> 40 41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); 42 43 void addCollIterTest(TestNode** root) 44 { 45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); 46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); 47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); 48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); 49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); 50 addTest(root, &TestNormalizedUnicodeChar, 51 "tscoll/citertst/TestNormalizedUnicodeChar"); 52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); 53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); 54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); 55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); 56 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); 57 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements"); 58 } 59 60 /* The locales we support */ 61 62 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"}; 63 64 static void TestBug672() { 65 UErrorCode status = U_ZERO_ERROR; 66 UChar pattern[20]; 67 UChar text[50]; 68 int i; 69 int result[3][3]; 70 71 u_uastrcpy(pattern, "resume"); 72 u_uastrcpy(text, "Time to resume updating my resume."); 73 74 for (i = 0; i < 3; ++ i) { 75 UCollator *coll = ucol_open(LOCALES[i], &status); 76 UCollationElements *pitr = ucol_openElements(coll, pattern, -1, 77 &status); 78 UCollationElements *titer = ucol_openElements(coll, text, -1, 79 &status); 80 if (U_FAILURE(status)) { 81 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n", 82 myErrorName(status)); 83 return; 84 } 85 86 log_verbose("locale tested %s\n", LOCALES[i]); 87 88 while (ucol_next(pitr, &status) != UCOL_NULLORDER && 89 U_SUCCESS(status)) { 90 } 91 if (U_FAILURE(status)) { 92 log_err("ERROR: reversing collation iterator :%s\n", 93 myErrorName(status)); 94 return; 95 } 96 ucol_reset(pitr); 97 98 ucol_setOffset(titer, u_strlen(pattern), &status); 99 if (U_FAILURE(status)) { 100 log_err("ERROR: setting offset in collator :%s\n", 101 myErrorName(status)); 102 return; 103 } 104 result[i][0] = ucol_getOffset(titer); 105 log_verbose("Text iterator set to offset %d\n", result[i][0]); 106 107 /* Use previous() */ 108 ucol_previous(titer, &status); 109 result[i][1] = ucol_getOffset(titer); 110 log_verbose("Current offset %d after previous\n", result[i][1]); 111 112 /* Add one to index */ 113 log_verbose("Adding one to current offset...\n"); 114 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); 115 if (U_FAILURE(status)) { 116 log_err("ERROR: setting offset in collator :%s\n", 117 myErrorName(status)); 118 return; 119 } 120 result[i][2] = ucol_getOffset(titer); 121 log_verbose("Current offset in text = %d\n", result[i][2]); 122 ucol_closeElements(pitr); 123 ucol_closeElements(titer); 124 ucol_close(coll); 125 } 126 127 if (uprv_memcmp(result[0], result[1], 3) != 0 || 128 uprv_memcmp(result[1], result[2], 3) != 0) { 129 log_err("ERROR: Different locales have different offsets at the same character\n"); 130 } 131 } 132 133 134 135 /* Running this test with normalization enabled showed up a bug in the incremental 136 normalization code. */ 137 static void TestBug672Normalize() { 138 UErrorCode status = U_ZERO_ERROR; 139 UChar pattern[20]; 140 UChar text[50]; 141 int i; 142 int result[3][3]; 143 144 u_uastrcpy(pattern, "resume"); 145 u_uastrcpy(text, "Time to resume updating my resume."); 146 147 for (i = 0; i < 3; ++ i) { 148 UCollator *coll = ucol_open(LOCALES[i], &status); 149 UCollationElements *pitr = NULL; 150 UCollationElements *titer = NULL; 151 152 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 153 154 pitr = ucol_openElements(coll, pattern, -1, &status); 155 titer = ucol_openElements(coll, text, -1, &status); 156 if (U_FAILURE(status)) { 157 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n", 158 myErrorName(status)); 159 return; 160 } 161 162 log_verbose("locale tested %s\n", LOCALES[i]); 163 164 while (ucol_next(pitr, &status) != UCOL_NULLORDER && 165 U_SUCCESS(status)) { 166 } 167 if (U_FAILURE(status)) { 168 log_err("ERROR: reversing collation iterator :%s\n", 169 myErrorName(status)); 170 return; 171 } 172 ucol_reset(pitr); 173 174 ucol_setOffset(titer, u_strlen(pattern), &status); 175 if (U_FAILURE(status)) { 176 log_err("ERROR: setting offset in collator :%s\n", 177 myErrorName(status)); 178 return; 179 } 180 result[i][0] = ucol_getOffset(titer); 181 log_verbose("Text iterator set to offset %d\n", result[i][0]); 182 183 /* Use previous() */ 184 ucol_previous(titer, &status); 185 result[i][1] = ucol_getOffset(titer); 186 log_verbose("Current offset %d after previous\n", result[i][1]); 187 188 /* Add one to index */ 189 log_verbose("Adding one to current offset...\n"); 190 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); 191 if (U_FAILURE(status)) { 192 log_err("ERROR: setting offset in collator :%s\n", 193 myErrorName(status)); 194 return; 195 } 196 result[i][2] = ucol_getOffset(titer); 197 log_verbose("Current offset in text = %d\n", result[i][2]); 198 ucol_closeElements(pitr); 199 ucol_closeElements(titer); 200 ucol_close(coll); 201 } 202 203 if (uprv_memcmp(result[0], result[1], 3) != 0 || 204 uprv_memcmp(result[1], result[2], 3) != 0) { 205 log_err("ERROR: Different locales have different offsets at the same character\n"); 206 } 207 } 208 209 210 211 212 /** 213 * Test for CollationElementIterator previous and next for the whole set of 214 * unicode characters. 215 */ 216 static void TestUnicodeChar() 217 { 218 UChar source[0x100]; 219 UCollator *en_us; 220 UCollationElements *iter; 221 UErrorCode status = U_ZERO_ERROR; 222 UChar codepoint; 223 224 UChar *test; 225 en_us = ucol_open("en_US", &status); 226 if (U_FAILURE(status)){ 227 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n", 228 myErrorName(status)); 229 return; 230 } 231 232 for (codepoint = 1; codepoint < 0xFFFE;) 233 { 234 test = source; 235 236 while (codepoint % 0xFF != 0) 237 { 238 if (u_isdefined(codepoint)) 239 *(test ++) = codepoint; 240 codepoint ++; 241 } 242 243 if (u_isdefined(codepoint)) 244 *(test ++) = codepoint; 245 246 if (codepoint != 0xFFFF) 247 codepoint ++; 248 249 *test = 0; 250 iter=ucol_openElements(en_us, source, u_strlen(source), &status); 251 if(U_FAILURE(status)){ 252 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 253 myErrorName(status)); 254 ucol_close(en_us); 255 return; 256 } 257 /* A basic test to see if it's working at all */ 258 log_verbose("codepoint testing %x\n", codepoint); 259 backAndForth(iter); 260 ucol_closeElements(iter); 261 262 /* null termination test */ 263 iter=ucol_openElements(en_us, source, -1, &status); 264 if(U_FAILURE(status)){ 265 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 266 myErrorName(status)); 267 ucol_close(en_us); 268 return; 269 } 270 /* A basic test to see if it's working at all */ 271 backAndForth(iter); 272 ucol_closeElements(iter); 273 } 274 275 ucol_close(en_us); 276 } 277 278 /** 279 * Test for CollationElementIterator previous and next for the whole set of 280 * unicode characters with normalization on. 281 */ 282 static void TestNormalizedUnicodeChar() 283 { 284 UChar source[0x100]; 285 UCollator *th_th; 286 UCollationElements *iter; 287 UErrorCode status = U_ZERO_ERROR; 288 UChar codepoint; 289 290 UChar *test; 291 /* thai should have normalization on */ 292 th_th = ucol_open("th_TH", &status); 293 if (U_FAILURE(status)){ 294 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n", 295 myErrorName(status)); 296 return; 297 } 298 299 for (codepoint = 1; codepoint < 0xFFFE;) 300 { 301 test = source; 302 303 while (codepoint % 0xFF != 0) 304 { 305 if (u_isdefined(codepoint)) 306 *(test ++) = codepoint; 307 codepoint ++; 308 } 309 310 if (u_isdefined(codepoint)) 311 *(test ++) = codepoint; 312 313 if (codepoint != 0xFFFF) 314 codepoint ++; 315 316 *test = 0; 317 iter=ucol_openElements(th_th, source, u_strlen(source), &status); 318 if(U_FAILURE(status)){ 319 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 320 myErrorName(status)); 321 ucol_close(th_th); 322 return; 323 } 324 325 backAndForth(iter); 326 ucol_closeElements(iter); 327 328 iter=ucol_openElements(th_th, source, -1, &status); 329 if(U_FAILURE(status)){ 330 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 331 myErrorName(status)); 332 ucol_close(th_th); 333 return; 334 } 335 336 backAndForth(iter); 337 ucol_closeElements(iter); 338 } 339 340 ucol_close(th_th); 341 } 342 343 /** 344 * Test the incremental normalization 345 */ 346 static void TestNormalization() 347 { 348 UErrorCode status = U_ZERO_ERROR; 349 const char *str = 350 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315"; 351 UCollator *coll; 352 UChar rule[50]; 353 int rulelen = u_unescape(str, rule, 50); 354 int count = 0; 355 const char *testdata[] = 356 {"\\u1ED9", "o\\u0323\\u0302", 357 "\\u0300\\u0315", "\\u0315\\u0300", 358 "A\\u0300\\u0315B", "A\\u0315\\u0300B", 359 "A\\u0316\\u0315B", "A\\u0315\\u0316B", 360 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316", 361 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B", 362 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"}; 363 int32_t srclen; 364 UChar source[10]; 365 UCollationElements *iter; 366 367 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status); 368 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 369 if (U_FAILURE(status)){ 370 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n", 371 myErrorName(status)); 372 return; 373 } 374 375 srclen = u_unescape(testdata[0], source, 10); 376 iter = ucol_openElements(coll, source, srclen, &status); 377 backAndForth(iter); 378 ucol_closeElements(iter); 379 380 srclen = u_unescape(testdata[1], source, 10); 381 iter = ucol_openElements(coll, source, srclen, &status); 382 backAndForth(iter); 383 ucol_closeElements(iter); 384 385 while (count < 12) { 386 srclen = u_unescape(testdata[count], source, 10); 387 iter = ucol_openElements(coll, source, srclen, &status); 388 389 if (U_FAILURE(status)){ 390 log_err("ERROR: in creation of collator element iterator\n %s\n", 391 myErrorName(status)); 392 return; 393 } 394 backAndForth(iter); 395 ucol_closeElements(iter); 396 397 iter = ucol_openElements(coll, source, -1, &status); 398 399 if (U_FAILURE(status)){ 400 log_err("ERROR: in creation of collator element iterator\n %s\n", 401 myErrorName(status)); 402 return; 403 } 404 backAndForth(iter); 405 ucol_closeElements(iter); 406 count ++; 407 } 408 ucol_close(coll); 409 } 410 411 /** 412 * Test for CollationElementIterator.previous() 413 * 414 * @bug 4108758 - Make sure it works with contracting characters 415 * 416 */ 417 static void TestPrevious() 418 { 419 UCollator *coll=NULL; 420 UChar rule[50]; 421 UChar *source; 422 UCollator *c1, *c2, *c3; 423 UCollationElements *iter; 424 UErrorCode status = U_ZERO_ERROR; 425 UChar test1[50]; 426 UChar test2[50]; 427 428 u_uastrcpy(test1, "What subset of all possible test cases?"); 429 u_uastrcpy(test2, "has the highest probability of detecting"); 430 coll = ucol_open("en_US", &status); 431 432 iter=ucol_openElements(coll, test1, u_strlen(test1), &status); 433 log_verbose("English locale testing back and forth\n"); 434 if(U_FAILURE(status)){ 435 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 436 myErrorName(status)); 437 ucol_close(coll); 438 return; 439 } 440 /* A basic test to see if it's working at all */ 441 backAndForth(iter); 442 ucol_closeElements(iter); 443 ucol_close(coll); 444 445 /* Test with a contracting character sequence */ 446 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"); 447 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 448 449 log_verbose("Contraction rule testing back and forth with no normalization\n"); 450 451 if (c1 == NULL || U_FAILURE(status)) 452 { 453 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n", 454 myErrorName(status)); 455 return; 456 } 457 source=(UChar*)malloc(sizeof(UChar) * 20); 458 u_uastrcpy(source, "abchdcba"); 459 iter=ucol_openElements(c1, source, u_strlen(source), &status); 460 if(U_FAILURE(status)){ 461 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 462 myErrorName(status)); 463 return; 464 } 465 backAndForth(iter); 466 ucol_closeElements(iter); 467 ucol_close(c1); 468 469 /* Test with an expanding character sequence */ 470 u_uastrcpy(rule, "&a < b < c/abd < d"); 471 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 472 log_verbose("Expansion rule testing back and forth with no normalization\n"); 473 if (c2 == NULL || U_FAILURE(status)) 474 { 475 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", 476 myErrorName(status)); 477 return; 478 } 479 u_uastrcpy(source, "abcd"); 480 iter=ucol_openElements(c2, source, u_strlen(source), &status); 481 if(U_FAILURE(status)){ 482 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 483 myErrorName(status)); 484 return; 485 } 486 backAndForth(iter); 487 ucol_closeElements(iter); 488 ucol_close(c2); 489 /* Now try both */ 490 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch"); 491 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status); 492 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n"); 493 494 if (c3 == NULL || U_FAILURE(status)) 495 { 496 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", 497 myErrorName(status)); 498 return; 499 } 500 u_uastrcpy(source, "abcdbchdc"); 501 iter=ucol_openElements(c3, source, u_strlen(source), &status); 502 if(U_FAILURE(status)){ 503 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 504 myErrorName(status)); 505 return; 506 } 507 backAndForth(iter); 508 ucol_closeElements(iter); 509 ucol_close(c3); 510 source[0] = 0x0e41; 511 source[1] = 0x0e02; 512 source[2] = 0x0e41; 513 source[3] = 0x0e02; 514 source[4] = 0x0e27; 515 source[5] = 0x61; 516 source[6] = 0x62; 517 source[7] = 0x63; 518 source[8] = 0; 519 520 coll = ucol_open("th_TH", &status); 521 log_verbose("Thai locale testing back and forth with normalization\n"); 522 iter=ucol_openElements(coll, source, u_strlen(source), &status); 523 if(U_FAILURE(status)){ 524 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 525 myErrorName(status)); 526 return; 527 } 528 backAndForth(iter); 529 ucol_closeElements(iter); 530 ucol_close(coll); 531 532 /* prev test */ 533 source[0] = 0x0061; 534 source[1] = 0x30CF; 535 source[2] = 0x3099; 536 source[3] = 0x30FC; 537 source[4] = 0; 538 539 coll = ucol_open("ja_JP", &status); 540 log_verbose("Japanese locale testing back and forth with normalization\n"); 541 iter=ucol_openElements(coll, source, u_strlen(source), &status); 542 if(U_FAILURE(status)){ 543 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 544 myErrorName(status)); 545 return; 546 } 547 backAndForth(iter); 548 ucol_closeElements(iter); 549 ucol_close(coll); 550 551 free(source); 552 } 553 554 /** 555 * Test for getOffset() and setOffset() 556 */ 557 static void TestOffset() 558 { 559 UErrorCode status= U_ZERO_ERROR; 560 UCollator *en_us=NULL; 561 UCollationElements *iter, *pristine; 562 int32_t offset; 563 OrderAndOffset *orders; 564 int32_t orderLength=0; 565 int count = 0; 566 UChar test1[50]; 567 UChar test2[50]; 568 569 u_uastrcpy(test1, "What subset of all possible test cases?"); 570 u_uastrcpy(test2, "has the highest probability of detecting"); 571 en_us = ucol_open("en_US", &status); 572 log_verbose("Testing getOffset and setOffset for collations\n"); 573 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status); 574 if(U_FAILURE(status)){ 575 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 576 myErrorName(status)); 577 ucol_close(en_us); 578 return; 579 } 580 581 /* testing boundaries */ 582 ucol_setOffset(iter, 0, &status); 583 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) { 584 log_err("Error: After setting offset to 0, we should be at the end " 585 "of the backwards iteration"); 586 } 587 ucol_setOffset(iter, u_strlen(test1), &status); 588 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) { 589 log_err("Error: After setting offset to end of the string, we should " 590 "be at the end of the backwards iteration"); 591 } 592 593 /* Run all the way through the iterator, then get the offset */ 594 595 orders = getOrders(iter, &orderLength); 596 597 offset = ucol_getOffset(iter); 598 599 if (offset != u_strlen(test1)) 600 { 601 log_err("offset at end != length %d vs %d\n", offset, 602 u_strlen(test1) ); 603 } 604 605 /* Now set the offset back to the beginning and see if it works */ 606 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status); 607 if(U_FAILURE(status)){ 608 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 609 myErrorName(status)); 610 ucol_close(en_us); 611 return; 612 } 613 status = U_ZERO_ERROR; 614 615 ucol_setOffset(iter, 0, &status); 616 if (U_FAILURE(status)) 617 { 618 log_err("setOffset failed. %s\n", myErrorName(status)); 619 } 620 else 621 { 622 assertEqual(iter, pristine); 623 } 624 625 ucol_closeElements(pristine); 626 ucol_closeElements(iter); 627 free(orders); 628 629 /* testing offsets in normalization buffer */ 630 test1[0] = 0x61; 631 test1[1] = 0x300; 632 test1[2] = 0x316; 633 test1[3] = 0x62; 634 test1[4] = 0; 635 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 636 iter = ucol_openElements(en_us, test1, 4, &status); 637 if(U_FAILURE(status)){ 638 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 639 myErrorName(status)); 640 ucol_close(en_us); 641 return; 642 } 643 644 count = 0; 645 while (ucol_next(iter, &status) != UCOL_NULLORDER && 646 U_SUCCESS(status)) { 647 switch (count) { 648 case 0: 649 if (ucol_getOffset(iter) != 1) { 650 log_err("ERROR: Offset of iteration should be 1\n"); 651 } 652 break; 653 case 3: 654 if (ucol_getOffset(iter) != 4) { 655 log_err("ERROR: Offset of iteration should be 4\n"); 656 } 657 break; 658 default: 659 if (ucol_getOffset(iter) != 3) { 660 log_err("ERROR: Offset of iteration should be 3\n"); 661 } 662 } 663 count ++; 664 } 665 666 ucol_reset(iter); 667 count = 0; 668 while (ucol_previous(iter, &status) != UCOL_NULLORDER && 669 U_SUCCESS(status)) { 670 switch (count) { 671 case 0: 672 case 1: 673 if (ucol_getOffset(iter) != 3) { 674 log_err("ERROR: Offset of iteration should be 3\n"); 675 } 676 break; 677 case 2: 678 if (ucol_getOffset(iter) != 1) { 679 log_err("ERROR: Offset of iteration should be 1\n"); 680 } 681 break; 682 default: 683 if (ucol_getOffset(iter) != 0) { 684 log_err("ERROR: Offset of iteration should be 0\n"); 685 } 686 } 687 count ++; 688 } 689 690 if(U_FAILURE(status)){ 691 log_err("ERROR: in iterating collation elements %s\n", 692 myErrorName(status)); 693 } 694 695 ucol_closeElements(iter); 696 ucol_close(en_us); 697 } 698 699 /** 700 * Test for setText() 701 */ 702 static void TestSetText() 703 { 704 int32_t c,i; 705 UErrorCode status = U_ZERO_ERROR; 706 UCollator *en_us=NULL; 707 UCollationElements *iter1, *iter2; 708 UChar test1[50]; 709 UChar test2[50]; 710 711 u_uastrcpy(test1, "What subset of all possible test cases?"); 712 u_uastrcpy(test2, "has the highest probability of detecting"); 713 en_us = ucol_open("en_US", &status); 714 log_verbose("testing setText for Collation elements\n"); 715 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status); 716 if(U_FAILURE(status)){ 717 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n", 718 myErrorName(status)); 719 ucol_close(en_us); 720 return; 721 } 722 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status); 723 if(U_FAILURE(status)){ 724 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n", 725 myErrorName(status)); 726 ucol_close(en_us); 727 return; 728 } 729 730 /* Run through the second iterator just to exercise it */ 731 c = ucol_next(iter2, &status); 732 i = 0; 733 734 while ( ++i < 10 && (c != UCOL_NULLORDER)) 735 { 736 if (U_FAILURE(status)) 737 { 738 log_err("iter2->next() returned an error. %s\n", myErrorName(status)); 739 ucol_closeElements(iter2); 740 ucol_closeElements(iter1); 741 ucol_close(en_us); 742 return; 743 } 744 745 c = ucol_next(iter2, &status); 746 } 747 748 /* Now set it to point to the same string as the first iterator */ 749 ucol_setText(iter2, test1, u_strlen(test1), &status); 750 if (U_FAILURE(status)) 751 { 752 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status)); 753 } 754 else 755 { 756 assertEqual(iter1, iter2); 757 } 758 759 /* Now set it to point to a null string with fake length*/ 760 ucol_setText(iter2, NULL, 2, &status); 761 if (status != U_ILLEGAL_ARGUMENT_ERROR) 762 { 763 log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n", 764 myErrorName(status)); 765 } 766 767 ucol_closeElements(iter2); 768 ucol_closeElements(iter1); 769 ucol_close(en_us); 770 } 771 772 /** @bug 4108762 773 * Test for getMaxExpansion() 774 */ 775 static void TestMaxExpansion() 776 { 777 UErrorCode status = U_ZERO_ERROR; 778 UCollator *coll ;/*= ucol_open("en_US", &status);*/ 779 UChar ch = 0; 780 UChar32 unassigned = 0xEFFFD; 781 UChar supplementary[2]; 782 uint32_t stringOffset = 0; 783 UBool isError = FALSE; 784 uint32_t sorder = 0; 785 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/ 786 uint32_t temporder = 0; 787 788 UChar rule[256]; 789 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch"); 790 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, 791 UCOL_DEFAULT_STRENGTH,NULL, &status); 792 if(U_SUCCESS(status) && coll) { 793 iter = ucol_openElements(coll, &ch, 1, &status); 794 795 while (ch < 0xFFFF && U_SUCCESS(status)) { 796 int count = 1; 797 uint32_t order; 798 int32_t size = 0; 799 800 ch ++; 801 802 ucol_setText(iter, &ch, 1, &status); 803 order = ucol_previous(iter, &status); 804 805 /* thai management */ 806 if (order == 0) 807 order = ucol_previous(iter, &status); 808 809 while (U_SUCCESS(status) && 810 ucol_previous(iter, &status) != UCOL_NULLORDER) { 811 count ++; 812 } 813 814 size = ucol_getMaxExpansion(iter, order); 815 if (U_FAILURE(status) || size < count) { 816 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 817 ch, count); 818 } 819 } 820 821 /* testing for exact max expansion */ 822 ch = 0; 823 while (ch < 0x61) { 824 uint32_t order; 825 int32_t size; 826 ucol_setText(iter, &ch, 1, &status); 827 order = ucol_previous(iter, &status); 828 size = ucol_getMaxExpansion(iter, order); 829 if (U_FAILURE(status) || size != 1) { 830 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 831 ch, 1); 832 } 833 ch ++; 834 } 835 836 ch = 0x63; 837 ucol_setText(iter, &ch, 1, &status); 838 temporder = ucol_previous(iter, &status); 839 840 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) { 841 log_err("Failure at codepoint %d, maximum expansion count != %d\n", 842 ch, 3); 843 } 844 845 ch = 0x64; 846 ucol_setText(iter, &ch, 1, &status); 847 temporder = ucol_previous(iter, &status); 848 849 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) { 850 log_err("Failure at codepoint %d, maximum expansion count != %d\n", 851 ch, 3); 852 } 853 854 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError); 855 (void)isError; /* Suppress set but not used warning. */ 856 ucol_setText(iter, supplementary, 2, &status); 857 sorder = ucol_previous(iter, &status); 858 859 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) { 860 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 861 ch, 2); 862 } 863 864 /* testing jamo */ 865 ch = 0x1165; 866 867 ucol_setText(iter, &ch, 1, &status); 868 temporder = ucol_previous(iter, &status); 869 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) { 870 log_err("Failure at codepoint %d, maximum expansion count > %d\n", 871 ch, 3); 872 } 873 874 ucol_closeElements(iter); 875 ucol_close(coll); 876 877 /* testing special jamo &a<\u1160 */ 878 rule[0] = 0x26; 879 rule[1] = 0x71; 880 rule[2] = 0x3c; 881 rule[3] = 0x1165; 882 rule[4] = 0x2f; 883 rule[5] = 0x71; 884 rule[6] = 0x71; 885 rule[7] = 0x71; 886 rule[8] = 0x71; 887 rule[9] = 0; 888 889 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, 890 UCOL_DEFAULT_STRENGTH,NULL, &status); 891 iter = ucol_openElements(coll, &ch, 1, &status); 892 893 temporder = ucol_previous(iter, &status); 894 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) { 895 log_err("Failure at codepoint %d, maximum expansion count > %d\n", 896 ch, 5); 897 } 898 899 ucol_closeElements(iter); 900 ucol_close(coll); 901 } else { 902 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 903 } 904 905 } 906 907 908 static void assertEqual(UCollationElements *i1, UCollationElements *i2) 909 { 910 int32_t c1, c2; 911 int32_t count = 0; 912 UErrorCode status = U_ZERO_ERROR; 913 914 do 915 { 916 c1 = ucol_next(i1, &status); 917 c2 = ucol_next(i2, &status); 918 919 if (c1 != c2) 920 { 921 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2); 922 break; 923 } 924 925 count += 1; 926 } 927 while (c1 != UCOL_NULLORDER); 928 } 929 930 /** 931 * Testing iterators with extremely small buffers 932 */ 933 static void TestSmallBuffer() 934 { 935 UErrorCode status = U_ZERO_ERROR; 936 UCollator *coll; 937 UCollationElements *testiter, 938 *iter; 939 int32_t count = 0; 940 OrderAndOffset *testorders, 941 *orders; 942 943 UChar teststr[500]; 944 UChar str[] = {0x300, 0x31A, 0}; 945 /* 946 creating a long string of decomposable characters, 947 since by default the writable buffer is of size 256 948 */ 949 while (count < 500) { 950 if ((count & 1) == 0) { 951 teststr[count ++] = 0x300; 952 } 953 else { 954 teststr[count ++] = 0x31A; 955 } 956 } 957 958 coll = ucol_open("th_TH", &status); 959 if(U_SUCCESS(status) && coll) { 960 testiter = ucol_openElements(coll, teststr, 500, &status); 961 iter = ucol_openElements(coll, str, 2, &status); 962 963 orders = getOrders(iter, &count); 964 if (count != 2) { 965 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n"); 966 } 967 968 /* 969 this will rearrange the string data to 250 characters of 0x300 first then 970 250 characters of 0x031A 971 */ 972 testorders = getOrders(testiter, &count); 973 974 if (count != 500) { 975 log_err("Error decomposition does not give the right sized collation elements\n"); 976 } 977 978 while (count != 0) { 979 /* UCA collation element for 0x0F76 */ 980 if ((count > 250 && testorders[-- count].order != orders[1].order) || 981 (count <= 250 && testorders[-- count].order != orders[0].order)) { 982 log_err("Error decomposition does not give the right collation element at %d count\n", count); 983 break; 984 } 985 } 986 987 free(testorders); 988 free(orders); 989 990 ucol_reset(testiter); 991 992 /* ensures closing of elements done properly to clear writable buffer */ 993 ucol_next(testiter, &status); 994 ucol_next(testiter, &status); 995 ucol_closeElements(testiter); 996 ucol_closeElements(iter); 997 ucol_close(coll); 998 } else { 999 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 1000 } 1001 } 1002 1003 /** 1004 * Testing the discontigous contractions 1005 */ 1006 static void TestDiscontiguos() { 1007 const char *rulestr = 1008 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; 1009 UChar rule[50]; 1010 int rulelen = u_unescape(rulestr, rule, 50); 1011 const char *src[] = { 1012 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", 1013 /* base character blocked */ 1014 "XD\\u0300", "XD\\u0300\\u0315", 1015 /* non blocking combining character */ 1016 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315", 1017 /* blocking combining character */ 1018 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315", 1019 /* contraction prefix */ 1020 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315", 1021 "X\\u0300\\u031A\\u0315", 1022 /* ends not with a contraction character */ 1023 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D", 1024 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D" 1025 }; 1026 const char *tgt[] = { 1027 /* non blocking combining character */ 1028 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC", 1029 /* base character blocked */ 1030 "X D \\u0300", "X D \\u0300\\u0315", 1031 /* non blocking combining character */ 1032 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319", 1033 /* blocking combining character */ 1034 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315", 1035 /* contraction prefix */ 1036 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319", 1037 "X\\u0300 \\u031A \\u0315", 1038 /* ends not with a contraction character */ 1039 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D", 1040 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D" 1041 }; 1042 int size = 20; 1043 UCollator *coll; 1044 UErrorCode status = U_ZERO_ERROR; 1045 int count = 0; 1046 UCollationElements *iter; 1047 UCollationElements *resultiter; 1048 1049 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 1050 iter = ucol_openElements(coll, rule, 1, &status); 1051 resultiter = ucol_openElements(coll, rule, 1, &status); 1052 1053 if (U_FAILURE(status)) { 1054 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status)); 1055 return; 1056 } 1057 1058 while (count < size) { 1059 UChar str[20]; 1060 UChar tstr[20]; 1061 int strLen = u_unescape(src[count], str, 20); 1062 UChar *s; 1063 1064 ucol_setText(iter, str, strLen, &status); 1065 if (U_FAILURE(status)) { 1066 log_err("Error opening collation iterator\n"); 1067 return; 1068 } 1069 1070 u_unescape(tgt[count], tstr, 20); 1071 s = tstr; 1072 1073 log_verbose("count %d\n", count); 1074 1075 for (;;) { 1076 uint32_t ce; 1077 UChar *e = u_strchr(s, 0x20); 1078 if (e == 0) { 1079 e = u_strchr(s, 0); 1080 } 1081 ucol_setText(resultiter, s, (int32_t)(e - s), &status); 1082 ce = ucol_next(resultiter, &status); 1083 if (U_FAILURE(status)) { 1084 log_err("Error manipulating collation iterator\n"); 1085 return; 1086 } 1087 while (ce != UCOL_NULLORDER) { 1088 if (ce != (uint32_t)ucol_next(iter, &status) || 1089 U_FAILURE(status)) { 1090 log_err("Discontiguos contraction test mismatch\n"); 1091 return; 1092 } 1093 ce = ucol_next(resultiter, &status); 1094 if (U_FAILURE(status)) { 1095 log_err("Error getting next collation element\n"); 1096 return; 1097 } 1098 } 1099 s = e + 1; 1100 if (*e == 0) { 1101 break; 1102 } 1103 } 1104 ucol_reset(iter); 1105 backAndForth(iter); 1106 count ++; 1107 } 1108 ucol_closeElements(resultiter); 1109 ucol_closeElements(iter); 1110 ucol_close(coll); 1111 } 1112 1113 /** 1114 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with 1115 * normalization on AND jamo tailoring, among other things. 1116 * 1117 * Note: This test is sensitive to changes of the root collator, 1118 * for example whether the ae-ligature maps to three CEs (as in the DUCET) 1119 * or to two CEs (as in the CLDR 24 FractionalUCA.txt). 1120 * It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding. 1121 * For example, the DUCET's artificial secondary CE in the ae-ligature 1122 * may map to two 32-bit iterator CEs (as it did until ICU 52). 1123 */ 1124 static const UChar tsceText[] = { /* Nothing in here should be ignorable */ 1125 0x0020, 0xAC00, /* simple LV Hangul */ 1126 0x0020, 0xAC01, /* simple LVT Hangul */ 1127 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ 1128 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ 1129 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ 1130 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ 1131 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */ 1132 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */ 1133 0x0020, 0x00E6, /* small letter ae, expands */ 1134 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */ 1135 0x0020 1136 }; 1137 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; 1138 1139 static const int32_t rootStandardOffsets[] = { 1140 0, 1,2, 1141 2, 3,4,4, 1142 4, 5,6,6, 1143 6, 7,8,8, 1144 8, 9,10,11, 1145 12, 13,14,15, 1146 16, 17,18,19, 1147 20, 21,22,23, 1148 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */ 1149 26, 27,28,28, 1150 28, 1151 29 1152 }; 1153 enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) }; 1154 1155 static const int32_t rootSearchOffsets[] = { 1156 0, 1,2, 1157 2, 3,4,4, 1158 4, 5,6,6,6, 1159 6, 7,8,8,8,8,8,8, 1160 8, 9,10,11, 1161 12, 13,14,15, 1162 16, 17,18,19,20, 1163 20, 21,22,22,23,23,23,24, 1164 24, 25,26, /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */ 1165 26, 27,28,28, 1166 28, 1167 29 1168 }; 1169 enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) }; 1170 1171 typedef struct { 1172 const char * locale; 1173 const int32_t * offsets; 1174 int32_t offsetsLen; 1175 } TSCEItem; 1176 1177 static const TSCEItem tsceItems[] = { 1178 { "root", rootStandardOffsets, kLen_rootStandardOffsets }, 1179 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets }, 1180 { NULL, NULL, 0 } 1181 }; 1182 1183 static void TestSearchCollatorElements(void) 1184 { 1185 const TSCEItem * tsceItemPtr; 1186 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) { 1187 UErrorCode status = U_ZERO_ERROR; 1188 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status); 1189 if ( U_SUCCESS(status) ) { 1190 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status); 1191 if ( U_SUCCESS(status) ) { 1192 int32_t offset, element; 1193 const int32_t * nextOffsetPtr; 1194 const int32_t * limitOffsetPtr; 1195 1196 nextOffsetPtr = tsceItemPtr->offsets; 1197 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 1198 do { 1199 offset = ucol_getOffset(uce); 1200 element = ucol_next(uce, &status); 1201 log_verbose("(%s) offset=%2d ce=%08x\n", tsceItemPtr->locale, offset, element); 1202 if ( element == 0 ) { 1203 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale ); 1204 } 1205 if ( nextOffsetPtr < limitOffsetPtr ) { 1206 if (offset != *nextOffsetPtr) { 1207 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n", 1208 tsceItemPtr->locale, *nextOffsetPtr, offset ); 1209 nextOffsetPtr = limitOffsetPtr; 1210 break; 1211 } 1212 nextOffsetPtr++; 1213 } else { 1214 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale ); 1215 } 1216 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); 1217 if ( nextOffsetPtr < limitOffsetPtr ) { 1218 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale ); 1219 } 1220 1221 ucol_setOffset(uce, kLen_tsceText, &status); 1222 status = U_ZERO_ERROR; 1223 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 1224 limitOffsetPtr = tsceItemPtr->offsets; 1225 do { 1226 offset = ucol_getOffset(uce); 1227 element = ucol_previous(uce, &status); 1228 if ( element == 0 ) { 1229 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale ); 1230 } 1231 if ( nextOffsetPtr > limitOffsetPtr ) { 1232 nextOffsetPtr--; 1233 if (offset != *nextOffsetPtr) { 1234 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n", 1235 tsceItemPtr->locale, *nextOffsetPtr, offset ); 1236 nextOffsetPtr = limitOffsetPtr; 1237 break; 1238 } 1239 } else { 1240 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale ); 1241 } 1242 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); 1243 if ( nextOffsetPtr > limitOffsetPtr ) { 1244 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale ); 1245 } 1246 1247 ucol_closeElements(uce); 1248 } else { 1249 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); 1250 } 1251 ucol_close(ucol); 1252 } else { 1253 log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); 1254 } 1255 } 1256 } 1257 1258 #endif /* #if !UCONFIG_NO_COLLATION */ 1259