1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File CITERTST.C 9 * 10 * Modification History: 11 * Date Name Description 12 * Madhu Katragadda Ported for C API 13 * 02/19/01 synwee Modified test case for new collation iterator 14 *********************************************************************************/ 15 /* 16 * Collation Iterator tests. 17 * (Let me reiterate my position...) 18 */ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_COLLATION 23 24 #include "unicode/ucol.h" 25 #include "unicode/uloc.h" 26 #include "unicode/uchar.h" 27 #include "unicode/ustring.h" 28 #include "unicode/putil.h" 29 #include "callcoll.h" 30 #include "cmemory.h" 31 #include "cintltst.h" 32 #include "citertst.h" 33 #include "ccolltst.h" 34 #include "filestrm.h" 35 #include "cstring.h" 36 #include "ucol_imp.h" 37 #include "ucol_tok.h" 38 #include "uparse.h" 39 #include <stdio.h> 40 41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); 42 43 void addCollIterTest(TestNode** root) 44 { 45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); 46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); 47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); 48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); 49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); 50 addTest(root, &TestNormalizedUnicodeChar, 51 "tscoll/citertst/TestNormalizedUnicodeChar"); 52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); 53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); 54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); 55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); 56 addTest(root, &TestCEs, "tscoll/citertst/TestCEs"); 57 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); 58 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow"); 59 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); 60 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); 61 } 62 63 /* The locales we support */ 64 65 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"}; 66 67 static void TestBug672() { 68 UErrorCode status = U_ZERO_ERROR; 69 UChar pattern[20]; 70 UChar text[50]; 71 int i; 72 int result[3][3]; 73 74 u_uastrcpy(pattern, "resume"); 75 u_uastrcpy(text, "Time to resume updating my resume."); 76 77 for (i = 0; i < 3; ++ i) { 78 UCollator *coll = ucol_open(LOCALES[i], &status); 79 UCollationElements *pitr = ucol_openElements(coll, pattern, -1, 80 &status); 81 UCollationElements *titer = ucol_openElements(coll, text, -1, 82 &status); 83 if (U_FAILURE(status)) { 84 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n", 85 myErrorName(status)); 86 return; 87 } 88 89 log_verbose("locale tested %s\n", LOCALES[i]); 90 91 while (ucol_next(pitr, &status) != UCOL_NULLORDER && 92 U_SUCCESS(status)) { 93 } 94 if (U_FAILURE(status)) { 95 log_err("ERROR: reversing collation iterator :%s\n", 96 myErrorName(status)); 97 return; 98 } 99 ucol_reset(pitr); 100 101 ucol_setOffset(titer, u_strlen(pattern), &status); 102 if (U_FAILURE(status)) { 103 log_err("ERROR: setting offset in collator :%s\n", 104 myErrorName(status)); 105 return; 106 } 107 result[i][0] = ucol_getOffset(titer); 108 log_verbose("Text iterator set to offset %d\n", result[i][0]); 109 110 /* Use previous() */ 111 ucol_previous(titer, &status); 112 result[i][1] = ucol_getOffset(titer); 113 log_verbose("Current offset %d after previous\n", result[i][1]); 114 115 /* Add one to index */ 116 log_verbose("Adding one to current offset...\n"); 117 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); 118 if (U_FAILURE(status)) { 119 log_err("ERROR: setting offset in collator :%s\n", 120 myErrorName(status)); 121 return; 122 } 123 result[i][2] = ucol_getOffset(titer); 124 log_verbose("Current offset in text = %d\n", result[i][2]); 125 ucol_closeElements(pitr); 126 ucol_closeElements(titer); 127 ucol_close(coll); 128 } 129 130 if (uprv_memcmp(result[0], result[1], 3) != 0 || 131 uprv_memcmp(result[1], result[2], 3) != 0) { 132 log_err("ERROR: Different locales have different offsets at the same character\n"); 133 } 134 } 135 136 137 138 /* Running this test with normalization enabled showed up a bug in the incremental 139 normalization code. */ 140 static void TestBug672Normalize() { 141 UErrorCode status = U_ZERO_ERROR; 142 UChar pattern[20]; 143 UChar text[50]; 144 int i; 145 int result[3][3]; 146 147 u_uastrcpy(pattern, "resume"); 148 u_uastrcpy(text, "Time to resume updating my resume."); 149 150 for (i = 0; i < 3; ++ i) { 151 UCollator *coll = ucol_open(LOCALES[i], &status); 152 UCollationElements *pitr = NULL; 153 UCollationElements *titer = NULL; 154 155 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 156 157 pitr = ucol_openElements(coll, pattern, -1, &status); 158 titer = ucol_openElements(coll, text, -1, &status); 159 if (U_FAILURE(status)) { 160 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n", 161 myErrorName(status)); 162 return; 163 } 164 165 log_verbose("locale tested %s\n", LOCALES[i]); 166 167 while (ucol_next(pitr, &status) != UCOL_NULLORDER && 168 U_SUCCESS(status)) { 169 } 170 if (U_FAILURE(status)) { 171 log_err("ERROR: reversing collation iterator :%s\n", 172 myErrorName(status)); 173 return; 174 } 175 ucol_reset(pitr); 176 177 ucol_setOffset(titer, u_strlen(pattern), &status); 178 if (U_FAILURE(status)) { 179 log_err("ERROR: setting offset in collator :%s\n", 180 myErrorName(status)); 181 return; 182 } 183 result[i][0] = ucol_getOffset(titer); 184 log_verbose("Text iterator set to offset %d\n", result[i][0]); 185 186 /* Use previous() */ 187 ucol_previous(titer, &status); 188 result[i][1] = ucol_getOffset(titer); 189 log_verbose("Current offset %d after previous\n", result[i][1]); 190 191 /* Add one to index */ 192 log_verbose("Adding one to current offset...\n"); 193 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); 194 if (U_FAILURE(status)) { 195 log_err("ERROR: setting offset in collator :%s\n", 196 myErrorName(status)); 197 return; 198 } 199 result[i][2] = ucol_getOffset(titer); 200 log_verbose("Current offset in text = %d\n", result[i][2]); 201 ucol_closeElements(pitr); 202 ucol_closeElements(titer); 203 ucol_close(coll); 204 } 205 206 if (uprv_memcmp(result[0], result[1], 3) != 0 || 207 uprv_memcmp(result[1], result[2], 3) != 0) { 208 log_err("ERROR: Different locales have different offsets at the same character\n"); 209 } 210 } 211 212 213 214 215 /** 216 * Test for CollationElementIterator previous and next for the whole set of 217 * unicode characters. 218 */ 219 static void TestUnicodeChar() 220 { 221 UChar source[0x100]; 222 UCollator *en_us; 223 UCollationElements *iter; 224 UErrorCode status = U_ZERO_ERROR; 225 UChar codepoint; 226 227 UChar *test; 228 en_us = ucol_open("en_US", &status); 229 if (U_FAILURE(status)){ 230 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n", 231 myErrorName(status)); 232 return; 233 } 234 235 for (codepoint = 1; codepoint < 0xFFFE;) 236 { 237 test = source; 238 239 while (codepoint % 0xFF != 0) 240 { 241 if (u_isdefined(codepoint)) 242 *(test ++) = codepoint; 243 codepoint ++; 244 } 245 246 if (u_isdefined(codepoint)) 247 *(test ++) = codepoint; 248 249 if (codepoint != 0xFFFF) 250 codepoint ++; 251 252 *test = 0; 253 iter=ucol_openElements(en_us, source, u_strlen(source), &status); 254 if(U_FAILURE(status)){ 255 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 256 myErrorName(status)); 257 ucol_close(en_us); 258 return; 259 } 260 /* A basic test to see if it's working at all */ 261 log_verbose("codepoint testing %x\n", codepoint); 262 backAndForth(iter); 263 ucol_closeElements(iter); 264 265 /* null termination test */ 266 iter=ucol_openElements(en_us, source, -1, &status); 267 if(U_FAILURE(status)){ 268 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 269 myErrorName(status)); 270 ucol_close(en_us); 271 return; 272 } 273 /* A basic test to see if it's working at all */ 274 backAndForth(iter); 275 ucol_closeElements(iter); 276 } 277 278 ucol_close(en_us); 279 } 280 281 /** 282 * Test for CollationElementIterator previous and next for the whole set of 283 * unicode characters with normalization on. 284 */ 285 static void TestNormalizedUnicodeChar() 286 { 287 UChar source[0x100]; 288 UCollator *th_th; 289 UCollationElements *iter; 290 UErrorCode status = U_ZERO_ERROR; 291 UChar codepoint; 292 293 UChar *test; 294 /* thai should have normalization on */ 295 th_th = ucol_open("th_TH", &status); 296 if (U_FAILURE(status)){ 297 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n", 298 myErrorName(status)); 299 return; 300 } 301 302 for (codepoint = 1; codepoint < 0xFFFE;) 303 { 304 test = source; 305 306 while (codepoint % 0xFF != 0) 307 { 308 if (u_isdefined(codepoint)) 309 *(test ++) = codepoint; 310 codepoint ++; 311 } 312 313 if (u_isdefined(codepoint)) 314 *(test ++) = codepoint; 315 316 if (codepoint != 0xFFFF) 317 codepoint ++; 318 319 *test = 0; 320 iter=ucol_openElements(th_th, source, u_strlen(source), &status); 321 if(U_FAILURE(status)){ 322 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 323 myErrorName(status)); 324 ucol_close(th_th); 325 return; 326 } 327 328 backAndForth(iter); 329 ucol_closeElements(iter); 330 331 iter=ucol_openElements(th_th, source, -1, &status); 332 if(U_FAILURE(status)){ 333 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 334 myErrorName(status)); 335 ucol_close(th_th); 336 return; 337 } 338 339 backAndForth(iter); 340 ucol_closeElements(iter); 341 } 342 343 ucol_close(th_th); 344 } 345 346 /** 347 * Test the incremental normalization 348 */ 349 static void TestNormalization() 350 { 351 UErrorCode status = U_ZERO_ERROR; 352 const char *str = 353 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315"; 354 UCollator *coll; 355 UChar rule[50]; 356 int rulelen = u_unescape(str, rule, 50); 357 int count = 0; 358 const char *testdata[] = 359 {"\\u1ED9", "o\\u0323\\u0302", 360 "\\u0300\\u0315", "\\u0315\\u0300", 361 "A\\u0300\\u0315B", "A\\u0315\\u0300B", 362 "A\\u0316\\u0315B", "A\\u0315\\u0316B", 363 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316", 364 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B", 365 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"}; 366 int32_t srclen; 367 UChar source[10]; 368 UCollationElements *iter; 369 370 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status); 371 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 372 if (U_FAILURE(status)){ 373 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n", 374 myErrorName(status)); 375 return; 376 } 377 378 srclen = u_unescape(testdata[0], source, 10); 379 iter = ucol_openElements(coll, source, srclen, &status); 380 backAndForth(iter); 381 ucol_closeElements(iter); 382 383 srclen = u_unescape(testdata[1], source, 10); 384 iter = ucol_openElements(coll, source, srclen, &status); 385 backAndForth(iter); 386 ucol_closeElements(iter); 387 388 while (count < 12) { 389 srclen = u_unescape(testdata[count], source, 10); 390 iter = ucol_openElements(coll, source, srclen, &status); 391 392 if (U_FAILURE(status)){ 393 log_err("ERROR: in creation of collator element iterator\n %s\n", 394 myErrorName(status)); 395 return; 396 } 397 backAndForth(iter); 398 ucol_closeElements(iter); 399 400 iter = ucol_openElements(coll, source, -1, &status); 401 402 if (U_FAILURE(status)){ 403 log_err("ERROR: in creation of collator element iterator\n %s\n", 404 myErrorName(status)); 405 return; 406 } 407 backAndForth(iter); 408 ucol_closeElements(iter); 409 count ++; 410 } 411 ucol_close(coll); 412 } 413 414 /** 415 * Test for CollationElementIterator.previous() 416 * 417 * @bug 4108758 - Make sure it works with contracting characters 418 * 419 */ 420 static void TestPrevious() 421 { 422 UCollator *coll=NULL; 423 UChar rule[50]; 424 UChar *source; 425 UCollator *c1, *c2, *c3; 426 UCollationElements *iter; 427 UErrorCode status = U_ZERO_ERROR; 428 UChar test1[50]; 429 UChar test2[50]; 430 431 u_uastrcpy(test1, "What subset of all possible test cases?"); 432 u_uastrcpy(test2, "has the highest probability of detecting"); 433 coll = ucol_open("en_US", &status); 434 435 iter=ucol_openElements(coll, test1, u_strlen(test1), &status); 436 log_verbose("English locale testing back and forth\n"); 437 if(U_FAILURE(status)){ 438 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 439 myErrorName(status)); 440 ucol_close(coll); 441 return; 442 } 443 /* A basic test to see if it's working at all */ 444 backAndForth(iter); 445 ucol_closeElements(iter); 446 ucol_close(coll); 447 448 /* Test with a contracting character sequence */ 449 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"); 450 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 451 452 log_verbose("Contraction rule testing back and forth with no normalization\n"); 453 454 if (c1 == NULL || U_FAILURE(status)) 455 { 456 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n", 457 myErrorName(status)); 458 return; 459 } 460 source=(UChar*)malloc(sizeof(UChar) * 20); 461 u_uastrcpy(source, "abchdcba"); 462 iter=ucol_openElements(c1, source, u_strlen(source), &status); 463 if(U_FAILURE(status)){ 464 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 465 myErrorName(status)); 466 return; 467 } 468 backAndForth(iter); 469 ucol_closeElements(iter); 470 ucol_close(c1); 471 472 /* Test with an expanding character sequence */ 473 u_uastrcpy(rule, "&a < b < c/abd < d"); 474 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 475 log_verbose("Expansion rule testing back and forth with no normalization\n"); 476 if (c2 == NULL || U_FAILURE(status)) 477 { 478 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", 479 myErrorName(status)); 480 return; 481 } 482 u_uastrcpy(source, "abcd"); 483 iter=ucol_openElements(c2, source, u_strlen(source), &status); 484 if(U_FAILURE(status)){ 485 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 486 myErrorName(status)); 487 return; 488 } 489 backAndForth(iter); 490 ucol_closeElements(iter); 491 ucol_close(c2); 492 /* Now try both */ 493 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch"); 494 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status); 495 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n"); 496 497 if (c3 == NULL || U_FAILURE(status)) 498 { 499 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", 500 myErrorName(status)); 501 return; 502 } 503 u_uastrcpy(source, "abcdbchdc"); 504 iter=ucol_openElements(c3, source, u_strlen(source), &status); 505 if(U_FAILURE(status)){ 506 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 507 myErrorName(status)); 508 return; 509 } 510 backAndForth(iter); 511 ucol_closeElements(iter); 512 ucol_close(c3); 513 source[0] = 0x0e41; 514 source[1] = 0x0e02; 515 source[2] = 0x0e41; 516 source[3] = 0x0e02; 517 source[4] = 0x0e27; 518 source[5] = 0x61; 519 source[6] = 0x62; 520 source[7] = 0x63; 521 source[8] = 0; 522 523 coll = ucol_open("th_TH", &status); 524 log_verbose("Thai locale testing back and forth with normalization\n"); 525 iter=ucol_openElements(coll, source, u_strlen(source), &status); 526 if(U_FAILURE(status)){ 527 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 528 myErrorName(status)); 529 return; 530 } 531 backAndForth(iter); 532 ucol_closeElements(iter); 533 ucol_close(coll); 534 535 /* prev test */ 536 source[0] = 0x0061; 537 source[1] = 0x30CF; 538 source[2] = 0x3099; 539 source[3] = 0x30FC; 540 source[4] = 0; 541 542 coll = ucol_open("ja_JP", &status); 543 log_verbose("Japanese locale testing back and forth with normalization\n"); 544 iter=ucol_openElements(coll, source, u_strlen(source), &status); 545 if(U_FAILURE(status)){ 546 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 547 myErrorName(status)); 548 return; 549 } 550 backAndForth(iter); 551 ucol_closeElements(iter); 552 ucol_close(coll); 553 554 free(source); 555 } 556 557 /** 558 * Test for getOffset() and setOffset() 559 */ 560 static void TestOffset() 561 { 562 UErrorCode status= U_ZERO_ERROR; 563 UCollator *en_us=NULL; 564 UCollationElements *iter, *pristine; 565 int32_t offset; 566 OrderAndOffset *orders; 567 int32_t orderLength=0; 568 int count = 0; 569 UChar test1[50]; 570 UChar test2[50]; 571 572 u_uastrcpy(test1, "What subset of all possible test cases?"); 573 u_uastrcpy(test2, "has the highest probability of detecting"); 574 en_us = ucol_open("en_US", &status); 575 log_verbose("Testing getOffset and setOffset for collations\n"); 576 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status); 577 if(U_FAILURE(status)){ 578 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 579 myErrorName(status)); 580 ucol_close(en_us); 581 return; 582 } 583 584 /* testing boundaries */ 585 ucol_setOffset(iter, 0, &status); 586 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) { 587 log_err("Error: After setting offset to 0, we should be at the end " 588 "of the backwards iteration"); 589 } 590 ucol_setOffset(iter, u_strlen(test1), &status); 591 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) { 592 log_err("Error: After setting offset to end of the string, we should " 593 "be at the end of the backwards iteration"); 594 } 595 596 /* Run all the way through the iterator, then get the offset */ 597 598 orders = getOrders(iter, &orderLength); 599 600 offset = ucol_getOffset(iter); 601 602 if (offset != u_strlen(test1)) 603 { 604 log_err("offset at end != length %d vs %d\n", offset, 605 u_strlen(test1) ); 606 } 607 608 /* Now set the offset back to the beginning and see if it works */ 609 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status); 610 if(U_FAILURE(status)){ 611 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 612 myErrorName(status)); 613 ucol_close(en_us); 614 return; 615 } 616 status = U_ZERO_ERROR; 617 618 ucol_setOffset(iter, 0, &status); 619 if (U_FAILURE(status)) 620 { 621 log_err("setOffset failed. %s\n", myErrorName(status)); 622 } 623 else 624 { 625 assertEqual(iter, pristine); 626 } 627 628 ucol_closeElements(pristine); 629 ucol_closeElements(iter); 630 free(orders); 631 632 /* testing offsets in normalization buffer */ 633 test1[0] = 0x61; 634 test1[1] = 0x300; 635 test1[2] = 0x316; 636 test1[3] = 0x62; 637 test1[4] = 0; 638 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 639 iter = ucol_openElements(en_us, test1, 4, &status); 640 if(U_FAILURE(status)){ 641 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 642 myErrorName(status)); 643 ucol_close(en_us); 644 return; 645 } 646 647 count = 0; 648 while (ucol_next(iter, &status) != UCOL_NULLORDER && 649 U_SUCCESS(status)) { 650 switch (count) { 651 case 0: 652 if (ucol_getOffset(iter) != 1) { 653 log_err("ERROR: Offset of iteration should be 1\n"); 654 } 655 break; 656 case 3: 657 if (ucol_getOffset(iter) != 4) { 658 log_err("ERROR: Offset of iteration should be 4\n"); 659 } 660 break; 661 default: 662 if (ucol_getOffset(iter) != 3) { 663 log_err("ERROR: Offset of iteration should be 3\n"); 664 } 665 } 666 count ++; 667 } 668 669 ucol_reset(iter); 670 count = 0; 671 while (ucol_previous(iter, &status) != UCOL_NULLORDER && 672 U_SUCCESS(status)) { 673 switch (count) { 674 case 0: 675 case 1: 676 if (ucol_getOffset(iter) != 3) { 677 log_err("ERROR: Offset of iteration should be 3\n"); 678 } 679 break; 680 case 2: 681 if (ucol_getOffset(iter) != 1) { 682 log_err("ERROR: Offset of iteration should be 1\n"); 683 } 684 break; 685 default: 686 if (ucol_getOffset(iter) != 0) { 687 log_err("ERROR: Offset of iteration should be 0\n"); 688 } 689 } 690 count ++; 691 } 692 693 if(U_FAILURE(status)){ 694 log_err("ERROR: in iterating collation elements %s\n", 695 myErrorName(status)); 696 } 697 698 ucol_closeElements(iter); 699 ucol_close(en_us); 700 } 701 702 /** 703 * Test for setText() 704 */ 705 static void TestSetText() 706 { 707 int32_t c,i; 708 UErrorCode status = U_ZERO_ERROR; 709 UCollator *en_us=NULL; 710 UCollationElements *iter1, *iter2; 711 UChar test1[50]; 712 UChar test2[50]; 713 714 u_uastrcpy(test1, "What subset of all possible test cases?"); 715 u_uastrcpy(test2, "has the highest probability of detecting"); 716 en_us = ucol_open("en_US", &status); 717 log_verbose("testing setText for Collation elements\n"); 718 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status); 719 if(U_FAILURE(status)){ 720 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n", 721 myErrorName(status)); 722 ucol_close(en_us); 723 return; 724 } 725 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status); 726 if(U_FAILURE(status)){ 727 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n", 728 myErrorName(status)); 729 ucol_close(en_us); 730 return; 731 } 732 733 /* Run through the second iterator just to exercise it */ 734 c = ucol_next(iter2, &status); 735 i = 0; 736 737 while ( ++i < 10 && (c != UCOL_NULLORDER)) 738 { 739 if (U_FAILURE(status)) 740 { 741 log_err("iter2->next() returned an error. %s\n", myErrorName(status)); 742 ucol_closeElements(iter2); 743 ucol_closeElements(iter1); 744 ucol_close(en_us); 745 return; 746 } 747 748 c = ucol_next(iter2, &status); 749 } 750 751 /* Now set it to point to the same string as the first iterator */ 752 ucol_setText(iter2, test1, u_strlen(test1), &status); 753 if (U_FAILURE(status)) 754 { 755 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status)); 756 } 757 else 758 { 759 assertEqual(iter1, iter2); 760 } 761 762 /* Now set it to point to a null string with fake length*/ 763 ucol_setText(iter2, NULL, 2, &status); 764 if (U_FAILURE(status)) 765 { 766 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status)); 767 } 768 else 769 { 770 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 771 log_err("iter2 with null text expected to return UCOL_NULLORDER\n"); 772 } 773 } 774 775 ucol_closeElements(iter2); 776 ucol_closeElements(iter1); 777 ucol_close(en_us); 778 } 779 780 /** @bug 4108762 781 * Test for getMaxExpansion() 782 */ 783 static void TestMaxExpansion() 784 { 785 UErrorCode status = U_ZERO_ERROR; 786 UCollator *coll ;/*= ucol_open("en_US", &status);*/ 787 UChar ch = 0; 788 UChar32 unassigned = 0xEFFFD; 789 UChar supplementary[2]; 790 uint32_t stringOffset = 0; 791 UBool isError = FALSE; 792 uint32_t sorder = 0; 793 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/ 794 uint32_t temporder = 0; 795 796 UChar rule[256]; 797 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch"); 798 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, 799 UCOL_DEFAULT_STRENGTH,NULL, &status); 800 if(U_SUCCESS(status) && coll) { 801 iter = ucol_openElements(coll, &ch, 1, &status); 802 803 while (ch < 0xFFFF && U_SUCCESS(status)) { 804 int count = 1; 805 uint32_t order; 806 int32_t size = 0; 807 808 ch ++; 809 810 ucol_setText(iter, &ch, 1, &status); 811 order = ucol_previous(iter, &status); 812 813 /* thai management */ 814 if (order == 0) 815 order = ucol_previous(iter, &status); 816 817 while (U_SUCCESS(status) && 818 ucol_previous(iter, &status) != UCOL_NULLORDER) { 819 count ++; 820 } 821 822 size = ucol_getMaxExpansion(iter, order); 823 if (U_FAILURE(status) || size < count) { 824 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 825 ch, count); 826 } 827 } 828 829 /* testing for exact max expansion */ 830 ch = 0; 831 while (ch < 0x61) { 832 uint32_t order; 833 int32_t size; 834 ucol_setText(iter, &ch, 1, &status); 835 order = ucol_previous(iter, &status); 836 size = ucol_getMaxExpansion(iter, order); 837 if (U_FAILURE(status) || size != 1) { 838 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 839 ch, 1); 840 } 841 ch ++; 842 } 843 844 ch = 0x63; 845 ucol_setText(iter, &ch, 1, &status); 846 temporder = ucol_previous(iter, &status); 847 848 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) { 849 log_err("Failure at codepoint %d, maximum expansion count != %d\n", 850 ch, 3); 851 } 852 853 ch = 0x64; 854 ucol_setText(iter, &ch, 1, &status); 855 temporder = ucol_previous(iter, &status); 856 857 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) { 858 log_err("Failure at codepoint %d, maximum expansion count != %d\n", 859 ch, 3); 860 } 861 862 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError); 863 ucol_setText(iter, supplementary, 2, &status); 864 sorder = ucol_previous(iter, &status); 865 866 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) { 867 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 868 ch, 2); 869 } 870 871 /* testing jamo */ 872 ch = 0x1165; 873 874 ucol_setText(iter, &ch, 1, &status); 875 temporder = ucol_previous(iter, &status); 876 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) { 877 log_err("Failure at codepoint %d, maximum expansion count > %d\n", 878 ch, 3); 879 } 880 881 ucol_closeElements(iter); 882 ucol_close(coll); 883 884 /* testing special jamo &a<\u1160 */ 885 rule[0] = 0x26; 886 rule[1] = 0x71; 887 rule[2] = 0x3c; 888 rule[3] = 0x1165; 889 rule[4] = 0x2f; 890 rule[5] = 0x71; 891 rule[6] = 0x71; 892 rule[7] = 0x71; 893 rule[8] = 0x71; 894 rule[9] = 0; 895 896 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, 897 UCOL_DEFAULT_STRENGTH,NULL, &status); 898 iter = ucol_openElements(coll, &ch, 1, &status); 899 900 temporder = ucol_previous(iter, &status); 901 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) { 902 log_err("Failure at codepoint %d, maximum expansion count > %d\n", 903 ch, 5); 904 } 905 906 ucol_closeElements(iter); 907 ucol_close(coll); 908 } else { 909 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 910 } 911 912 } 913 914 915 static void assertEqual(UCollationElements *i1, UCollationElements *i2) 916 { 917 int32_t c1, c2; 918 int32_t count = 0; 919 UErrorCode status = U_ZERO_ERROR; 920 921 do 922 { 923 c1 = ucol_next(i1, &status); 924 c2 = ucol_next(i2, &status); 925 926 if (c1 != c2) 927 { 928 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2); 929 break; 930 } 931 932 count += 1; 933 } 934 while (c1 != UCOL_NULLORDER); 935 } 936 937 /** 938 * Testing iterators with extremely small buffers 939 */ 940 static void TestSmallBuffer() 941 { 942 UErrorCode status = U_ZERO_ERROR; 943 UCollator *coll; 944 UCollationElements *testiter, 945 *iter; 946 int32_t count = 0; 947 OrderAndOffset *testorders, 948 *orders; 949 950 UChar teststr[500]; 951 UChar str[] = {0x300, 0x31A, 0}; 952 /* 953 creating a long string of decomposable characters, 954 since by default the writable buffer is of size 256 955 */ 956 while (count < 500) { 957 if ((count & 1) == 0) { 958 teststr[count ++] = 0x300; 959 } 960 else { 961 teststr[count ++] = 0x31A; 962 } 963 } 964 965 coll = ucol_open("th_TH", &status); 966 if(U_SUCCESS(status) && coll) { 967 testiter = ucol_openElements(coll, teststr, 500, &status); 968 iter = ucol_openElements(coll, str, 2, &status); 969 970 orders = getOrders(iter, &count); 971 if (count != 2) { 972 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n"); 973 } 974 975 /* 976 this will rearrange the string data to 250 characters of 0x300 first then 977 250 characters of 0x031A 978 */ 979 testorders = getOrders(testiter, &count); 980 981 if (count != 500) { 982 log_err("Error decomposition does not give the right sized collation elements\n"); 983 } 984 985 while (count != 0) { 986 /* UCA collation element for 0x0F76 */ 987 if ((count > 250 && testorders[-- count].order != orders[1].order) || 988 (count <= 250 && testorders[-- count].order != orders[0].order)) { 989 log_err("Error decomposition does not give the right collation element at %d count\n", count); 990 break; 991 } 992 } 993 994 free(testorders); 995 free(orders); 996 997 ucol_reset(testiter); 998 999 /* ensures closing of elements done properly to clear writable buffer */ 1000 ucol_next(testiter, &status); 1001 ucol_next(testiter, &status); 1002 ucol_closeElements(testiter); 1003 ucol_closeElements(iter); 1004 ucol_close(coll); 1005 } else { 1006 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 1007 } 1008 } 1009 1010 /** 1011 * Sniplets of code from genuca 1012 */ 1013 static int32_t hex2num(char hex) { 1014 if(hex>='0' && hex <='9') { 1015 return hex-'0'; 1016 } else if(hex>='a' && hex<='f') { 1017 return hex-'a'+10; 1018 } else if(hex>='A' && hex<='F') { 1019 return hex-'A'+10; 1020 } else { 1021 return 0; 1022 } 1023 } 1024 1025 /** 1026 * Getting codepoints from a string 1027 * @param str character string contain codepoints seperated by space and ended 1028 * by a semicolon 1029 * @param codepoints array for storage, assuming size > 5 1030 * @return position at the end of the codepoint section 1031 */ 1032 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) { 1033 UErrorCode errorCode = U_ZERO_ERROR; 1034 char *semi = uprv_strchr(str, ';'); 1035 char *pipe = uprv_strchr(str, '|'); 1036 char *s; 1037 *codepoints = 0; 1038 *contextCPs = 0; 1039 if(semi == NULL) { 1040 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str); 1041 return str; 1042 } 1043 if(pipe != NULL) { 1044 int32_t contextLength; 1045 *pipe = 0; 1046 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode); 1047 *pipe = '|'; 1048 if(U_FAILURE(errorCode)) { 1049 log_err("error parsing precontext string from FractionalUCA.txt %s\n", str); 1050 return str; 1051 } 1052 /* prepend the precontext string to the codepoints */ 1053 u_memcpy(codepoints, contextCPs, contextLength); 1054 codepoints += contextLength; 1055 /* start of the code point string */ 1056 s = pipe + 1; 1057 } else { 1058 s = str; 1059 } 1060 u_parseString(s, codepoints, 99, NULL, &errorCode); 1061 if(U_FAILURE(errorCode)) { 1062 log_err("error parsing code point string from FractionalUCA.txt %s\n", str); 1063 return str; 1064 } 1065 return semi + 1; 1066 } 1067 1068 /** 1069 * Sniplets of code from genuca 1070 */ 1071 static int32_t 1072 readElement(char **from, char *to, char separator, UErrorCode *status) 1073 { 1074 if (U_SUCCESS(*status)) { 1075 char buffer[1024]; 1076 int32_t i = 0; 1077 while (**from != separator) { 1078 if (**from != ' ') { 1079 *(buffer+i++) = **from; 1080 } 1081 (*from)++; 1082 } 1083 (*from)++; 1084 *(buffer + i) = 0; 1085 strcpy(to, buffer); 1086 return i/2; 1087 } 1088 1089 return 0; 1090 } 1091 1092 /** 1093 * Sniplets of code from genuca 1094 */ 1095 static uint32_t 1096 getSingleCEValue(char *primary, char *secondary, char *tertiary, 1097 UErrorCode *status) 1098 { 1099 if (U_SUCCESS(*status)) { 1100 uint32_t value = 0; 1101 char primsave = '\0'; 1102 char secsave = '\0'; 1103 char tersave = '\0'; 1104 char *primend = primary+4; 1105 char *secend = secondary+2; 1106 char *terend = tertiary+2; 1107 uint32_t primvalue; 1108 uint32_t secvalue; 1109 uint32_t tervalue; 1110 1111 if (uprv_strlen(primary) > 4) { 1112 primsave = *primend; 1113 *primend = '\0'; 1114 } 1115 1116 if (uprv_strlen(secondary) > 2) { 1117 secsave = *secend; 1118 *secend = '\0'; 1119 } 1120 1121 if (uprv_strlen(tertiary) > 2) { 1122 tersave = *terend; 1123 *terend = '\0'; 1124 } 1125 1126 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0; 1127 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0; 1128 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0; 1129 if(primvalue <= 0xFF) { 1130 primvalue <<= 8; 1131 } 1132 1133 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK) 1134 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK) 1135 | (tervalue & UCOL_TERTIARYORDERMASK); 1136 1137 if(primsave!='\0') { 1138 *primend = primsave; 1139 } 1140 if(secsave!='\0') { 1141 *secend = secsave; 1142 } 1143 if(tersave!='\0') { 1144 *terend = tersave; 1145 } 1146 return value; 1147 } 1148 return 0; 1149 } 1150 1151 /** 1152 * Getting collation elements generated from a string 1153 * @param str character string contain collation elements contained in [] and 1154 * seperated by space 1155 * @param ce array for storage, assuming size > 20 1156 * @param status error status 1157 * @return position at the end of the codepoint section 1158 */ 1159 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) { 1160 char *pStartCP = uprv_strchr(str, '['); 1161 int count = 0; 1162 char *pEndCP; 1163 char primary[100]; 1164 char secondary[100]; 1165 char tertiary[100]; 1166 1167 while (*pStartCP == '[') { 1168 uint32_t primarycount = 0; 1169 uint32_t secondarycount = 0; 1170 uint32_t tertiarycount = 0; 1171 uint32_t CEi = 1; 1172 pEndCP = strchr(pStartCP, ']'); 1173 if(pEndCP == NULL) { 1174 break; 1175 } 1176 pStartCP ++; 1177 1178 primarycount = readElement(&pStartCP, primary, ',', status); 1179 secondarycount = readElement(&pStartCP, secondary, ',', status); 1180 tertiarycount = readElement(&pStartCP, tertiary, ']', status); 1181 1182 /* I want to get the CEs entered right here, including continuation */ 1183 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status); 1184 if (U_FAILURE(*status)) { 1185 break; 1186 } 1187 1188 while (2 * CEi < primarycount || CEi < secondarycount || 1189 CEi < tertiarycount) { 1190 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ 1191 if (2 * CEi < primarycount) { 1192 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28); 1193 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24); 1194 } 1195 1196 if (2 * CEi + 1 < primarycount) { 1197 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20); 1198 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16); 1199 } 1200 1201 if (CEi < secondarycount) { 1202 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12); 1203 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8); 1204 } 1205 1206 if (CEi < tertiarycount) { 1207 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4); 1208 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF); 1209 } 1210 1211 CEi ++; 1212 ces[count ++] = value; 1213 } 1214 1215 pStartCP = pEndCP + 1; 1216 } 1217 ces[count] = 0; 1218 return pStartCP; 1219 } 1220 1221 /** 1222 * Getting the FractionalUCA.txt file stream 1223 */ 1224 static FileStream * getFractionalUCA(void) 1225 { 1226 char newPath[256]; 1227 char backupPath[256]; 1228 FileStream *result = NULL; 1229 1230 /* Look inside ICU_DATA first */ 1231 uprv_strcpy(newPath, ctest_dataSrcDir()); 1232 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING ); 1233 uprv_strcat(newPath, "FractionalUCA.txt"); 1234 1235 /* As a fallback, try to guess where the source data was located 1236 * at the time ICU was built, and look there. 1237 */ 1238 #if defined (U_TOPSRCDIR) 1239 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 1240 #else 1241 { 1242 UErrorCode errorCode = U_ZERO_ERROR; 1243 strcpy(backupPath, loadTestData(&errorCode)); 1244 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); 1245 } 1246 #endif 1247 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt"); 1248 1249 result = T_FileStream_open(newPath, "rb"); 1250 1251 if (result == NULL) { 1252 result = T_FileStream_open(backupPath, "rb"); 1253 if (result == NULL) { 1254 log_err("Failed to open either %s or %s\n", newPath, backupPath); 1255 } 1256 } 1257 return result; 1258 } 1259 1260 /** 1261 * Testing the CEs returned by the iterator 1262 */ 1263 static void TestCEs() { 1264 FileStream *file = NULL; 1265 char line[2048]; 1266 char *str; 1267 UChar codepoints[10]; 1268 uint32_t ces[20]; 1269 UErrorCode status = U_ZERO_ERROR; 1270 UCollator *coll = ucol_open("", &status); 1271 uint32_t lineNo = 0; 1272 UChar contextCPs[5]; 1273 1274 if (U_FAILURE(status)) { 1275 log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status)); 1276 return; 1277 } 1278 1279 file = getFractionalUCA(); 1280 1281 if (file == NULL) { 1282 log_err("*** unable to open input FractionalUCA.txt file ***\n"); 1283 return; 1284 } 1285 1286 1287 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { 1288 int count = 0; 1289 UCollationElements *iter; 1290 int32_t preContextCeLen=0; 1291 lineNo++; 1292 /* skip this line if it is empty or a comment or is a return value 1293 or start of some variable section */ 1294 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || 1295 line[0] == 0x000D || line[0] == '[') { 1296 continue; 1297 } 1298 1299 str = getCodePoints(line, codepoints, contextCPs); 1300 1301 /* these are 'fake' codepoints in the fractional UCA, and are used just 1302 * for positioning of indirect values. They should not go through this 1303 * test. 1304 */ 1305 if(*codepoints == 0xFDD0) { 1306 continue; 1307 } 1308 if (*contextCPs != 0) { 1309 iter = ucol_openElements(coll, contextCPs, -1, &status); 1310 if (U_FAILURE(status)) { 1311 log_err("Error in opening collation elements\n"); 1312 break; 1313 } 1314 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) { 1315 preContextCeLen++; 1316 } 1317 ucol_closeElements(iter); 1318 } 1319 1320 getCEs(str, ces+preContextCeLen, &status); 1321 if (U_FAILURE(status)) { 1322 log_err("Error in parsing collation elements in FractionalUCA.txt\n"); 1323 break; 1324 } 1325 iter = ucol_openElements(coll, codepoints, -1, &status); 1326 if (U_FAILURE(status)) { 1327 log_err("Error in opening collation elements\n"); 1328 break; 1329 } 1330 for (;;) { 1331 uint32_t ce = (uint32_t)ucol_next(iter, &status); 1332 if (ce == 0xFFFFFFFF) { 1333 ce = 0; 1334 } 1335 /* we now unconditionally reorder Thai/Lao prevowels, so this 1336 * test would fail if we don't skip here. 1337 */ 1338 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) { 1339 continue; 1340 } 1341 if (ce != ces[count] || U_FAILURE(status)) { 1342 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n"); 1343 break; 1344 } 1345 if (ces[count] == 0) { 1346 break; 1347 } 1348 count ++; 1349 } 1350 ucol_closeElements(iter); 1351 } 1352 1353 T_FileStream_close(file); 1354 ucol_close(coll); 1355 } 1356 1357 /** 1358 * Testing the discontigous contractions 1359 */ 1360 static void TestDiscontiguos() { 1361 const char *rulestr = 1362 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; 1363 UChar rule[50]; 1364 int rulelen = u_unescape(rulestr, rule, 50); 1365 const char *src[] = { 1366 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", 1367 /* base character blocked */ 1368 "XD\\u0300", "XD\\u0300\\u0315", 1369 /* non blocking combining character */ 1370 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315", 1371 /* blocking combining character */ 1372 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315", 1373 /* contraction prefix */ 1374 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315", 1375 "X\\u0300\\u031A\\u0315", 1376 /* ends not with a contraction character */ 1377 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D", 1378 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D" 1379 }; 1380 const char *tgt[] = { 1381 /* non blocking combining character */ 1382 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC", 1383 /* base character blocked */ 1384 "X D \\u0300", "X D \\u0300\\u0315", 1385 /* non blocking combining character */ 1386 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319", 1387 /* blocking combining character */ 1388 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315", 1389 /* contraction prefix */ 1390 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319", 1391 "X\\u0300 \\u031A \\u0315", 1392 /* ends not with a contraction character */ 1393 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D", 1394 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D" 1395 }; 1396 int size = 20; 1397 UCollator *coll; 1398 UErrorCode status = U_ZERO_ERROR; 1399 int count = 0; 1400 UCollationElements *iter; 1401 UCollationElements *resultiter; 1402 1403 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 1404 iter = ucol_openElements(coll, rule, 1, &status); 1405 resultiter = ucol_openElements(coll, rule, 1, &status); 1406 1407 if (U_FAILURE(status)) { 1408 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status)); 1409 return; 1410 } 1411 1412 while (count < size) { 1413 UChar str[20]; 1414 UChar tstr[20]; 1415 int strLen = u_unescape(src[count], str, 20); 1416 UChar *s; 1417 1418 ucol_setText(iter, str, strLen, &status); 1419 if (U_FAILURE(status)) { 1420 log_err("Error opening collation iterator\n"); 1421 return; 1422 } 1423 1424 u_unescape(tgt[count], tstr, 20); 1425 s = tstr; 1426 1427 log_verbose("count %d\n", count); 1428 1429 for (;;) { 1430 uint32_t ce; 1431 UChar *e = u_strchr(s, 0x20); 1432 if (e == 0) { 1433 e = u_strchr(s, 0); 1434 } 1435 ucol_setText(resultiter, s, (int32_t)(e - s), &status); 1436 ce = ucol_next(resultiter, &status); 1437 if (U_FAILURE(status)) { 1438 log_err("Error manipulating collation iterator\n"); 1439 return; 1440 } 1441 while (ce != UCOL_NULLORDER) { 1442 if (ce != (uint32_t)ucol_next(iter, &status) || 1443 U_FAILURE(status)) { 1444 log_err("Discontiguos contraction test mismatch\n"); 1445 return; 1446 } 1447 ce = ucol_next(resultiter, &status); 1448 if (U_FAILURE(status)) { 1449 log_err("Error getting next collation element\n"); 1450 return; 1451 } 1452 } 1453 s = e + 1; 1454 if (*e == 0) { 1455 break; 1456 } 1457 } 1458 ucol_reset(iter); 1459 backAndForth(iter); 1460 count ++; 1461 } 1462 ucol_closeElements(resultiter); 1463 ucol_closeElements(iter); 1464 ucol_close(coll); 1465 } 1466 1467 static void TestCEBufferOverflow() 1468 { 1469 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1]; 1470 UErrorCode status = U_ZERO_ERROR; 1471 UChar rule[10]; 1472 UCollator *coll; 1473 UCollationElements *iter; 1474 1475 u_uastrcpy(rule, "&z < AB"); 1476 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 1477 if (U_FAILURE(status)) { 1478 log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status)); 1479 return; 1480 } 1481 1482 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic 1483 test. this will cause an overflow in getPrev */ 1484 str[0] = 0x0041; /* 'A' */ 1485 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/ 1486 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE); 1487 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */ 1488 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1, 1489 &status); 1490 if (ucol_previous(iter, &status) == UCOL_NULLORDER || 1491 status == U_BUFFER_OVERFLOW_ERROR) { 1492 log_err("CE buffer should not overflow with long string of trail surrogates\n"); 1493 } 1494 ucol_closeElements(iter); 1495 ucol_close(coll); 1496 } 1497 1498 /** 1499 * Checking collation element validity. 1500 */ 1501 #define MAX_CODEPOINTS_TO_SHOW 10 1502 static void showCodepoints(const UChar *codepoints, int length, char * codepointText) { 1503 int i, lengthToUse = length; 1504 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) { 1505 lengthToUse = MAX_CODEPOINTS_TO_SHOW; 1506 } 1507 for (i = 0; i < lengthToUse; ++i) { 1508 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++); 1509 if (bytesWritten <= 0) { 1510 break; 1511 } 1512 codepointText += bytesWritten; 1513 } 1514 if (i < length) { 1515 sprintf(codepointText, " ..."); 1516 } 1517 } 1518 1519 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints, 1520 int length) 1521 { 1522 UErrorCode status = U_ZERO_ERROR; 1523 UCollationElements *iter = ucol_openElements(coll, codepoints, length, 1524 &status); 1525 UBool result = FALSE; 1526 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE; 1527 const char * collLocale; 1528 1529 if (U_FAILURE(status)) { 1530 log_err("Error creating iterator for testing validity\n"); 1531 return FALSE; 1532 } 1533 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status); 1534 if (U_FAILURE(status) || collLocale==NULL) { 1535 status = U_ZERO_ERROR; 1536 collLocale = "?"; 1537 } 1538 1539 for (;;) { 1540 uint32_t ce = ucol_next(iter, &status); 1541 uint32_t primary, p1, p2, secondary, tertiary; 1542 if (ce == UCOL_NULLORDER) { 1543 result = TRUE; 1544 break; 1545 } 1546 if (ce == 0) { 1547 continue; 1548 } 1549 if (ce == 0x02000202) { 1550 /* special CE for merge-sort character */ 1551 if (*codepoints == 0xFFFE /* && length == 1 */) { 1552 /* 1553 * Note: We should check for length==1 but the token parser appears 1554 * to give us trailing NUL characters. 1555 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet() 1556 * rather than the internal collation rule parser 1557 */ 1558 continue; 1559 } else { 1560 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n", 1561 (int)*codepoints, (int)length); 1562 break; 1563 } 1564 } 1565 primary = UCOL_PRIMARYORDER(ce); 1566 p1 = primary >> 8; 1567 p2 = primary & 0xFF; 1568 secondary = UCOL_SECONDARYORDER(ce); 1569 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION; 1570 1571 if (!isContinuation(ce)) { 1572 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { 1573 log_err("Empty CE %08lX except for case bits\n", (long)ce); 1574 break; 1575 } 1576 if (p1 == 0) { 1577 if (p2 != 0) { 1578 log_err("Primary 00 xx in %08lX\n", (long)ce); 1579 break; 1580 } 1581 primaryDone = TRUE; 1582 } else { 1583 if (p1 <= 2 || p1 >= 0xF0) { 1584 /* Primary first bytes F0..FF are specials. */ 1585 log_err("Primary first byte of %08lX out of range\n", (long)ce); 1586 break; 1587 } 1588 if (p2 == 0) { 1589 primaryDone = TRUE; 1590 } else { 1591 if (p2 <= 3 || p2 >= 0xFF) { 1592 /* Primary second bytes 03 and FF are sort key compression terminators. */ 1593 log_err("Primary second byte of %08lX out of range\n", (long)ce); 1594 break; 1595 } 1596 primaryDone = FALSE; 1597 } 1598 } 1599 if (secondary == 0) { 1600 if (primary != 0) { 1601 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce); 1602 break; 1603 } 1604 secondaryDone = TRUE; 1605 } else { 1606 if (secondary <= 2 || 1607 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80)) 1608 ) { 1609 /* Secondary first bytes common+1..+0x80 are used for sort key compression. */ 1610 log_err("Secondary byte of %08lX out of range\n", (long)ce); 1611 break; 1612 } 1613 secondaryDone = FALSE; 1614 } 1615 if (tertiary == 0) { 1616 /* We know that ce != 0. */ 1617 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce); 1618 break; 1619 } 1620 if (tertiary <= 2) { 1621 log_err("Tertiary byte of %08lX out of range\n", (long)ce); 1622 break; 1623 } 1624 tertiaryDone = FALSE; 1625 } else { 1626 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { 1627 log_err("Empty continuation %08lX\n", (long)ce); 1628 break; 1629 } 1630 if (primaryDone && primary != 0) { 1631 log_err("Primary was done but continues in %08lX\n", (long)ce); 1632 break; 1633 } 1634 if (p1 == 0) { 1635 if (p2 != 0) { 1636 log_err("Primary 00 xx in %08lX\n", (long)ce); 1637 break; 1638 } 1639 primaryDone = TRUE; 1640 } else { 1641 if (p1 <= 2) { 1642 log_err("Primary first byte of %08lX out of range\n", (long)ce); 1643 break; 1644 } 1645 if (p2 == 0) { 1646 primaryDone = TRUE; 1647 } else { 1648 if (p2 <= 3) { 1649 log_err("Primary second byte of %08lX out of range\n", (long)ce); 1650 break; 1651 } 1652 } 1653 } 1654 if (secondaryDone && secondary != 0) { 1655 log_err("Secondary was done but continues in %08lX\n", (long)ce); 1656 break; 1657 } 1658 if (secondary == 0) { 1659 secondaryDone = TRUE; 1660 } else { 1661 if (secondary <= 2) { 1662 log_err("Secondary byte of %08lX out of range\n", (long)ce); 1663 break; 1664 } 1665 } 1666 if (tertiaryDone && tertiary != 0) { 1667 log_err("Tertiary was done but continues in %08lX\n", (long)ce); 1668 break; 1669 } 1670 if (tertiary == 0) { 1671 tertiaryDone = TRUE; 1672 } else if (tertiary <= 2) { 1673 log_err("Tertiary byte of %08lX out of range\n", (long)ce); 1674 break; 1675 } 1676 } 1677 } 1678 if (!result) { 1679 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5]; 1680 showCodepoints(codepoints, length, codepointText); 1681 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText); 1682 } 1683 ucol_closeElements(iter); 1684 return result; 1685 } 1686 1687 static void TestCEValidity() 1688 { 1689 /* testing UCA collation elements */ 1690 UErrorCode status = U_ZERO_ERROR; 1691 /* en_US has no tailorings */ 1692 UCollator *coll = ucol_open("root", &status); 1693 /* tailored locales */ 1694 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"}; 1695 const char *loc; 1696 FileStream *file = NULL; 1697 char line[2048]; 1698 UChar codepoints[11]; 1699 int count = 0; 1700 int maxCount = 0; 1701 UChar contextCPs[3]; 1702 UChar32 c; 1703 UParseError parseError; 1704 if (U_FAILURE(status)) { 1705 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status)); 1706 return; 1707 } 1708 log_verbose("Testing UCA elements\n"); 1709 file = getFractionalUCA(); 1710 if (file == NULL) { 1711 log_err("Fractional UCA data can not be opened\n"); 1712 return; 1713 } 1714 1715 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { 1716 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || 1717 line[0] == 0x000D || line[0] == '[') { 1718 continue; 1719 } 1720 1721 getCodePoints(line, codepoints, contextCPs); 1722 checkCEValidity(coll, codepoints, u_strlen(codepoints)); 1723 } 1724 1725 log_verbose("Testing UCA elements for the whole range of unicode characters\n"); 1726 for (c = 0; c <= 0xffff; ++c) { 1727 if (u_isdefined(c)) { 1728 codepoints[0] = (UChar)c; 1729 checkCEValidity(coll, codepoints, 1); 1730 } 1731 } 1732 for (; c <= 0x10ffff; ++c) { 1733 if (u_isdefined(c)) { 1734 int32_t i = 0; 1735 U16_APPEND_UNSAFE(codepoints, i, c); 1736 checkCEValidity(coll, codepoints, i); 1737 } 1738 } 1739 1740 ucol_close(coll); 1741 1742 /* testing tailored collation elements */ 1743 log_verbose("Testing tailored elements\n"); 1744 if(getTestOption(QUICK_OPTION)) { 1745 maxCount = sizeof(locale)/sizeof(locale[0]); 1746 } else { 1747 maxCount = uloc_countAvailable(); 1748 } 1749 while (count < maxCount) { 1750 const UChar *rules = NULL, 1751 *current = NULL; 1752 UChar *rulesCopy = NULL; 1753 int32_t ruleLen = 0; 1754 1755 uint32_t chOffset = 0; 1756 uint32_t chLen = 0; 1757 uint32_t exOffset = 0; 1758 uint32_t exLen = 0; 1759 uint32_t prefixOffset = 0; 1760 uint32_t prefixLen = 0; 1761 UBool startOfRules = TRUE; 1762 UColOptionSet opts; 1763 1764 UColTokenParser src; 1765 uint32_t strength = 0; 1766 uint16_t specs = 0; 1767 if(getTestOption(QUICK_OPTION)) { 1768 loc = locale[count]; 1769 } else { 1770 loc = uloc_getAvailable(count); 1771 if(!hasCollationElements(loc)) { 1772 count++; 1773 continue; 1774 } 1775 } 1776 1777 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1778 1779 log_verbose("Testing CEs for %s\n", loc); 1780 1781 coll = ucol_open(loc, &status); 1782 if (U_FAILURE(status)) { 1783 log_err("%s collator creation failed\n", loc); 1784 return; 1785 } 1786 1787 src.opts = &opts; 1788 rules = ucol_getRules(coll, &ruleLen); 1789 1790 if (ruleLen > 0) { 1791 rulesCopy = (UChar *)uprv_malloc((ruleLen + 1792 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); 1793 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); 1794 src.current = src.source = rulesCopy; 1795 src.end = rulesCopy + ruleLen; 1796 src.extraCurrent = src.end; 1797 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1798 1799 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1800 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1801 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { 1802 strength = src.parsedToken.strength; 1803 chOffset = src.parsedToken.charsOffset; 1804 chLen = src.parsedToken.charsLen; 1805 exOffset = src.parsedToken.extensionOffset; 1806 exLen = src.parsedToken.extensionLen; 1807 prefixOffset = src.parsedToken.prefixOffset; 1808 prefixLen = src.parsedToken.prefixLen; 1809 specs = src.parsedToken.flags; 1810 1811 startOfRules = FALSE; 1812 uprv_memcpy(codepoints, src.source + chOffset, 1813 chLen * sizeof(UChar)); 1814 codepoints[chLen] = 0; 1815 checkCEValidity(coll, codepoints, chLen); 1816 } 1817 uprv_free(src.source); 1818 } 1819 1820 ucol_close(coll); 1821 count ++; 1822 } 1823 T_FileStream_close(file); 1824 } 1825 1826 static void printSortKeyError(const UChar *codepoints, int length, 1827 uint8_t *sortkey, int sklen) 1828 { 1829 int count = 0; 1830 log_err("Sortkey not valid for "); 1831 while (length > 0) { 1832 log_err("0x%04x ", *codepoints); 1833 length --; 1834 codepoints ++; 1835 } 1836 log_err("\nSortkey : "); 1837 while (count < sklen) { 1838 log_err("0x%02x ", sortkey[count]); 1839 count ++; 1840 } 1841 log_err("\n"); 1842 } 1843 1844 /** 1845 * Checking sort key validity for all levels 1846 */ 1847 static UBool checkSortKeyValidity(UCollator *coll, 1848 const UChar *codepoints, 1849 int length) 1850 { 1851 UErrorCode status = U_ZERO_ERROR; 1852 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY, 1853 UCOL_TERTIARY, UCOL_QUATERNARY, 1854 UCOL_IDENTICAL}; 1855 int strengthlen = 5; 1856 int strengthIndex = 0; 1857 int caselevel = 0; 1858 1859 while (caselevel < 1) { 1860 if (caselevel == 0) { 1861 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); 1862 } 1863 else { 1864 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status); 1865 } 1866 1867 while (strengthIndex < strengthlen) { 1868 int count01 = 0; 1869 uint32_t count = 0; 1870 uint8_t sortkey[128]; 1871 uint32_t sklen; 1872 1873 ucol_setStrength(coll, strength[strengthIndex]); 1874 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128); 1875 while (sortkey[count] != 0) { 1876 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) { 1877 printSortKeyError(codepoints, length, sortkey, sklen); 1878 return FALSE; 1879 } 1880 if (sortkey[count] == 1) { 1881 count01 ++; 1882 } 1883 count ++; 1884 } 1885 1886 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) { 1887 printSortKeyError(codepoints, length, sortkey, sklen); 1888 return FALSE; 1889 } 1890 strengthIndex ++; 1891 } 1892 caselevel ++; 1893 } 1894 return TRUE; 1895 } 1896 1897 static void TestSortKeyValidity(void) 1898 { 1899 /* testing UCA collation elements */ 1900 UErrorCode status = U_ZERO_ERROR; 1901 /* en_US has no tailorings */ 1902 UCollator *coll = ucol_open("en_US", &status); 1903 /* tailored locales */ 1904 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"}; 1905 FileStream *file = NULL; 1906 char line[2048]; 1907 UChar codepoints[10]; 1908 int count = 0; 1909 UChar contextCPs[5]; 1910 UParseError parseError; 1911 if (U_FAILURE(status)) { 1912 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status)); 1913 return; 1914 } 1915 log_verbose("Testing UCA elements\n"); 1916 file = getFractionalUCA(); 1917 if (file == NULL) { 1918 log_err("Fractional UCA data can not be opened\n"); 1919 return; 1920 } 1921 1922 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { 1923 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || 1924 line[0] == 0x000D || line[0] == '[') { 1925 continue; 1926 } 1927 1928 getCodePoints(line, codepoints, contextCPs); 1929 if(codepoints[0] == 0xFFFE) { 1930 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */ 1931 continue; 1932 } 1933 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints)); 1934 } 1935 1936 log_verbose("Testing UCA elements for the whole range of unicode characters\n"); 1937 codepoints[0] = 0; 1938 1939 while (codepoints[0] < 0xFFFF) { 1940 if (u_isdefined((UChar32)codepoints[0])) { 1941 checkSortKeyValidity(coll, codepoints, 1); 1942 } 1943 codepoints[0] ++; 1944 } 1945 1946 ucol_close(coll); 1947 1948 /* testing tailored collation elements */ 1949 log_verbose("Testing tailored elements\n"); 1950 while (count < 5) { 1951 const UChar *rules = NULL, 1952 *current = NULL; 1953 UChar *rulesCopy = NULL; 1954 int32_t ruleLen = 0; 1955 1956 uint32_t chOffset = 0; 1957 uint32_t chLen = 0; 1958 uint32_t exOffset = 0; 1959 uint32_t exLen = 0; 1960 uint32_t prefixOffset = 0; 1961 uint32_t prefixLen = 0; 1962 UBool startOfRules = TRUE; 1963 UColOptionSet opts; 1964 1965 UColTokenParser src; 1966 uint32_t strength = 0; 1967 uint16_t specs = 0; 1968 1969 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1970 1971 coll = ucol_open(locale[count], &status); 1972 if (U_FAILURE(status)) { 1973 log_err("%s collator creation failed\n", locale[count]); 1974 return; 1975 } 1976 1977 src.opts = &opts; 1978 rules = ucol_getRules(coll, &ruleLen); 1979 1980 if (ruleLen > 0) { 1981 rulesCopy = (UChar *)uprv_malloc((ruleLen + 1982 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); 1983 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); 1984 src.current = src.source = rulesCopy; 1985 src.end = rulesCopy + ruleLen; 1986 src.extraCurrent = src.end; 1987 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1988 1989 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1990 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1991 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) { 1992 strength = src.parsedToken.strength; 1993 chOffset = src.parsedToken.charsOffset; 1994 chLen = src.parsedToken.charsLen; 1995 exOffset = src.parsedToken.extensionOffset; 1996 exLen = src.parsedToken.extensionLen; 1997 prefixOffset = src.parsedToken.prefixOffset; 1998 prefixLen = src.parsedToken.prefixLen; 1999 specs = src.parsedToken.flags; 2000 2001 startOfRules = FALSE; 2002 uprv_memcpy(codepoints, src.source + chOffset, 2003 chLen * sizeof(UChar)); 2004 codepoints[chLen] = 0; 2005 if(codepoints[0] == 0xFFFE) { 2006 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */ 2007 continue; 2008 } 2009 checkSortKeyValidity(coll, codepoints, chLen); 2010 } 2011 uprv_free(src.source); 2012 } 2013 2014 ucol_close(coll); 2015 count ++; 2016 } 2017 T_FileStream_close(file); 2018 } 2019 2020 #endif /* #if !UCONFIG_NO_COLLATION */ 2021