1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************** 7 * 8 * File CITERTST.C 9 * 10 * Modification History: 11 * Date Name Description 12 * Madhu Katragadda Ported for C API 13 * 02/19/01 synwee Modified test case for new collation iterator 14 *********************************************************************************/ 15 /* 16 * Collation Iterator tests. 17 * (Let me reiterate my position...) 18 */ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_COLLATION 23 24 #include "unicode/ucol.h" 25 #include "unicode/ucoleitr.h" 26 #include "unicode/uloc.h" 27 #include "unicode/uchar.h" 28 #include "unicode/ustring.h" 29 #include "unicode/putil.h" 30 #include "callcoll.h" 31 #include "cmemory.h" 32 #include "cintltst.h" 33 #include "citertst.h" 34 #include "ccolltst.h" 35 #include "filestrm.h" 36 #include "cstring.h" 37 #include "ucol_imp.h" 38 #include "ucol_tok.h" 39 #include "uparse.h" 40 #include <stdio.h> 41 42 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); 43 44 void addCollIterTest(TestNode** root) 45 { 46 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); 47 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); 48 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); 49 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); 50 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); 51 addTest(root, &TestNormalizedUnicodeChar, 52 "tscoll/citertst/TestNormalizedUnicodeChar"); 53 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); 54 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); 55 addTest(root, &TestBug672Normalize, 
"tscoll/citertst/TestBug672Normalize"); 56 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); 57 addTest(root, &TestCEs, "tscoll/citertst/TestCEs"); 58 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); 59 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow"); 60 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); 61 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); 62 addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements"); 63 } 64 65 /* The locales we support */ 66 67 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"}; 68 69 static void TestBug672() { 70 UErrorCode status = U_ZERO_ERROR; 71 UChar pattern[20]; 72 UChar text[50]; 73 int i; 74 int result[3][3]; 75 76 u_uastrcpy(pattern, "resume"); 77 u_uastrcpy(text, "Time to resume updating my resume."); 78 79 for (i = 0; i < 3; ++ i) { 80 UCollator *coll = ucol_open(LOCALES[i], &status); 81 UCollationElements *pitr = ucol_openElements(coll, pattern, -1, 82 &status); 83 UCollationElements *titer = ucol_openElements(coll, text, -1, 84 &status); 85 if (U_FAILURE(status)) { 86 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n", 87 myErrorName(status)); 88 return; 89 } 90 91 log_verbose("locale tested %s\n", LOCALES[i]); 92 93 while (ucol_next(pitr, &status) != UCOL_NULLORDER && 94 U_SUCCESS(status)) { 95 } 96 if (U_FAILURE(status)) { 97 log_err("ERROR: reversing collation iterator :%s\n", 98 myErrorName(status)); 99 return; 100 } 101 ucol_reset(pitr); 102 103 ucol_setOffset(titer, u_strlen(pattern), &status); 104 if (U_FAILURE(status)) { 105 log_err("ERROR: setting offset in collator :%s\n", 106 myErrorName(status)); 107 return; 108 } 109 result[i][0] = ucol_getOffset(titer); 110 log_verbose("Text iterator set to offset %d\n", result[i][0]); 111 112 /* Use previous() */ 113 ucol_previous(titer, &status); 114 result[i][1] = 
ucol_getOffset(titer); 115 log_verbose("Current offset %d after previous\n", result[i][1]); 116 117 /* Add one to index */ 118 log_verbose("Adding one to current offset...\n"); 119 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); 120 if (U_FAILURE(status)) { 121 log_err("ERROR: setting offset in collator :%s\n", 122 myErrorName(status)); 123 return; 124 } 125 result[i][2] = ucol_getOffset(titer); 126 log_verbose("Current offset in text = %d\n", result[i][2]); 127 ucol_closeElements(pitr); 128 ucol_closeElements(titer); 129 ucol_close(coll); 130 } 131 132 if (uprv_memcmp(result[0], result[1], 3) != 0 || 133 uprv_memcmp(result[1], result[2], 3) != 0) { 134 log_err("ERROR: Different locales have different offsets at the same character\n"); 135 } 136 } 137 138 139 140 /* Running this test with normalization enabled showed up a bug in the incremental 141 normalization code. */ 142 static void TestBug672Normalize() { 143 UErrorCode status = U_ZERO_ERROR; 144 UChar pattern[20]; 145 UChar text[50]; 146 int i; 147 int result[3][3]; 148 149 u_uastrcpy(pattern, "resume"); 150 u_uastrcpy(text, "Time to resume updating my resume."); 151 152 for (i = 0; i < 3; ++ i) { 153 UCollator *coll = ucol_open(LOCALES[i], &status); 154 UCollationElements *pitr = NULL; 155 UCollationElements *titer = NULL; 156 157 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 158 159 pitr = ucol_openElements(coll, pattern, -1, &status); 160 titer = ucol_openElements(coll, text, -1, &status); 161 if (U_FAILURE(status)) { 162 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n", 163 myErrorName(status)); 164 return; 165 } 166 167 log_verbose("locale tested %s\n", LOCALES[i]); 168 169 while (ucol_next(pitr, &status) != UCOL_NULLORDER && 170 U_SUCCESS(status)) { 171 } 172 if (U_FAILURE(status)) { 173 log_err("ERROR: reversing collation iterator :%s\n", 174 myErrorName(status)); 175 return; 176 } 177 ucol_reset(pitr); 178 179 
ucol_setOffset(titer, u_strlen(pattern), &status); 180 if (U_FAILURE(status)) { 181 log_err("ERROR: setting offset in collator :%s\n", 182 myErrorName(status)); 183 return; 184 } 185 result[i][0] = ucol_getOffset(titer); 186 log_verbose("Text iterator set to offset %d\n", result[i][0]); 187 188 /* Use previous() */ 189 ucol_previous(titer, &status); 190 result[i][1] = ucol_getOffset(titer); 191 log_verbose("Current offset %d after previous\n", result[i][1]); 192 193 /* Add one to index */ 194 log_verbose("Adding one to current offset...\n"); 195 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); 196 if (U_FAILURE(status)) { 197 log_err("ERROR: setting offset in collator :%s\n", 198 myErrorName(status)); 199 return; 200 } 201 result[i][2] = ucol_getOffset(titer); 202 log_verbose("Current offset in text = %d\n", result[i][2]); 203 ucol_closeElements(pitr); 204 ucol_closeElements(titer); 205 ucol_close(coll); 206 } 207 208 if (uprv_memcmp(result[0], result[1], 3) != 0 || 209 uprv_memcmp(result[1], result[2], 3) != 0) { 210 log_err("ERROR: Different locales have different offsets at the same character\n"); 211 } 212 } 213 214 215 216 217 /** 218 * Test for CollationElementIterator previous and next for the whole set of 219 * unicode characters. 
220 */ 221 static void TestUnicodeChar() 222 { 223 UChar source[0x100]; 224 UCollator *en_us; 225 UCollationElements *iter; 226 UErrorCode status = U_ZERO_ERROR; 227 UChar codepoint; 228 229 UChar *test; 230 en_us = ucol_open("en_US", &status); 231 if (U_FAILURE(status)){ 232 log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n", 233 myErrorName(status)); 234 return; 235 } 236 237 for (codepoint = 1; codepoint < 0xFFFE;) 238 { 239 test = source; 240 241 while (codepoint % 0xFF != 0) 242 { 243 if (u_isdefined(codepoint)) 244 *(test ++) = codepoint; 245 codepoint ++; 246 } 247 248 if (u_isdefined(codepoint)) 249 *(test ++) = codepoint; 250 251 if (codepoint != 0xFFFF) 252 codepoint ++; 253 254 *test = 0; 255 iter=ucol_openElements(en_us, source, u_strlen(source), &status); 256 if(U_FAILURE(status)){ 257 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 258 myErrorName(status)); 259 ucol_close(en_us); 260 return; 261 } 262 /* A basic test to see if it's working at all */ 263 log_verbose("codepoint testing %x\n", codepoint); 264 backAndForth(iter); 265 ucol_closeElements(iter); 266 267 /* null termination test */ 268 iter=ucol_openElements(en_us, source, -1, &status); 269 if(U_FAILURE(status)){ 270 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 271 myErrorName(status)); 272 ucol_close(en_us); 273 return; 274 } 275 /* A basic test to see if it's working at all */ 276 backAndForth(iter); 277 ucol_closeElements(iter); 278 } 279 280 ucol_close(en_us); 281 } 282 283 /** 284 * Test for CollationElementIterator previous and next for the whole set of 285 * unicode characters with normalization on. 
286 */ 287 static void TestNormalizedUnicodeChar() 288 { 289 UChar source[0x100]; 290 UCollator *th_th; 291 UCollationElements *iter; 292 UErrorCode status = U_ZERO_ERROR; 293 UChar codepoint; 294 295 UChar *test; 296 /* thai should have normalization on */ 297 th_th = ucol_open("th_TH", &status); 298 if (U_FAILURE(status)){ 299 log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n", 300 myErrorName(status)); 301 return; 302 } 303 304 for (codepoint = 1; codepoint < 0xFFFE;) 305 { 306 test = source; 307 308 while (codepoint % 0xFF != 0) 309 { 310 if (u_isdefined(codepoint)) 311 *(test ++) = codepoint; 312 codepoint ++; 313 } 314 315 if (u_isdefined(codepoint)) 316 *(test ++) = codepoint; 317 318 if (codepoint != 0xFFFF) 319 codepoint ++; 320 321 *test = 0; 322 iter=ucol_openElements(th_th, source, u_strlen(source), &status); 323 if(U_FAILURE(status)){ 324 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 325 myErrorName(status)); 326 ucol_close(th_th); 327 return; 328 } 329 330 backAndForth(iter); 331 ucol_closeElements(iter); 332 333 iter=ucol_openElements(th_th, source, -1, &status); 334 if(U_FAILURE(status)){ 335 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 336 myErrorName(status)); 337 ucol_close(th_th); 338 return; 339 } 340 341 backAndForth(iter); 342 ucol_closeElements(iter); 343 } 344 345 ucol_close(th_th); 346 } 347 348 /** 349 * Test the incremental normalization 350 */ 351 static void TestNormalization() 352 { 353 UErrorCode status = U_ZERO_ERROR; 354 const char *str = 355 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315"; 356 UCollator *coll; 357 UChar rule[50]; 358 int rulelen = u_unescape(str, rule, 50); 359 int count = 0; 360 const char *testdata[] = 361 {"\\u1ED9", "o\\u0323\\u0302", 362 "\\u0300\\u0315", "\\u0315\\u0300", 363 "A\\u0300\\u0315B", "A\\u0315\\u0300B", 364 "A\\u0316\\u0315B", 
"A\\u0315\\u0316B", 365 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316", 366 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B", 367 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"}; 368 int32_t srclen; 369 UChar source[10]; 370 UCollationElements *iter; 371 372 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status); 373 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 374 if (U_FAILURE(status)){ 375 log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n", 376 myErrorName(status)); 377 return; 378 } 379 380 srclen = u_unescape(testdata[0], source, 10); 381 iter = ucol_openElements(coll, source, srclen, &status); 382 backAndForth(iter); 383 ucol_closeElements(iter); 384 385 srclen = u_unescape(testdata[1], source, 10); 386 iter = ucol_openElements(coll, source, srclen, &status); 387 backAndForth(iter); 388 ucol_closeElements(iter); 389 390 while (count < 12) { 391 srclen = u_unescape(testdata[count], source, 10); 392 iter = ucol_openElements(coll, source, srclen, &status); 393 394 if (U_FAILURE(status)){ 395 log_err("ERROR: in creation of collator element iterator\n %s\n", 396 myErrorName(status)); 397 return; 398 } 399 backAndForth(iter); 400 ucol_closeElements(iter); 401 402 iter = ucol_openElements(coll, source, -1, &status); 403 404 if (U_FAILURE(status)){ 405 log_err("ERROR: in creation of collator element iterator\n %s\n", 406 myErrorName(status)); 407 return; 408 } 409 backAndForth(iter); 410 ucol_closeElements(iter); 411 count ++; 412 } 413 ucol_close(coll); 414 } 415 416 /** 417 * Test for CollationElementIterator.previous() 418 * 419 * @bug 4108758 - Make sure it works with contracting characters 420 * 421 */ 422 static void TestPrevious() 423 { 424 UCollator *coll=NULL; 425 UChar rule[50]; 426 UChar *source; 427 UCollator *c1, *c2, *c3; 428 UCollationElements *iter; 429 UErrorCode status = U_ZERO_ERROR; 430 UChar test1[50]; 431 UChar test2[50]; 432 433 u_uastrcpy(test1, "What 
subset of all possible test cases?"); 434 u_uastrcpy(test2, "has the highest probability of detecting"); 435 coll = ucol_open("en_US", &status); 436 437 iter=ucol_openElements(coll, test1, u_strlen(test1), &status); 438 log_verbose("English locale testing back and forth\n"); 439 if(U_FAILURE(status)){ 440 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 441 myErrorName(status)); 442 ucol_close(coll); 443 return; 444 } 445 /* A basic test to see if it's working at all */ 446 backAndForth(iter); 447 ucol_closeElements(iter); 448 ucol_close(coll); 449 450 /* Test with a contracting character sequence */ 451 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"); 452 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 453 454 log_verbose("Contraction rule testing back and forth with no normalization\n"); 455 456 if (c1 == NULL || U_FAILURE(status)) 457 { 458 log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n", 459 myErrorName(status)); 460 return; 461 } 462 source=(UChar*)malloc(sizeof(UChar) * 20); 463 u_uastrcpy(source, "abchdcba"); 464 iter=ucol_openElements(c1, source, u_strlen(source), &status); 465 if(U_FAILURE(status)){ 466 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 467 myErrorName(status)); 468 return; 469 } 470 backAndForth(iter); 471 ucol_closeElements(iter); 472 ucol_close(c1); 473 474 /* Test with an expanding character sequence */ 475 u_uastrcpy(rule, "&a < b < c/abd < d"); 476 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 477 log_verbose("Expansion rule testing back and forth with no normalization\n"); 478 if (c2 == NULL || U_FAILURE(status)) 479 { 480 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", 481 myErrorName(status)); 482 return; 483 } 484 u_uastrcpy(source, "abcd"); 485 
iter=ucol_openElements(c2, source, u_strlen(source), &status); 486 if(U_FAILURE(status)){ 487 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 488 myErrorName(status)); 489 return; 490 } 491 backAndForth(iter); 492 ucol_closeElements(iter); 493 ucol_close(c2); 494 /* Now try both */ 495 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch"); 496 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status); 497 log_verbose("Expansion/contraction rule testing back and forth with no normalization\n"); 498 499 if (c3 == NULL || U_FAILURE(status)) 500 { 501 log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n", 502 myErrorName(status)); 503 return; 504 } 505 u_uastrcpy(source, "abcdbchdc"); 506 iter=ucol_openElements(c3, source, u_strlen(source), &status); 507 if(U_FAILURE(status)){ 508 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 509 myErrorName(status)); 510 return; 511 } 512 backAndForth(iter); 513 ucol_closeElements(iter); 514 ucol_close(c3); 515 source[0] = 0x0e41; 516 source[1] = 0x0e02; 517 source[2] = 0x0e41; 518 source[3] = 0x0e02; 519 source[4] = 0x0e27; 520 source[5] = 0x61; 521 source[6] = 0x62; 522 source[7] = 0x63; 523 source[8] = 0; 524 525 coll = ucol_open("th_TH", &status); 526 log_verbose("Thai locale testing back and forth with normalization\n"); 527 iter=ucol_openElements(coll, source, u_strlen(source), &status); 528 if(U_FAILURE(status)){ 529 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 530 myErrorName(status)); 531 return; 532 } 533 backAndForth(iter); 534 ucol_closeElements(iter); 535 ucol_close(coll); 536 537 /* prev test */ 538 source[0] = 0x0061; 539 source[1] = 0x30CF; 540 source[2] = 0x3099; 541 source[3] = 0x30FC; 542 source[4] = 0; 543 544 coll = ucol_open("ja_JP", &status); 545 log_verbose("Japanese locale testing back and forth with 
normalization\n"); 546 iter=ucol_openElements(coll, source, u_strlen(source), &status); 547 if(U_FAILURE(status)){ 548 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 549 myErrorName(status)); 550 return; 551 } 552 backAndForth(iter); 553 ucol_closeElements(iter); 554 ucol_close(coll); 555 556 free(source); 557 } 558 559 /** 560 * Test for getOffset() and setOffset() 561 */ 562 static void TestOffset() 563 { 564 UErrorCode status= U_ZERO_ERROR; 565 UCollator *en_us=NULL; 566 UCollationElements *iter, *pristine; 567 int32_t offset; 568 OrderAndOffset *orders; 569 int32_t orderLength=0; 570 int count = 0; 571 UChar test1[50]; 572 UChar test2[50]; 573 574 u_uastrcpy(test1, "What subset of all possible test cases?"); 575 u_uastrcpy(test2, "has the highest probability of detecting"); 576 en_us = ucol_open("en_US", &status); 577 log_verbose("Testing getOffset and setOffset for collations\n"); 578 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status); 579 if(U_FAILURE(status)){ 580 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 581 myErrorName(status)); 582 ucol_close(en_us); 583 return; 584 } 585 586 /* testing boundaries */ 587 ucol_setOffset(iter, 0, &status); 588 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) { 589 log_err("Error: After setting offset to 0, we should be at the end " 590 "of the backwards iteration"); 591 } 592 ucol_setOffset(iter, u_strlen(test1), &status); 593 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) { 594 log_err("Error: After setting offset to end of the string, we should " 595 "be at the end of the backwards iteration"); 596 } 597 598 /* Run all the way through the iterator, then get the offset */ 599 600 orders = getOrders(iter, &orderLength); 601 602 offset = ucol_getOffset(iter); 603 604 if (offset != u_strlen(test1)) 605 { 606 log_err("offset at end != length %d vs %d\n", 
offset, 607 u_strlen(test1) ); 608 } 609 610 /* Now set the offset back to the beginning and see if it works */ 611 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status); 612 if(U_FAILURE(status)){ 613 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 614 myErrorName(status)); 615 ucol_close(en_us); 616 return; 617 } 618 status = U_ZERO_ERROR; 619 620 ucol_setOffset(iter, 0, &status); 621 if (U_FAILURE(status)) 622 { 623 log_err("setOffset failed. %s\n", myErrorName(status)); 624 } 625 else 626 { 627 assertEqual(iter, pristine); 628 } 629 630 ucol_closeElements(pristine); 631 ucol_closeElements(iter); 632 free(orders); 633 634 /* testing offsets in normalization buffer */ 635 test1[0] = 0x61; 636 test1[1] = 0x300; 637 test1[2] = 0x316; 638 test1[3] = 0x62; 639 test1[4] = 0; 640 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 641 iter = ucol_openElements(en_us, test1, 4, &status); 642 if(U_FAILURE(status)){ 643 log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n", 644 myErrorName(status)); 645 ucol_close(en_us); 646 return; 647 } 648 649 count = 0; 650 while (ucol_next(iter, &status) != UCOL_NULLORDER && 651 U_SUCCESS(status)) { 652 switch (count) { 653 case 0: 654 if (ucol_getOffset(iter) != 1) { 655 log_err("ERROR: Offset of iteration should be 1\n"); 656 } 657 break; 658 case 3: 659 if (ucol_getOffset(iter) != 4) { 660 log_err("ERROR: Offset of iteration should be 4\n"); 661 } 662 break; 663 default: 664 if (ucol_getOffset(iter) != 3) { 665 log_err("ERROR: Offset of iteration should be 3\n"); 666 } 667 } 668 count ++; 669 } 670 671 ucol_reset(iter); 672 count = 0; 673 while (ucol_previous(iter, &status) != UCOL_NULLORDER && 674 U_SUCCESS(status)) { 675 switch (count) { 676 case 0: 677 case 1: 678 if (ucol_getOffset(iter) != 3) { 679 log_err("ERROR: Offset of iteration should be 3\n"); 680 } 681 break; 682 case 2: 683 if (ucol_getOffset(iter) != 1) { 
684 log_err("ERROR: Offset of iteration should be 1\n"); 685 } 686 break; 687 default: 688 if (ucol_getOffset(iter) != 0) { 689 log_err("ERROR: Offset of iteration should be 0\n"); 690 } 691 } 692 count ++; 693 } 694 695 if(U_FAILURE(status)){ 696 log_err("ERROR: in iterating collation elements %s\n", 697 myErrorName(status)); 698 } 699 700 ucol_closeElements(iter); 701 ucol_close(en_us); 702 } 703 704 /** 705 * Test for setText() 706 */ 707 static void TestSetText() 708 { 709 int32_t c,i; 710 UErrorCode status = U_ZERO_ERROR; 711 UCollator *en_us=NULL; 712 UCollationElements *iter1, *iter2; 713 UChar test1[50]; 714 UChar test2[50]; 715 716 u_uastrcpy(test1, "What subset of all possible test cases?"); 717 u_uastrcpy(test2, "has the highest probability of detecting"); 718 en_us = ucol_open("en_US", &status); 719 log_verbose("testing setText for Collation elements\n"); 720 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status); 721 if(U_FAILURE(status)){ 722 log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n", 723 myErrorName(status)); 724 ucol_close(en_us); 725 return; 726 } 727 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status); 728 if(U_FAILURE(status)){ 729 log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n", 730 myErrorName(status)); 731 ucol_close(en_us); 732 return; 733 } 734 735 /* Run through the second iterator just to exercise it */ 736 c = ucol_next(iter2, &status); 737 i = 0; 738 739 while ( ++i < 10 && (c != UCOL_NULLORDER)) 740 { 741 if (U_FAILURE(status)) 742 { 743 log_err("iter2->next() returned an error. 
%s\n", myErrorName(status)); 744 ucol_closeElements(iter2); 745 ucol_closeElements(iter1); 746 ucol_close(en_us); 747 return; 748 } 749 750 c = ucol_next(iter2, &status); 751 } 752 753 /* Now set it to point to the same string as the first iterator */ 754 ucol_setText(iter2, test1, u_strlen(test1), &status); 755 if (U_FAILURE(status)) 756 { 757 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status)); 758 } 759 else 760 { 761 assertEqual(iter1, iter2); 762 } 763 764 /* Now set it to point to a null string with fake length*/ 765 ucol_setText(iter2, NULL, 2, &status); 766 if (U_FAILURE(status)) 767 { 768 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status)); 769 } 770 else 771 { 772 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 773 log_err("iter2 with null text expected to return UCOL_NULLORDER\n"); 774 } 775 } 776 777 ucol_closeElements(iter2); 778 ucol_closeElements(iter1); 779 ucol_close(en_us); 780 } 781 782 /** @bug 4108762 783 * Test for getMaxExpansion() 784 */ 785 static void TestMaxExpansion() 786 { 787 UErrorCode status = U_ZERO_ERROR; 788 UCollator *coll ;/*= ucol_open("en_US", &status);*/ 789 UChar ch = 0; 790 UChar32 unassigned = 0xEFFFD; 791 UChar supplementary[2]; 792 uint32_t stringOffset = 0; 793 UBool isError = FALSE; 794 uint32_t sorder = 0; 795 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/ 796 uint32_t temporder = 0; 797 798 UChar rule[256]; 799 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch"); 800 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, 801 UCOL_DEFAULT_STRENGTH,NULL, &status); 802 if(U_SUCCESS(status) && coll) { 803 iter = ucol_openElements(coll, &ch, 1, &status); 804 805 while (ch < 0xFFFF && U_SUCCESS(status)) { 806 int count = 1; 807 uint32_t order; 808 int32_t size = 0; 809 810 ch ++; 811 812 ucol_setText(iter, &ch, 1, &status); 813 order = ucol_previous(iter, &status); 814 815 /* thai management */ 816 if (order == 0) 817 order = ucol_previous(iter, 
&status); 818 819 while (U_SUCCESS(status) && 820 ucol_previous(iter, &status) != UCOL_NULLORDER) { 821 count ++; 822 } 823 824 size = ucol_getMaxExpansion(iter, order); 825 if (U_FAILURE(status) || size < count) { 826 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 827 ch, count); 828 } 829 } 830 831 /* testing for exact max expansion */ 832 ch = 0; 833 while (ch < 0x61) { 834 uint32_t order; 835 int32_t size; 836 ucol_setText(iter, &ch, 1, &status); 837 order = ucol_previous(iter, &status); 838 size = ucol_getMaxExpansion(iter, order); 839 if (U_FAILURE(status) || size != 1) { 840 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 841 ch, 1); 842 } 843 ch ++; 844 } 845 846 ch = 0x63; 847 ucol_setText(iter, &ch, 1, &status); 848 temporder = ucol_previous(iter, &status); 849 850 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) { 851 log_err("Failure at codepoint %d, maximum expansion count != %d\n", 852 ch, 3); 853 } 854 855 ch = 0x64; 856 ucol_setText(iter, &ch, 1, &status); 857 temporder = ucol_previous(iter, &status); 858 859 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) { 860 log_err("Failure at codepoint %d, maximum expansion count != %d\n", 861 ch, 3); 862 } 863 864 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError); 865 ucol_setText(iter, supplementary, 2, &status); 866 sorder = ucol_previous(iter, &status); 867 868 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) { 869 log_err("Failure at codepoint %d, maximum expansion count < %d\n", 870 ch, 2); 871 } 872 873 /* testing jamo */ 874 ch = 0x1165; 875 876 ucol_setText(iter, &ch, 1, &status); 877 temporder = ucol_previous(iter, &status); 878 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) { 879 log_err("Failure at codepoint %d, maximum expansion count > %d\n", 880 ch, 3); 881 } 882 883 ucol_closeElements(iter); 884 ucol_close(coll); 885 886 /* testing special jamo &a<\u1160 */ 887 
rule[0] = 0x26; 888 rule[1] = 0x71; 889 rule[2] = 0x3c; 890 rule[3] = 0x1165; 891 rule[4] = 0x2f; 892 rule[5] = 0x71; 893 rule[6] = 0x71; 894 rule[7] = 0x71; 895 rule[8] = 0x71; 896 rule[9] = 0; 897 898 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, 899 UCOL_DEFAULT_STRENGTH,NULL, &status); 900 iter = ucol_openElements(coll, &ch, 1, &status); 901 902 temporder = ucol_previous(iter, &status); 903 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) { 904 log_err("Failure at codepoint %d, maximum expansion count > %d\n", 905 ch, 5); 906 } 907 908 ucol_closeElements(iter); 909 ucol_close(coll); 910 } else { 911 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 912 } 913 914 } 915 916 917 static void assertEqual(UCollationElements *i1, UCollationElements *i2) 918 { 919 int32_t c1, c2; 920 int32_t count = 0; 921 UErrorCode status = U_ZERO_ERROR; 922 923 do 924 { 925 c1 = ucol_next(i1, &status); 926 c2 = ucol_next(i2, &status); 927 928 if (c1 != c2) 929 { 930 log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2); 931 break; 932 } 933 934 count += 1; 935 } 936 while (c1 != UCOL_NULLORDER); 937 } 938 939 /** 940 * Testing iterators with extremely small buffers 941 */ 942 static void TestSmallBuffer() 943 { 944 UErrorCode status = U_ZERO_ERROR; 945 UCollator *coll; 946 UCollationElements *testiter, 947 *iter; 948 int32_t count = 0; 949 OrderAndOffset *testorders, 950 *orders; 951 952 UChar teststr[500]; 953 UChar str[] = {0x300, 0x31A, 0}; 954 /* 955 creating a long string of decomposable characters, 956 since by default the writable buffer is of size 256 957 */ 958 while (count < 500) { 959 if ((count & 1) == 0) { 960 teststr[count ++] = 0x300; 961 } 962 else { 963 teststr[count ++] = 0x31A; 964 } 965 } 966 967 coll = ucol_open("th_TH", &status); 968 if(U_SUCCESS(status) && coll) { 969 testiter = ucol_openElements(coll, teststr, 500, &status); 970 iter = 
ucol_openElements(coll, str, 2, &status); 971 972 orders = getOrders(iter, &count); 973 if (count != 2) { 974 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n"); 975 } 976 977 /* 978 this will rearrange the string data to 250 characters of 0x300 first then 979 250 characters of 0x031A 980 */ 981 testorders = getOrders(testiter, &count); 982 983 if (count != 500) { 984 log_err("Error decomposition does not give the right sized collation elements\n"); 985 } 986 987 while (count != 0) { 988 /* UCA collation element for 0x0F76 */ 989 if ((count > 250 && testorders[-- count].order != orders[1].order) || 990 (count <= 250 && testorders[-- count].order != orders[0].order)) { 991 log_err("Error decomposition does not give the right collation element at %d count\n", count); 992 break; 993 } 994 } 995 996 free(testorders); 997 free(orders); 998 999 ucol_reset(testiter); 1000 1001 /* ensures closing of elements done properly to clear writable buffer */ 1002 ucol_next(testiter, &status); 1003 ucol_next(testiter, &status); 1004 ucol_closeElements(testiter); 1005 ucol_closeElements(iter); 1006 ucol_close(coll); 1007 } else { 1008 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 1009 } 1010 } 1011 1012 /** 1013 * Sniplets of code from genuca 1014 */ 1015 static int32_t hex2num(char hex) { 1016 if(hex>='0' && hex <='9') { 1017 return hex-'0'; 1018 } else if(hex>='a' && hex<='f') { 1019 return hex-'a'+10; 1020 } else if(hex>='A' && hex<='F') { 1021 return hex-'A'+10; 1022 } else { 1023 return 0; 1024 } 1025 } 1026 1027 /** 1028 * Getting codepoints from a string 1029 * @param str character string contain codepoints seperated by space and ended 1030 * by a semicolon 1031 * @param codepoints array for storage, assuming size > 5 1032 * @return position at the end of the codepoint section 1033 */ 1034 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) { 1035 UErrorCode errorCode = U_ZERO_ERROR; 1036 char *semi = 
uprv_strchr(str, ';'); 1037 char *pipe = uprv_strchr(str, '|'); 1038 char *s; 1039 *codepoints = 0; 1040 *contextCPs = 0; 1041 if(semi == NULL) { 1042 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str); 1043 return str; 1044 } 1045 if(pipe != NULL) { 1046 int32_t contextLength; 1047 *pipe = 0; 1048 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode); 1049 *pipe = '|'; 1050 if(U_FAILURE(errorCode)) { 1051 log_err("error parsing precontext string from FractionalUCA.txt %s\n", str); 1052 return str; 1053 } 1054 /* prepend the precontext string to the codepoints */ 1055 u_memcpy(codepoints, contextCPs, contextLength); 1056 codepoints += contextLength; 1057 /* start of the code point string */ 1058 s = pipe + 1; 1059 } else { 1060 s = str; 1061 } 1062 u_parseString(s, codepoints, 99, NULL, &errorCode); 1063 if(U_FAILURE(errorCode)) { 1064 log_err("error parsing code point string from FractionalUCA.txt %s\n", str); 1065 return str; 1066 } 1067 return semi + 1; 1068 } 1069 1070 /** 1071 * Sniplets of code from genuca 1072 */ 1073 static int32_t 1074 readElement(char **from, char *to, char separator, UErrorCode *status) 1075 { 1076 if (U_SUCCESS(*status)) { 1077 char buffer[1024]; 1078 int32_t i = 0; 1079 while (**from != separator) { 1080 if (**from != ' ') { 1081 *(buffer+i++) = **from; 1082 } 1083 (*from)++; 1084 } 1085 (*from)++; 1086 *(buffer + i) = 0; 1087 strcpy(to, buffer); 1088 return i/2; 1089 } 1090 1091 return 0; 1092 } 1093 1094 /** 1095 * Sniplets of code from genuca 1096 */ 1097 static uint32_t 1098 getSingleCEValue(char *primary, char *secondary, char *tertiary, 1099 UErrorCode *status) 1100 { 1101 if (U_SUCCESS(*status)) { 1102 uint32_t value = 0; 1103 char primsave = '\0'; 1104 char secsave = '\0'; 1105 char tersave = '\0'; 1106 char *primend = primary+4; 1107 char *secend = secondary+2; 1108 char *terend = tertiary+2; 1109 uint32_t primvalue; 1110 uint32_t secvalue; 1111 uint32_t tervalue; 1112 1113 if 
(uprv_strlen(primary) > 4) { 1114 primsave = *primend; 1115 *primend = '\0'; 1116 } 1117 1118 if (uprv_strlen(secondary) > 2) { 1119 secsave = *secend; 1120 *secend = '\0'; 1121 } 1122 1123 if (uprv_strlen(tertiary) > 2) { 1124 tersave = *terend; 1125 *terend = '\0'; 1126 } 1127 1128 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0; 1129 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0; 1130 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0; 1131 if(primvalue <= 0xFF) { 1132 primvalue <<= 8; 1133 } 1134 1135 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK) 1136 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK) 1137 | (tervalue & UCOL_TERTIARYORDERMASK); 1138 1139 if(primsave!='\0') { 1140 *primend = primsave; 1141 } 1142 if(secsave!='\0') { 1143 *secend = secsave; 1144 } 1145 if(tersave!='\0') { 1146 *terend = tersave; 1147 } 1148 return value; 1149 } 1150 return 0; 1151 } 1152 1153 /** 1154 * Getting collation elements generated from a string 1155 * @param str character string contain collation elements contained in [] and 1156 * seperated by space 1157 * @param ce array for storage, assuming size > 20 1158 * @param status error status 1159 * @return position at the end of the codepoint section 1160 */ 1161 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) { 1162 char *pStartCP = uprv_strchr(str, '['); 1163 int count = 0; 1164 char *pEndCP; 1165 char primary[100]; 1166 char secondary[100]; 1167 char tertiary[100]; 1168 1169 while (*pStartCP == '[') { 1170 uint32_t primarycount = 0; 1171 uint32_t secondarycount = 0; 1172 uint32_t tertiarycount = 0; 1173 uint32_t CEi = 1; 1174 pEndCP = strchr(pStartCP, ']'); 1175 if(pEndCP == NULL) { 1176 break; 1177 } 1178 pStartCP ++; 1179 1180 primarycount = readElement(&pStartCP, primary, ',', status); 1181 secondarycount = readElement(&pStartCP, secondary, ',', status); 1182 tertiarycount = readElement(&pStartCP, 
tertiary, ']', status); 1183 1184 /* I want to get the CEs entered right here, including continuation */ 1185 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status); 1186 if (U_FAILURE(*status)) { 1187 break; 1188 } 1189 1190 while (2 * CEi < primarycount || CEi < secondarycount || 1191 CEi < tertiarycount) { 1192 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ 1193 if (2 * CEi < primarycount) { 1194 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28); 1195 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24); 1196 } 1197 1198 if (2 * CEi + 1 < primarycount) { 1199 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20); 1200 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16); 1201 } 1202 1203 if (CEi < secondarycount) { 1204 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12); 1205 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8); 1206 } 1207 1208 if (CEi < tertiarycount) { 1209 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4); 1210 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF); 1211 } 1212 1213 CEi ++; 1214 ces[count ++] = value; 1215 } 1216 1217 pStartCP = pEndCP + 1; 1218 } 1219 ces[count] = 0; 1220 return pStartCP; 1221 } 1222 1223 /** 1224 * Getting the FractionalUCA.txt file stream 1225 */ 1226 static FileStream * getFractionalUCA(void) 1227 { 1228 char newPath[256]; 1229 char backupPath[256]; 1230 FileStream *result = NULL; 1231 1232 /* Look inside ICU_DATA first */ 1233 uprv_strcpy(newPath, ctest_dataSrcDir()); 1234 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING ); 1235 uprv_strcat(newPath, "FractionalUCA.txt"); 1236 1237 /* As a fallback, try to guess where the source data was located 1238 * at the time ICU was built, and look there. 
1239 */ 1240 #if defined (U_TOPSRCDIR) 1241 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 1242 #else 1243 { 1244 UErrorCode errorCode = U_ZERO_ERROR; 1245 strcpy(backupPath, loadTestData(&errorCode)); 1246 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); 1247 } 1248 #endif 1249 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt"); 1250 1251 result = T_FileStream_open(newPath, "rb"); 1252 1253 if (result == NULL) { 1254 result = T_FileStream_open(backupPath, "rb"); 1255 if (result == NULL) { 1256 log_err("Failed to open either %s or %s\n", newPath, backupPath); 1257 } 1258 } 1259 return result; 1260 } 1261 1262 /** 1263 * Testing the CEs returned by the iterator 1264 */ 1265 static void TestCEs() { 1266 FileStream *file = NULL; 1267 char line[2048]; 1268 char *str; 1269 UChar codepoints[10]; 1270 uint32_t ces[20]; 1271 UErrorCode status = U_ZERO_ERROR; 1272 UCollator *coll = ucol_open("", &status); 1273 uint32_t lineNo = 0; 1274 UChar contextCPs[5]; 1275 1276 if (U_FAILURE(status)) { 1277 log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status)); 1278 return; 1279 } 1280 1281 file = getFractionalUCA(); 1282 1283 if (file == NULL) { 1284 log_err("*** unable to open input FractionalUCA.txt file ***\n"); 1285 return; 1286 } 1287 1288 1289 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { 1290 int count = 0; 1291 UCollationElements *iter; 1292 int32_t preContextCeLen=0; 1293 lineNo++; 1294 /* skip this line if it is empty or a comment or is a return value 1295 or start of some variable section */ 1296 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || 1297 line[0] == 0x000D || line[0] == '[') { 1298 continue; 1299 } 1300 1301 str = getCodePoints(line, codepoints, contextCPs); 1302 1303 /* these are 'fake' codepoints in the fractional UCA, and are used just 1304 * for positioning of indirect 
values. They should not go through this 1305 * test. 1306 */ 1307 if(*codepoints == 0xFDD0) { 1308 continue; 1309 } 1310 if (*contextCPs != 0) { 1311 iter = ucol_openElements(coll, contextCPs, -1, &status); 1312 if (U_FAILURE(status)) { 1313 log_err("Error in opening collation elements\n"); 1314 break; 1315 } 1316 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) { 1317 preContextCeLen++; 1318 } 1319 ucol_closeElements(iter); 1320 } 1321 1322 getCEs(str, ces+preContextCeLen, &status); 1323 if (U_FAILURE(status)) { 1324 log_err("Error in parsing collation elements in FractionalUCA.txt\n"); 1325 break; 1326 } 1327 iter = ucol_openElements(coll, codepoints, -1, &status); 1328 if (U_FAILURE(status)) { 1329 log_err("Error in opening collation elements\n"); 1330 break; 1331 } 1332 for (;;) { 1333 uint32_t ce = (uint32_t)ucol_next(iter, &status); 1334 if (ce == 0xFFFFFFFF) { 1335 ce = 0; 1336 } 1337 /* we now unconditionally reorder Thai/Lao prevowels, so this 1338 * test would fail if we don't skip here. 
1339 */ 1340 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) { 1341 continue; 1342 } 1343 if (ce != ces[count] || U_FAILURE(status)) { 1344 log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n"); 1345 break; 1346 } 1347 if (ces[count] == 0) { 1348 break; 1349 } 1350 count ++; 1351 } 1352 ucol_closeElements(iter); 1353 } 1354 1355 T_FileStream_close(file); 1356 ucol_close(coll); 1357 } 1358 1359 /** 1360 * Testing the discontigous contractions 1361 */ 1362 static void TestDiscontiguos() { 1363 const char *rulestr = 1364 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; 1365 UChar rule[50]; 1366 int rulelen = u_unescape(rulestr, rule, 50); 1367 const char *src[] = { 1368 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", 1369 /* base character blocked */ 1370 "XD\\u0300", "XD\\u0300\\u0315", 1371 /* non blocking combining character */ 1372 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315", 1373 /* blocking combining character */ 1374 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315", 1375 /* contraction prefix */ 1376 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315", 1377 "X\\u0300\\u031A\\u0315", 1378 /* ends not with a contraction character */ 1379 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D", 1380 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D" 1381 }; 1382 const char *tgt[] = { 1383 /* non blocking combining character */ 1384 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC", 1385 /* base character blocked */ 1386 "X D \\u0300", "X D \\u0300\\u0315", 1387 /* non blocking combining character */ 1388 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319", 1389 /* blocking combining character */ 1390 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315", 1391 /* contraction prefix */ 1392 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319", 1393 "X\\u0300 \\u031A \\u0315", 1394 /* ends not with a contraction character */ 1395 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D", 1396 
"X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D" 1397 }; 1398 int size = 20; 1399 UCollator *coll; 1400 UErrorCode status = U_ZERO_ERROR; 1401 int count = 0; 1402 UCollationElements *iter; 1403 UCollationElements *resultiter; 1404 1405 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 1406 iter = ucol_openElements(coll, rule, 1, &status); 1407 resultiter = ucol_openElements(coll, rule, 1, &status); 1408 1409 if (U_FAILURE(status)) { 1410 log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status)); 1411 return; 1412 } 1413 1414 while (count < size) { 1415 UChar str[20]; 1416 UChar tstr[20]; 1417 int strLen = u_unescape(src[count], str, 20); 1418 UChar *s; 1419 1420 ucol_setText(iter, str, strLen, &status); 1421 if (U_FAILURE(status)) { 1422 log_err("Error opening collation iterator\n"); 1423 return; 1424 } 1425 1426 u_unescape(tgt[count], tstr, 20); 1427 s = tstr; 1428 1429 log_verbose("count %d\n", count); 1430 1431 for (;;) { 1432 uint32_t ce; 1433 UChar *e = u_strchr(s, 0x20); 1434 if (e == 0) { 1435 e = u_strchr(s, 0); 1436 } 1437 ucol_setText(resultiter, s, (int32_t)(e - s), &status); 1438 ce = ucol_next(resultiter, &status); 1439 if (U_FAILURE(status)) { 1440 log_err("Error manipulating collation iterator\n"); 1441 return; 1442 } 1443 while (ce != UCOL_NULLORDER) { 1444 if (ce != (uint32_t)ucol_next(iter, &status) || 1445 U_FAILURE(status)) { 1446 log_err("Discontiguos contraction test mismatch\n"); 1447 return; 1448 } 1449 ce = ucol_next(resultiter, &status); 1450 if (U_FAILURE(status)) { 1451 log_err("Error getting next collation element\n"); 1452 return; 1453 } 1454 } 1455 s = e + 1; 1456 if (*e == 0) { 1457 break; 1458 } 1459 } 1460 ucol_reset(iter); 1461 backAndForth(iter); 1462 count ++; 1463 } 1464 ucol_closeElements(resultiter); 1465 ucol_closeElements(iter); 1466 ucol_close(coll); 1467 } 1468 1469 static void TestCEBufferOverflow() 1470 { 1471 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1]; 
1472 UErrorCode status = U_ZERO_ERROR; 1473 UChar rule[10]; 1474 UCollator *coll; 1475 UCollationElements *iter; 1476 1477 u_uastrcpy(rule, "&z < AB"); 1478 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 1479 if (U_FAILURE(status)) { 1480 log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status)); 1481 return; 1482 } 1483 1484 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic 1485 test. this will cause an overflow in getPrev */ 1486 str[0] = 0x0041; /* 'A' */ 1487 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/ 1488 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE); 1489 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */ 1490 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1, 1491 &status); 1492 if (ucol_previous(iter, &status) == UCOL_NULLORDER || 1493 status == U_BUFFER_OVERFLOW_ERROR) { 1494 log_err("CE buffer should not overflow with long string of trail surrogates\n"); 1495 } 1496 ucol_closeElements(iter); 1497 ucol_close(coll); 1498 } 1499 1500 /** 1501 * Checking collation element validity. 
/**
 * Checking collation element validity.
 */
#define MAX_CODEPOINTS_TO_SHOW 10
/**
 * Formats code units as " XXXX" hexadecimal groups for error messages.
 * At most MAX_CODEPOINTS_TO_SHOW units are written; if the input is longer
 * (or formatting stopped early), " ..." is appended.
 * @param codepoints    code units to show
 * @param length        number of code units in codepoints; for a length <= 0
 *                      the output is the empty string
 * @param codepointText output buffer, always NUL-terminated on return; it
 *                      must hold at least 5*MAX_CODEPOINTS_TO_SHOW + 5 bytes
 */
static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
    int i, lengthToUse = length;
    /* make sure the caller gets a valid string even when nothing is written below */
    *codepointText = 0;
    if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
        lengthToUse = MAX_CODEPOINTS_TO_SHOW;
    }
    for (i = 0; i < lengthToUse; ++i) {
        int bytesWritten = sprintf(codepointText, " %04X", (unsigned int)*codepoints++);
        if (bytesWritten <= 0) {
            break;
        }
        codepointText += bytesWritten;
    }
    if (i < length) {
        sprintf(codepointText, " ...");
    }
}
1557 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet() 1558 * rather than the internal collation rule parser 1559 */ 1560 continue; 1561 } else { 1562 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n", 1563 (int)*codepoints, (int)length); 1564 break; 1565 } 1566 } 1567 primary = UCOL_PRIMARYORDER(ce); 1568 p1 = primary >> 8; 1569 p2 = primary & 0xFF; 1570 secondary = UCOL_SECONDARYORDER(ce); 1571 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION; 1572 1573 if (!isContinuation(ce)) { 1574 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { 1575 log_err("Empty CE %08lX except for case bits\n", (long)ce); 1576 break; 1577 } 1578 if (p1 == 0) { 1579 if (p2 != 0) { 1580 log_err("Primary 00 xx in %08lX\n", (long)ce); 1581 break; 1582 } 1583 primaryDone = TRUE; 1584 } else { 1585 if (p1 <= 2 || p1 >= 0xF0) { 1586 /* Primary first bytes F0..FF are specials. */ 1587 log_err("Primary first byte of %08lX out of range\n", (long)ce); 1588 break; 1589 } 1590 if (p2 == 0) { 1591 primaryDone = TRUE; 1592 } else { 1593 if (p2 <= 3 || p2 >= 0xFF) { 1594 /* Primary second bytes 03 and FF are sort key compression terminators. */ 1595 log_err("Primary second byte of %08lX out of range\n", (long)ce); 1596 break; 1597 } 1598 primaryDone = FALSE; 1599 } 1600 } 1601 if (secondary == 0) { 1602 if (primary != 0) { 1603 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce); 1604 break; 1605 } 1606 secondaryDone = TRUE; 1607 } else { 1608 if (secondary <= 2 || 1609 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80)) 1610 ) { 1611 /* Secondary first bytes common+1..+0x80 are used for sort key compression. */ 1612 log_err("Secondary byte of %08lX out of range\n", (long)ce); 1613 break; 1614 } 1615 secondaryDone = FALSE; 1616 } 1617 if (tertiary == 0) { 1618 /* We know that ce != 0. 
*/ 1619 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce); 1620 break; 1621 } 1622 if (tertiary <= 2) { 1623 log_err("Tertiary byte of %08lX out of range\n", (long)ce); 1624 break; 1625 } 1626 tertiaryDone = FALSE; 1627 } else { 1628 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { 1629 log_err("Empty continuation %08lX\n", (long)ce); 1630 break; 1631 } 1632 if (primaryDone && primary != 0) { 1633 log_err("Primary was done but continues in %08lX\n", (long)ce); 1634 break; 1635 } 1636 if (p1 == 0) { 1637 if (p2 != 0) { 1638 log_err("Primary 00 xx in %08lX\n", (long)ce); 1639 break; 1640 } 1641 primaryDone = TRUE; 1642 } else { 1643 if (p1 <= 2) { 1644 log_err("Primary first byte of %08lX out of range\n", (long)ce); 1645 break; 1646 } 1647 if (p2 == 0) { 1648 primaryDone = TRUE; 1649 } else { 1650 if (p2 <= 3) { 1651 log_err("Primary second byte of %08lX out of range\n", (long)ce); 1652 break; 1653 } 1654 } 1655 } 1656 if (secondaryDone && secondary != 0) { 1657 log_err("Secondary was done but continues in %08lX\n", (long)ce); 1658 break; 1659 } 1660 if (secondary == 0) { 1661 secondaryDone = TRUE; 1662 } else { 1663 if (secondary <= 2) { 1664 log_err("Secondary byte of %08lX out of range\n", (long)ce); 1665 break; 1666 } 1667 } 1668 if (tertiaryDone && tertiary != 0) { 1669 log_err("Tertiary was done but continues in %08lX\n", (long)ce); 1670 break; 1671 } 1672 if (tertiary == 0) { 1673 tertiaryDone = TRUE; 1674 } else if (tertiary <= 2) { 1675 log_err("Tertiary byte of %08lX out of range\n", (long)ce); 1676 break; 1677 } 1678 } 1679 } 1680 if (!result) { 1681 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5]; 1682 showCodepoints(codepoints, length, codepointText); 1683 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText); 1684 } 1685 ucol_closeElements(iter); 1686 return result; 1687 } 1688 1689 static void TestCEValidity() 1690 { 1691 /* testing UCA collation elements */ 1692 UErrorCode status = U_ZERO_ERROR; 1693 /* en_US 
has no tailorings */ 1694 UCollator *coll = ucol_open("root", &status); 1695 /* tailored locales */ 1696 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"}; 1697 const char *loc; 1698 FileStream *file = NULL; 1699 char line[2048]; 1700 UChar codepoints[11]; 1701 int count = 0; 1702 int maxCount = 0; 1703 UChar contextCPs[3]; 1704 UChar32 c; 1705 UParseError parseError; 1706 if (U_FAILURE(status)) { 1707 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status)); 1708 return; 1709 } 1710 log_verbose("Testing UCA elements\n"); 1711 file = getFractionalUCA(); 1712 if (file == NULL) { 1713 log_err("Fractional UCA data can not be opened\n"); 1714 return; 1715 } 1716 1717 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { 1718 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || 1719 line[0] == 0x000D || line[0] == '[') { 1720 continue; 1721 } 1722 1723 getCodePoints(line, codepoints, contextCPs); 1724 checkCEValidity(coll, codepoints, u_strlen(codepoints)); 1725 } 1726 1727 log_verbose("Testing UCA elements for the whole range of unicode characters\n"); 1728 for (c = 0; c <= 0xffff; ++c) { 1729 if (u_isdefined(c)) { 1730 codepoints[0] = (UChar)c; 1731 checkCEValidity(coll, codepoints, 1); 1732 } 1733 } 1734 for (; c <= 0x10ffff; ++c) { 1735 if (u_isdefined(c)) { 1736 int32_t i = 0; 1737 U16_APPEND_UNSAFE(codepoints, i, c); 1738 checkCEValidity(coll, codepoints, i); 1739 } 1740 } 1741 1742 ucol_close(coll); 1743 1744 /* testing tailored collation elements */ 1745 log_verbose("Testing tailored elements\n"); 1746 if(getTestOption(QUICK_OPTION)) { 1747 maxCount = sizeof(locale)/sizeof(locale[0]); 1748 } else { 1749 maxCount = uloc_countAvailable(); 1750 } 1751 while (count < maxCount) { 1752 const UChar *rules = NULL, 1753 *current = NULL; 1754 UChar *rulesCopy = NULL; 1755 int32_t ruleLen = 0; 1756 1757 uint32_t chOffset = 0; 1758 uint32_t chLen = 0; 1759 uint32_t exOffset = 0; 1760 uint32_t exLen = 
0; 1761 uint32_t prefixOffset = 0; 1762 uint32_t prefixLen = 0; 1763 UBool startOfRules = TRUE; 1764 UColOptionSet opts; 1765 1766 UColTokenParser src; 1767 uint32_t strength = 0; 1768 uint16_t specs = 0; 1769 if(getTestOption(QUICK_OPTION)) { 1770 loc = locale[count]; 1771 } else { 1772 loc = uloc_getAvailable(count); 1773 if(!hasCollationElements(loc)) { 1774 count++; 1775 continue; 1776 } 1777 } 1778 1779 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1780 1781 log_verbose("Testing CEs for %s\n", loc); 1782 1783 coll = ucol_open(loc, &status); 1784 if (U_FAILURE(status)) { 1785 log_err("%s collator creation failed\n", loc); 1786 return; 1787 } 1788 1789 src.opts = &opts; 1790 rules = ucol_getRules(coll, &ruleLen); 1791 1792 if (ruleLen > 0) { 1793 rulesCopy = (UChar *)uprv_malloc((ruleLen + 1794 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); 1795 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); 1796 src.current = src.source = rulesCopy; 1797 src.end = rulesCopy + ruleLen; 1798 src.extraCurrent = src.end; 1799 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1800 1801 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1802 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1803 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { 1804 strength = src.parsedToken.strength; 1805 chOffset = src.parsedToken.charsOffset; 1806 chLen = src.parsedToken.charsLen; 1807 exOffset = src.parsedToken.extensionOffset; 1808 exLen = src.parsedToken.extensionLen; 1809 prefixOffset = src.parsedToken.prefixOffset; 1810 prefixLen = src.parsedToken.prefixLen; 1811 specs = src.parsedToken.flags; 1812 1813 startOfRules = FALSE; 1814 uprv_memcpy(codepoints, src.source + chOffset, 1815 chLen * sizeof(UChar)); 1816 codepoints[chLen] = 0; 1817 checkCEValidity(coll, codepoints, chLen); 1818 } 1819 uprv_free(src.source); 1820 } 1821 1822 
ucol_close(coll); 1823 count ++; 1824 } 1825 T_FileStream_close(file); 1826 } 1827 1828 static void printSortKeyError(const UChar *codepoints, int length, 1829 uint8_t *sortkey, int sklen) 1830 { 1831 int count = 0; 1832 log_err("Sortkey not valid for "); 1833 while (length > 0) { 1834 log_err("0x%04x ", *codepoints); 1835 length --; 1836 codepoints ++; 1837 } 1838 log_err("\nSortkey : "); 1839 while (count < sklen) { 1840 log_err("0x%02x ", sortkey[count]); 1841 count ++; 1842 } 1843 log_err("\n"); 1844 } 1845 1846 /** 1847 * Checking sort key validity for all levels 1848 */ 1849 static UBool checkSortKeyValidity(UCollator *coll, 1850 const UChar *codepoints, 1851 int length) 1852 { 1853 UErrorCode status = U_ZERO_ERROR; 1854 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY, 1855 UCOL_TERTIARY, UCOL_QUATERNARY, 1856 UCOL_IDENTICAL}; 1857 int strengthlen = 5; 1858 int strengthIndex = 0; 1859 int caselevel = 0; 1860 1861 while (caselevel < 1) { 1862 if (caselevel == 0) { 1863 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); 1864 } 1865 else { 1866 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status); 1867 } 1868 1869 while (strengthIndex < strengthlen) { 1870 int count01 = 0; 1871 uint32_t count = 0; 1872 uint8_t sortkey[128]; 1873 uint32_t sklen; 1874 1875 ucol_setStrength(coll, strength[strengthIndex]); 1876 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128); 1877 while (sortkey[count] != 0) { 1878 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) { 1879 printSortKeyError(codepoints, length, sortkey, sklen); 1880 return FALSE; 1881 } 1882 if (sortkey[count] == 1) { 1883 count01 ++; 1884 } 1885 count ++; 1886 } 1887 1888 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) { 1889 printSortKeyError(codepoints, length, sortkey, sklen); 1890 return FALSE; 1891 } 1892 strengthIndex ++; 1893 } 1894 caselevel ++; 1895 } 1896 return TRUE; 1897 } 1898 1899 static void 
TestSortKeyValidity(void) 1900 { 1901 /* testing UCA collation elements */ 1902 UErrorCode status = U_ZERO_ERROR; 1903 /* en_US has no tailorings */ 1904 UCollator *coll = ucol_open("en_US", &status); 1905 /* tailored locales */ 1906 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"}; 1907 FileStream *file = NULL; 1908 char line[2048]; 1909 UChar codepoints[10]; 1910 int count = 0; 1911 UChar contextCPs[5]; 1912 UParseError parseError; 1913 if (U_FAILURE(status)) { 1914 log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status)); 1915 return; 1916 } 1917 log_verbose("Testing UCA elements\n"); 1918 file = getFractionalUCA(); 1919 if (file == NULL) { 1920 log_err("Fractional UCA data can not be opened\n"); 1921 return; 1922 } 1923 1924 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { 1925 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || 1926 line[0] == 0x000D || line[0] == '[') { 1927 continue; 1928 } 1929 1930 getCodePoints(line, codepoints, contextCPs); 1931 if(codepoints[0] == 0xFFFE) { 1932 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. 
*/ 1933 continue; 1934 } 1935 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints)); 1936 } 1937 1938 log_verbose("Testing UCA elements for the whole range of unicode characters\n"); 1939 codepoints[0] = 0; 1940 1941 while (codepoints[0] < 0xFFFF) { 1942 if (u_isdefined((UChar32)codepoints[0])) { 1943 checkSortKeyValidity(coll, codepoints, 1); 1944 } 1945 codepoints[0] ++; 1946 } 1947 1948 ucol_close(coll); 1949 1950 /* testing tailored collation elements */ 1951 log_verbose("Testing tailored elements\n"); 1952 while (count < 5) { 1953 const UChar *rules = NULL, 1954 *current = NULL; 1955 UChar *rulesCopy = NULL; 1956 int32_t ruleLen = 0; 1957 1958 uint32_t chOffset = 0; 1959 uint32_t chLen = 0; 1960 uint32_t exOffset = 0; 1961 uint32_t exLen = 0; 1962 uint32_t prefixOffset = 0; 1963 uint32_t prefixLen = 0; 1964 UBool startOfRules = TRUE; 1965 UColOptionSet opts; 1966 1967 UColTokenParser src; 1968 uint32_t strength = 0; 1969 uint16_t specs = 0; 1970 1971 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1972 1973 coll = ucol_open(locale[count], &status); 1974 if (U_FAILURE(status)) { 1975 log_err("%s collator creation failed\n", locale[count]); 1976 return; 1977 } 1978 1979 src.opts = &opts; 1980 rules = ucol_getRules(coll, &ruleLen); 1981 1982 if (ruleLen > 0) { 1983 rulesCopy = (UChar *)uprv_malloc((ruleLen + 1984 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); 1985 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); 1986 src.current = src.source = rulesCopy; 1987 src.end = rulesCopy + ruleLen; 1988 src.extraCurrent = src.end; 1989 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1990 1991 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1992 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1993 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) { 1994 strength = src.parsedToken.strength; 1995 chOffset = 
src.parsedToken.charsOffset; 1996 chLen = src.parsedToken.charsLen; 1997 exOffset = src.parsedToken.extensionOffset; 1998 exLen = src.parsedToken.extensionLen; 1999 prefixOffset = src.parsedToken.prefixOffset; 2000 prefixLen = src.parsedToken.prefixLen; 2001 specs = src.parsedToken.flags; 2002 2003 startOfRules = FALSE; 2004 uprv_memcpy(codepoints, src.source + chOffset, 2005 chLen * sizeof(UChar)); 2006 codepoints[chLen] = 0; 2007 if(codepoints[0] == 0xFFFE) { 2008 /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */ 2009 continue; 2010 } 2011 checkSortKeyValidity(coll, codepoints, chLen); 2012 } 2013 uprv_free(src.source); 2014 } 2015 2016 ucol_close(coll); 2017 count ++; 2018 } 2019 T_FileStream_close(file); 2020 } 2021 2022 /** 2023 * TestSearchCollatorElements tests iterator behavior (forwards and backwards) with 2024 * normalization on AND jamo tailoring, among other things. 2025 */ 2026 static const UChar tsceText[] = { /* Nothing in here should be ignorable */ 2027 0x0020, 0xAC00, /* simple LV Hangul */ 2028 0x0020, 0xAC01, /* simple LVT Hangul */ 2029 0x0020, 0xAC0F, /* LVTT, last jamo expands for search */ 2030 0x0020, 0xAFFF, /* LLVVVTT, every jamo expands for search */ 2031 0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */ 2032 0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */ 2033 0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */ 2034 0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */ 2035 0x0020, 0x00E6, /* small letter ae, expands */ 2036 0x0020, 0x1E4D, /* small letter o with tilde and acute, decomposes */ 2037 0x0020 2038 }; 2039 enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) }; 2040 2041 static const int32_t rootStandardOffsets[] = { 2042 0, 1,2, 2043 2, 3,4,4, 2044 4, 5,6,6, 2045 6, 7,8,8, 2046 8, 9,10,11, 2047 12, 13,14,15, 2048 16, 17,18,19, 2049 20, 21,22,23, 2050 24, 25,26,26,26, 2051 
26, 27,28,28, 2052 28, 2053 29 2054 }; 2055 enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) }; 2056 2057 static const int32_t rootSearchOffsets[] = { 2058 0, 1,2, 2059 2, 3,4,4, 2060 4, 5,6,6,6, 2061 6, 7,8,8,8,8,8,8, 2062 8, 9,10,11, 2063 12, 13,14,15, 2064 16, 17,18,19,20, 2065 20, 21,22,22,23,23,23,24, 2066 24, 25,26,26,26, 2067 26, 27,28,28, 2068 28, 2069 29 2070 }; 2071 enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) }; 2072 2073 typedef struct { 2074 const char * locale; 2075 const int32_t * offsets; 2076 int32_t offsetsLen; 2077 } TSCEItem; 2078 2079 static const TSCEItem tsceItems[] = { 2080 { "root", rootStandardOffsets, kLen_rootStandardOffsets }, 2081 { "root@collation=search", rootSearchOffsets, kLen_rootSearchOffsets }, 2082 { NULL, NULL, 0 } 2083 }; 2084 2085 static void TestSearchCollatorElements(void) 2086 { 2087 const TSCEItem * tsceItemPtr; 2088 for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) { 2089 UErrorCode status = U_ZERO_ERROR; 2090 UCollator* ucol = ucol_open(tsceItemPtr->locale, &status); 2091 if ( U_SUCCESS(status) ) { 2092 UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status); 2093 if ( U_SUCCESS(status) ) { 2094 int32_t offset, element; 2095 const int32_t * nextOffsetPtr; 2096 const int32_t * limitOffsetPtr; 2097 2098 nextOffsetPtr = tsceItemPtr->offsets; 2099 limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 2100 do { 2101 offset = ucol_getOffset(uce); 2102 element = ucol_next(uce, &status); 2103 if ( element == 0 ) { 2104 log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale ); 2105 } 2106 if ( nextOffsetPtr < limitOffsetPtr ) { 2107 if (offset != *nextOffsetPtr) { 2108 log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n", 2109 tsceItemPtr->locale, *nextOffsetPtr, offset ); 2110 nextOffsetPtr = limitOffsetPtr; 2111 break; 2112 } 
2113 nextOffsetPtr++; 2114 } else { 2115 log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale ); 2116 } 2117 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); 2118 if ( nextOffsetPtr < limitOffsetPtr ) { 2119 log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale ); 2120 } 2121 2122 ucol_setOffset(uce, kLen_tsceText, &status); 2123 status = U_ZERO_ERROR; 2124 nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen; 2125 limitOffsetPtr = tsceItemPtr->offsets; 2126 do { 2127 offset = ucol_getOffset(uce); 2128 element = ucol_previous(uce, &status); 2129 if ( element == 0 ) { 2130 log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale ); 2131 } 2132 if ( nextOffsetPtr > limitOffsetPtr ) { 2133 nextOffsetPtr--; 2134 if (offset != *nextOffsetPtr) { 2135 log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n", 2136 tsceItemPtr->locale, *nextOffsetPtr, offset ); 2137 nextOffsetPtr = limitOffsetPtr; 2138 break; 2139 } 2140 } else { 2141 log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale ); 2142 } 2143 } while ( U_SUCCESS(status) && element != UCOL_NULLORDER ); 2144 if ( nextOffsetPtr > limitOffsetPtr ) { 2145 log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale ); 2146 } 2147 2148 ucol_closeElements(uce); 2149 } else { 2150 log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); 2151 } 2152 ucol_close(ucol); 2153 } else { 2154 log_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) ); 2155 } 2156 } 2157 } 2158 2159 #endif /* #if !UCONFIG_NO_COLLATION */ 2160