1 2 /******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 2001-2013, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ********************************************************************/ 7 /******************************************************************************* 8 * 9 * File cmsccoll.C 10 * 11 *******************************************************************************/ 12 /** 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where 14 * to fit. 15 */ 16 17 #include <stdio.h> 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_COLLATION 22 23 #include "unicode/ucol.h" 24 #include "unicode/ucoleitr.h" 25 #include "unicode/uloc.h" 26 #include "cintltst.h" 27 #include "ccolltst.h" 28 #include "callcoll.h" 29 #include "unicode/ustring.h" 30 #include "string.h" 31 #include "ucol_imp.h" 32 #include "ucol_tok.h" 33 #include "cmemory.h" 34 #include "cstring.h" 35 #include "uassert.h" 36 #include "unicode/parseerr.h" 37 #include "unicode/ucnv.h" 38 #include "unicode/ures.h" 39 #include "unicode/uscript.h" 40 #include "unicode/utf16.h" 41 #include "uparse.h" 42 #include "putilimp.h" 43 44 45 #define LEN(a) (sizeof(a)/sizeof(a[0])) 46 47 #define MAX_TOKEN_LEN 16 48 49 typedef UCollationResult tst_strcoll(void *collator, const int object, 50 const UChar *source, const int sLen, 51 const UChar *target, const int tLen); 52 53 54 55 const static char cnt1[][10] = { 56 57 "AA", 58 "AC", 59 "AZ", 60 "AQ", 61 "AB", 62 "ABZ", 63 "ABQ", 64 "Z", 65 "ABC", 66 "Q", 67 "B" 68 }; 69 70 const static char cnt2[][10] = { 71 "DA", 72 "DAD", 73 "DAZ", 74 "MAR", 75 "Z", 76 "DAVIS", 77 "MARK", 78 "DAV", 79 "DAVI" 80 }; 81 82 static void IncompleteCntTest(void) 83 { 84 UErrorCode status = U_ZERO_ERROR; 85 UChar temp[90]; 86 UChar t1[90]; 87 UChar t2[90]; 88 89 UCollator *coll = NULL; 90 uint32_t i = 0, j = 0; 91 uint32_t size = 0; 92 93 u_uastrcpy(temp, " & Z < ABC < Q < B"); 
94 95 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 96 97 if(U_SUCCESS(status)) { 98 size = sizeof(cnt1)/sizeof(cnt1[0]); 99 for(i = 0; i < size-1; i++) { 100 for(j = i+1; j < size; j++) { 101 UCollationElements *iter; 102 u_uastrcpy(t1, cnt1[i]); 103 u_uastrcpy(t2, cnt1[j]); 104 doTest(coll, t1, t2, UCOL_LESS); 105 /* synwee : added collation element iterator test */ 106 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 107 if (U_FAILURE(status)) { 108 log_err("Creation of iterator failed\n"); 109 break; 110 } 111 backAndForth(iter); 112 ucol_closeElements(iter); 113 } 114 } 115 } 116 117 ucol_close(coll); 118 119 120 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV"); 121 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 122 123 if(U_SUCCESS(status)) { 124 size = sizeof(cnt2)/sizeof(cnt2[0]); 125 for(i = 0; i < size-1; i++) { 126 for(j = i+1; j < size; j++) { 127 UCollationElements *iter; 128 u_uastrcpy(t1, cnt2[i]); 129 u_uastrcpy(t2, cnt2[j]); 130 doTest(coll, t1, t2, UCOL_LESS); 131 132 /* synwee : added collation element iterator test */ 133 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 134 if (U_FAILURE(status)) { 135 log_err("Creation of iterator failed\n"); 136 break; 137 } 138 backAndForth(iter); 139 ucol_closeElements(iter); 140 } 141 } 142 } 143 144 ucol_close(coll); 145 146 147 } 148 149 const static char shifted[][20] = { 150 "black bird", 151 "black-bird", 152 "blackbird", 153 "black Bird", 154 "black-Bird", 155 "blackBird", 156 "black birds", 157 "black-birds", 158 "blackbirds" 159 }; 160 161 const static UCollationResult shiftedTert[] = { 162 UCOL_EQUAL, 163 UCOL_EQUAL, 164 UCOL_EQUAL, 165 UCOL_LESS, 166 UCOL_EQUAL, 167 UCOL_EQUAL, 168 UCOL_LESS, 169 UCOL_EQUAL, 170 UCOL_EQUAL 171 }; 172 173 const static char nonignorable[][20] = { 174 "black bird", 175 "black Bird", 176 "black birds", 177 "black-bird", 178 "black-Bird", 179 "black-birds", 180 
"blackbird", 181 "blackBird", 182 "blackbirds" 183 }; 184 185 static void BlackBirdTest(void) { 186 UErrorCode status = U_ZERO_ERROR; 187 UChar t1[90]; 188 UChar t2[90]; 189 190 uint32_t i = 0, j = 0; 191 uint32_t size = 0; 192 UCollator *coll = ucol_open("en_US", &status); 193 194 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 195 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); 196 197 if(U_SUCCESS(status)) { 198 size = sizeof(nonignorable)/sizeof(nonignorable[0]); 199 for(i = 0; i < size-1; i++) { 200 for(j = i+1; j < size; j++) { 201 u_uastrcpy(t1, nonignorable[i]); 202 u_uastrcpy(t2, nonignorable[j]); 203 doTest(coll, t1, t2, UCOL_LESS); 204 } 205 } 206 } 207 208 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 209 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 210 211 if(U_SUCCESS(status)) { 212 size = sizeof(shifted)/sizeof(shifted[0]); 213 for(i = 0; i < size-1; i++) { 214 for(j = i+1; j < size; j++) { 215 u_uastrcpy(t1, shifted[i]); 216 u_uastrcpy(t2, shifted[j]); 217 doTest(coll, t1, t2, UCOL_LESS); 218 } 219 } 220 } 221 222 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); 223 if(U_SUCCESS(status)) { 224 size = sizeof(shifted)/sizeof(shifted[0]); 225 for(i = 1; i < size; i++) { 226 u_uastrcpy(t1, shifted[i-1]); 227 u_uastrcpy(t2, shifted[i]); 228 doTest(coll, t1, t2, shiftedTert[i]); 229 } 230 } 231 232 ucol_close(coll); 233 } 234 235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = { 236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000}, 237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000}, 238 {0x0041/*'A'*/, 0x0300, 0x0000}, 239 {0x00C0, 0x0301, 0x0000}, 240 /* this would work with forced normalization */ 241 {0x00C0, 0x0316, 0x0000} 242 }; 243 244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = { 245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}, 247 {0x00C0, 0}, 248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 249 
/* this would work with forced normalization */ 250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000} 251 }; 252 253 const static UCollationResult results[] = { 254 UCOL_GREATER, 255 UCOL_EQUAL, 256 UCOL_EQUAL, 257 UCOL_GREATER, 258 UCOL_EQUAL 259 }; 260 261 static void FunkyATest(void) 262 { 263 264 int32_t i; 265 UErrorCode status = U_ZERO_ERROR; 266 UCollator *myCollation; 267 myCollation = ucol_open("en_US", &status); 268 if(U_FAILURE(status)){ 269 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 270 return; 271 } 272 log_verbose("Testing some A letters, for some reason\n"); 273 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 274 ucol_setStrength(myCollation, UCOL_TERTIARY); 275 for (i = 0; i < 4 ; i++) 276 { 277 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); 278 } 279 ucol_close(myCollation); 280 } 281 282 UColAttributeValue caseFirst[] = { 283 UCOL_OFF, 284 UCOL_LOWER_FIRST, 285 UCOL_UPPER_FIRST 286 }; 287 288 289 UColAttributeValue alternateHandling[] = { 290 UCOL_NON_IGNORABLE, 291 UCOL_SHIFTED 292 }; 293 294 UColAttributeValue caseLevel[] = { 295 UCOL_OFF, 296 UCOL_ON 297 }; 298 299 UColAttributeValue strengths[] = { 300 UCOL_PRIMARY, 301 UCOL_SECONDARY, 302 UCOL_TERTIARY, 303 UCOL_QUATERNARY, 304 UCOL_IDENTICAL 305 }; 306 307 #if 0 308 static const char * strengthsC[] = { 309 "UCOL_PRIMARY", 310 "UCOL_SECONDARY", 311 "UCOL_TERTIARY", 312 "UCOL_QUATERNARY", 313 "UCOL_IDENTICAL" 314 }; 315 316 static const char * caseFirstC[] = { 317 "UCOL_OFF", 318 "UCOL_LOWER_FIRST", 319 "UCOL_UPPER_FIRST" 320 }; 321 322 323 static const char * alternateHandlingC[] = { 324 "UCOL_NON_IGNORABLE", 325 "UCOL_SHIFTED" 326 }; 327 328 static const char * caseLevelC[] = { 329 "UCOL_OFF", 330 "UCOL_ON" 331 }; 332 333 /* not used currently - does not test only prints */ 334 static void PrintMarkDavis(void) 335 { 336 UErrorCode status = U_ZERO_ERROR; 337 UChar m[256]; 338 uint8_t sortkey[256]; 
339 UCollator *coll = ucol_open("en_US", &status); 340 uint32_t h,i,j,k, sortkeysize; 341 uint32_t sizem = 0; 342 char buffer[512]; 343 uint32_t len = 512; 344 345 log_verbose("PrintMarkDavis"); 346 347 u_uastrcpy(m, "Mark Davis"); 348 sizem = u_strlen(m); 349 350 351 m[1] = 0xe4; 352 353 for(i = 0; i<sizem; i++) { 354 fprintf(stderr, "\\u%04X ", m[i]); 355 } 356 fprintf(stderr, "\n"); 357 358 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) { 359 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status); 360 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]); 361 362 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) { 363 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status); 364 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]); 365 366 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) { 367 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status); 368 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]); 369 370 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) { 371 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status); 372 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256); 373 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]); 374 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len)); 375 } 376 377 } 378 379 } 380 381 } 382 } 383 #endif 384 385 static void BillFairmanTest(void) { 386 /* 387 ** check for actual locale via ICU resource bundles 388 ** 389 ** lp points to the original locale ("fr_FR_....") 390 */ 391 392 UResourceBundle *lr,*cr; 393 UErrorCode lec = U_ZERO_ERROR; 394 const char *lp = "fr_FR_you_ll_never_find_this_locale"; 395 396 log_verbose("BillFairmanTest\n"); 397 398 lr = ures_open(NULL,lp,&lec); 399 if (lr) { 400 cr = ures_getByKey(lr,"collations",0,&lec); 401 if (cr) { 402 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec); 403 if (lp) { 404 if (U_SUCCESS(lec)) { 405 if(strcmp(lp, "fr") != 0) { 
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp); 407 } 408 } 409 } 410 ures_close(cr); 411 } 412 ures_close(lr); 413 } 414 } 415 416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){ 417 UChar source[256] = { '\0'}; 418 UChar target[256] = { '\0'}; 419 UChar preP = 0x31a3; 420 UChar preQ = 0x310d; 421 /* 422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491; 423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413; 424 */ 425 /*log_verbose("Testing primary\n");*/ 426 427 doTest(col, p, q, UCOL_LESS); 428 /* 429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q)); 430 431 if(result!=UCOL_LESS){ 432 aescstrdup(p,utfSource,256); 433 aescstrdup(q,utfTarget,256); 434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 435 } 436 */ 437 source[0] = preP; 438 u_strcpy(source+1,p); 439 target[0] = preQ; 440 u_strcpy(target+1,q); 441 doTest(col, source, target, UCOL_LESS); 442 /* 443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget); 444 */ 445 } 446 447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){ 448 UChar source[256] = { '\0'}; 449 UChar target[256] = { '\0'}; 450 451 /*log_verbose("Testing secondary\n");*/ 452 453 doTest(col, p, q, UCOL_LESS); 454 /* 455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget); 456 */ 457 source[0] = 0x0053; 458 u_strcpy(source+1,p); 459 target[0]= 0x0073; 460 u_strcpy(target+1,q); 461 462 doTest(col, source, target, UCOL_LESS); 463 /* 464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget); 465 */ 466 467 468 u_strcpy(source,p); 469 source[u_strlen(p)] = 0x62; 470 source[u_strlen(p)+1] = 0; 471 472 473 u_strcpy(target,q); 474 target[u_strlen(q)] = 0x61; 475 target[u_strlen(q)+1] = 0; 476 477 doTest(col, source, target, UCOL_GREATER); 478 479 /* 480 fprintf(file,"secondary is swamped by 1 failed source: %s target: 
%s \n",utfSource,utfTarget); 481 */ 482 } 483 484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){ 485 UChar source[256] = { '\0'}; 486 UChar target[256] = { '\0'}; 487 488 /*log_verbose("Testing tertiary\n");*/ 489 490 doTest(col, p, q, UCOL_LESS); 491 /* 492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget); 493 */ 494 source[0] = 0x0020; 495 u_strcpy(source+1,p); 496 target[0]= 0x002D; 497 u_strcpy(target+1,q); 498 499 doTest(col, source, target, UCOL_LESS); 500 /* 501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget); 502 */ 503 504 u_strcpy(source,p); 505 source[u_strlen(p)] = 0xE0; 506 source[u_strlen(p)+1] = 0; 507 508 u_strcpy(target,q); 509 target[u_strlen(q)] = 0x61; 510 target[u_strlen(q)+1] = 0; 511 512 doTest(col, source, target, UCOL_GREATER); 513 514 /* 515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget); 516 */ 517 } 518 519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){ 520 /* 521 UChar source[256] = { '\0'}; 522 UChar target[256] = { '\0'}; 523 */ 524 525 doTest(col, p, q, UCOL_EQUAL); 526 /* 527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 528 */ 529 } 530 531 static void testCollator(UCollator *coll, UErrorCode *status) { 532 const UChar *rules = NULL, *current = NULL; 533 int32_t ruleLen = 0; 534 uint32_t strength = 0; 535 uint32_t chOffset = 0; uint32_t chLen = 0; 536 uint32_t exOffset = 0; uint32_t exLen = 0; 537 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 538 uint32_t firstEx = 0; 539 /* uint32_t rExpsLen = 0; */ 540 uint32_t firstLen = 0; 541 UBool varT = FALSE; UBool top_ = TRUE; 542 uint16_t specs = 0; 543 UBool startOfRules = TRUE; 544 UBool lastReset = FALSE; 545 UBool before = FALSE; 546 uint32_t beforeStrength = 0; 547 UColTokenParser src; 548 UColOptionSet opts; 549 550 UChar first[256]; 551 UChar second[256]; 552 UChar tempB[256]; 
553 uint32_t tempLen; 554 UChar *rulesCopy = NULL; 555 UParseError parseError; 556 557 uprv_memset(&src, 0, sizeof(UColTokenParser)); 558 559 src.opts = &opts; 560 561 rules = ucol_getRules(coll, &ruleLen); 562 if(U_SUCCESS(*status) && ruleLen > 0) { 563 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 564 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 565 src.current = src.source = rulesCopy; 566 src.end = rulesCopy+ruleLen; 567 src.extraCurrent = src.end; 568 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 569 *first = *second = 0; 570 571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 573 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) { 574 strength = src.parsedToken.strength; 575 chOffset = src.parsedToken.charsOffset; 576 chLen = src.parsedToken.charsLen; 577 exOffset = src.parsedToken.extensionOffset; 578 exLen = src.parsedToken.extensionLen; 579 prefixOffset = src.parsedToken.prefixOffset; 580 prefixLen = src.parsedToken.prefixLen; 581 specs = src.parsedToken.flags; 582 583 startOfRules = FALSE; 584 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 585 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 586 if(top_) { /* if reset is on top, the sequence is broken. 
We should have an empty string */ 587 second[0] = 0; 588 } else { 589 u_strncpy(second,src.source+chOffset, chLen); 590 second[chLen] = 0; 591 592 if(exLen > 0 && firstEx == 0) { 593 u_strncat(first, src.source+exOffset, exLen); 594 first[firstLen+exLen] = 0; 595 } 596 597 if(lastReset == TRUE && prefixLen != 0) { 598 u_strncpy(first+prefixLen, first, firstLen); 599 u_strncpy(first, src.source+prefixOffset, prefixLen); 600 first[firstLen+prefixLen] = 0; 601 firstLen = firstLen+prefixLen; 602 } 603 604 if(before == TRUE) { /* swap first and second */ 605 u_strcpy(tempB, first); 606 u_strcpy(first, second); 607 u_strcpy(second, tempB); 608 609 tempLen = firstLen; 610 firstLen = chLen; 611 chLen = tempLen; 612 613 tempLen = firstEx; 614 firstEx = exLen; 615 exLen = tempLen; 616 if(beforeStrength < strength) { 617 strength = beforeStrength; 618 } 619 } 620 } 621 lastReset = FALSE; 622 623 switch(strength){ 624 case UCOL_IDENTICAL: 625 testEquality(coll,first,second); 626 break; 627 case UCOL_PRIMARY: 628 testPrimary(coll,first,second); 629 break; 630 case UCOL_SECONDARY: 631 testSecondary(coll,first,second); 632 break; 633 case UCOL_TERTIARY: 634 testTertiary(coll,first,second); 635 break; 636 case UCOL_TOK_RESET: 637 lastReset = TRUE; 638 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 639 if(before) { 640 beforeStrength = (specs & UCOL_TOK_BEFORE)-1; 641 } 642 break; 643 default: 644 break; 645 } 646 647 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */ 648 before = FALSE; 649 } else { 650 firstLen = chLen; 651 firstEx = exLen; 652 u_strcpy(first, second); 653 } 654 } 655 uprv_free(src.source); 656 uprv_free(src.reorderCodes); 657 } 658 } 659 660 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 661 UCollator *UCA = (UCollator *)collator; 662 return ucol_strcoll(UCA, source, sLen, target, tLen); 663 } 664 665 /* 666 static 
UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 667 #if U_PLATFORM_HAS_WIN32_API 668 LCID lcid = (LCID)collator; 669 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen); 670 #else 671 return 0; 672 #endif 673 } 674 */ 675 676 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts, 677 UChar s1, UChar s2, 678 const UChar *s, const uint32_t sLen, 679 const UChar *t, const uint32_t tLen) { 680 UChar source[256] = {0}; 681 UChar target[256] = {0}; 682 683 source[0] = s1; 684 u_strcpy(source+1, s); 685 target[0] = s2; 686 u_strcpy(target+1, t); 687 688 return func(collator, opts, source, sLen+1, target, tLen+1); 689 } 690 691 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts, 692 UChar s1, UChar s2, 693 const UChar *s, const uint32_t sLen, 694 const UChar *t, const uint32_t tLen) { 695 UChar source[256] = {0}; 696 UChar target[256] = {0}; 697 698 u_strcpy(source, s); 699 source[sLen] = s1; 700 u_strcpy(target, t); 701 target[tLen] = s2; 702 703 return func(collator, opts, source, sLen+1, target, tLen+1); 704 } 705 706 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts, 707 const UChar *s, const uint32_t sLen, 708 const UChar *t, const uint32_t tLen, 709 UCollationResult result) { 710 /*UChar fPrimary = 0x6d;*/ 711 /*UChar sPrimary = 0x6e;*/ 712 UChar fSecondary = 0x310d; 713 UChar sSecondary = 0x31a3; 714 UChar fTertiary = 0x310f; 715 UChar sTertiary = 0x31b7; 716 717 UCollationResult oposite; 718 if(result == UCOL_EQUAL) { 719 return UCOL_IDENTICAL; 720 } else if(result == UCOL_GREATER) { 721 oposite = UCOL_LESS; 722 } else { 723 oposite = UCOL_GREATER; 724 } 725 726 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) { 727 return UCOL_PRIMARY; 728 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) && 
729 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) { 730 return UCOL_SECONDARY; 731 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) && 732 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) { 733 return UCOL_TERTIARY; 734 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) && 735 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) { 736 return UCOL_QUATERNARY; 737 } else { 738 return UCOL_IDENTICAL; 739 } 740 } 741 742 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) { 743 uint32_t i = 0; 744 745 if(res == UCOL_EQUAL || strength == 0xdeadbeef) { 746 buffer[0] = '='; 747 buffer[1] = '='; 748 buffer[2] = '\0'; 749 } else if(res == UCOL_GREATER) { 750 for(i = 0; i<strength+1; i++) { 751 buffer[i] = '>'; 752 } 753 buffer[strength+1] = '\0'; 754 } else { 755 for(i = 0; i<strength+1; i++) { 756 buffer[i] = '<'; 757 } 758 buffer[strength+1] = '\0'; 759 } 760 761 return buffer; 762 } 763 764 765 766 static void logFailure (const char *platform, const char *test, 767 const UChar *source, const uint32_t sLen, 768 const UChar *target, const uint32_t tLen, 769 UCollationResult realRes, uint32_t realStrength, 770 UCollationResult expRes, uint32_t expStrength, UBool error) { 771 772 uint32_t i = 0; 773 774 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256]; 775 static int32_t maxOutputLength = 0; 776 int32_t outputLength; 777 778 *sEsc = *tEsc = *s = *t = 0; 779 if(error == TRUE) { 780 log_err("Difference between expected and generated order. 
Run test with -v for more info\n"); 781 } else if(getTestOption(VERBOSITY_OPTION) == 0) { 782 return; 783 } 784 for(i = 0; i<sLen; i++) { 785 sprintf(b, "%04X", source[i]); 786 strcat(sEsc, "\\u"); 787 strcat(sEsc, b); 788 strcat(s, b); 789 strcat(s, " "); 790 if(source[i] < 0x80) { 791 sprintf(b, "(%c)", source[i]); 792 strcat(sEsc, b); 793 } 794 } 795 for(i = 0; i<tLen; i++) { 796 sprintf(b, "%04X", target[i]); 797 strcat(tEsc, "\\u"); 798 strcat(tEsc, b); 799 strcat(t, b); 800 strcat(t, " "); 801 if(target[i] < 0x80) { 802 sprintf(b, "(%c)", target[i]); 803 strcat(tEsc, b); 804 } 805 } 806 /* 807 strcpy(output, "[[ "); 808 strcat(output, sEsc); 809 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 810 strcat(output, tEsc); 811 812 strcat(output, " : "); 813 814 strcat(output, sEsc); 815 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 816 strcat(output, tEsc); 817 strcat(output, " ]] "); 818 819 log_verbose("%s", output); 820 */ 821 822 823 strcpy(output, "DIFF: "); 824 825 strcat(output, s); 826 strcat(output, " : "); 827 strcat(output, t); 828 829 strcat(output, test); 830 strcat(output, ": "); 831 832 strcat(output, sEsc); 833 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 834 strcat(output, tEsc); 835 836 strcat(output, " "); 837 838 strcat(output, platform); 839 strcat(output, ": "); 840 841 strcat(output, sEsc); 842 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 843 strcat(output, tEsc); 844 845 outputLength = (int32_t)strlen(output); 846 if(outputLength > maxOutputLength) { 847 maxOutputLength = outputLength; 848 U_ASSERT(outputLength < sizeof(output)); 849 } 850 851 log_verbose("%s\n", output); 852 853 } 854 855 /* 856 static void printOutRules(const UChar *rules) { 857 uint32_t len = u_strlen(rules); 858 uint32_t i = 0; 859 char toPrint; 860 uint32_t line = 0; 861 862 fprintf(stdout, "Rules:"); 863 864 for(i = 0; i<len; i++) { 865 if(rules[i]<0x7f && rules[i]>=0x20) { 866 toPrint 
= (char)rules[i]; 867 if(toPrint == '&') { 868 line = 1; 869 fprintf(stdout, "\n&"); 870 } else if(toPrint == ';') { 871 fprintf(stdout, "<<"); 872 line+=2; 873 } else if(toPrint == ',') { 874 fprintf(stdout, "<<<"); 875 line+=3; 876 } else { 877 fprintf(stdout, "%c", toPrint); 878 line++; 879 } 880 } else if(rules[i]<0x3400 || rules[i]>=0xa000) { 881 fprintf(stdout, "\\u%04X", rules[i]); 882 line+=6; 883 } 884 if(line>72) { 885 fprintf(stdout, "\n"); 886 line = 0; 887 } 888 } 889 890 log_verbose("\n"); 891 892 } 893 */ 894 895 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) { 896 uint32_t diffs = 0; 897 UCollationResult realResult; 898 uint32_t realStrength; 899 900 uint32_t sLen = u_strlen(first); 901 uint32_t tLen = u_strlen(second); 902 903 realResult = func(collator, opts, first, sLen, second, tLen); 904 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult); 905 906 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) { 907 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error); 908 diffs++; 909 } else if(realResult != UCOL_LESS || realStrength != strength) { 910 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error); 911 diffs++; 912 } 913 return diffs; 914 } 915 916 917 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) { 918 const UChar *rules = NULL, *current = NULL; 919 int32_t ruleLen = 0; 920 uint32_t strength = 0; 921 uint32_t chOffset = 0; uint32_t chLen = 0; 922 uint32_t exOffset = 0; uint32_t exLen = 0; 923 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 924 /* uint32_t rExpsLen = 0; */ 925 uint32_t firstLen = 0, secondLen = 0; 926 UBool varT = FALSE; UBool top_ = TRUE; 927 uint16_t specs = 0; 928 UBool startOfRules = 
TRUE; 929 UColTokenParser src; 930 UColOptionSet opts; 931 932 UChar first[256]; 933 UChar second[256]; 934 UChar *rulesCopy = NULL; 935 936 uint32_t UCAdiff = 0; 937 uint32_t Windiff = 1; 938 UParseError parseError; 939 940 uprv_memset(&src, 0, sizeof(UColTokenParser)); 941 src.opts = &opts; 942 943 rules = ucol_getRules(coll, &ruleLen); 944 945 /*printOutRules(rules);*/ 946 947 if(U_SUCCESS(*status) && ruleLen > 0) { 948 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 949 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 950 src.current = src.source = rulesCopy; 951 src.end = rulesCopy+ruleLen; 952 src.extraCurrent = src.end; 953 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 954 *first = *second = 0; 955 956 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 957 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 958 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 959 strength = src.parsedToken.strength; 960 chOffset = src.parsedToken.charsOffset; 961 chLen = src.parsedToken.charsLen; 962 exOffset = src.parsedToken.extensionOffset; 963 exLen = src.parsedToken.extensionLen; 964 prefixOffset = src.parsedToken.prefixOffset; 965 prefixLen = src.parsedToken.prefixLen; 966 specs = src.parsedToken.flags; 967 968 startOfRules = FALSE; 969 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 970 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 971 972 u_strncpy(second,src.source+chOffset, chLen); 973 second[chLen] = 0; 974 secondLen = chLen; 975 976 if(exLen > 0) { 977 u_strncat(first, src.source+exOffset, exLen); 978 first[firstLen+exLen] = 0; 979 firstLen += exLen; 980 } 981 982 if(strength != UCOL_TOK_RESET) { 983 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) { 984 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error); 
985 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/ 986 } 987 } 988 989 990 firstLen = chLen; 991 u_strcpy(first, second); 992 993 } 994 if(UCAdiff != 0 && Windiff != 0) { 995 log_verbose("\n"); 996 } 997 if(UCAdiff == 0) { 998 log_verbose("No immediate difference with %s!\n", refName); 999 } 1000 if(Windiff == 0) { 1001 log_verbose("No immediate difference with Win32!\n"); 1002 } 1003 uprv_free(src.source); 1004 uprv_free(src.reorderCodes); 1005 } 1006 } 1007 1008 /* 1009 * Takes two CEs (lead and continuation) and 1010 * compares them as CEs should be compared: 1011 * primary vs. primary, secondary vs. secondary 1012 * tertiary vs. tertiary 1013 */ 1014 static int32_t compareCEs(uint32_t s1, uint32_t s2, 1015 uint32_t t1, uint32_t t2) { 1016 uint32_t s = 0, t = 0; 1017 if(s1 == t1 && s2 == t2) { 1018 return 0; 1019 } 1020 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 1021 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); 1022 if(s < t) { 1023 return -1; 1024 } else if(s > t) { 1025 return 1; 1026 } else { 1027 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; 1028 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; 1029 if(s < t) { 1030 return -1; 1031 } else if(s > t) { 1032 return 1; 1033 } else { 1034 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); 1035 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); 1036 if(s < t) { 1037 return -1; 1038 } else { 1039 return 1; 1040 } 1041 } 1042 } 1043 } 1044 1045 typedef struct { 1046 uint32_t startCE; 1047 uint32_t startContCE; 1048 uint32_t limitCE; 1049 uint32_t limitContCE; 1050 } indirectBoundaries; 1051 1052 /* these values are used for finding CE values for indirect positioning. */ 1053 /* Indirect positioning is a mechanism for allowing resets on symbolic */ 1054 /* values. It only works for resets and you cannot tailor indirect names */ 1055 /* An indirect name can define either an anchor point or a range. 
An */ 1056 /* anchor point behaves in exactly the same way as a code point in reset */ 1057 /* would, except that it cannot be tailored. A range (we currently only */ 1058 /* know for the [top] range will explicitly set the upper bound for */ 1059 /* generated CEs, thus allowing for better control over how many CEs can */ 1060 /* be squeezed between in the range without performance penalty. */ 1061 /* In that respect, we use [top] for tailoring of locales that use CJK */ 1062 /* characters. Other indirect values are currently a pure convenience, */ 1063 /* they can be used to assure that the CEs will be always positioned in */ 1064 /* the same place relative to a point with known properties (e.g. first */ 1065 /* primary ignorable). */ 1066 static indirectBoundaries ucolIndirectBoundaries[15]; 1067 static UBool indirectBoundariesSet = FALSE; 1068 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) { 1069 /* Set values for the top - TODO: once we have values for all the indirects, we are going */ 1070 /* to initalize here. 
*/ 1071 ucolIndirectBoundaries[indexR].startCE = start[0]; 1072 ucolIndirectBoundaries[indexR].startContCE = start[1]; 1073 if(end) { 1074 ucolIndirectBoundaries[indexR].limitCE = end[0]; 1075 ucolIndirectBoundaries[indexR].limitContCE = end[1]; 1076 } else { 1077 ucolIndirectBoundaries[indexR].limitCE = 0; 1078 ucolIndirectBoundaries[indexR].limitContCE = 0; 1079 } 1080 } 1081 1082 static void testCEs(UCollator *coll, UErrorCode *status) { 1083 const UChar *rules = NULL, *current = NULL; 1084 int32_t ruleLen = 0; 1085 1086 uint32_t strength = 0; 1087 uint32_t maxStrength = UCOL_IDENTICAL; 1088 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE; 1089 uint32_t lastCE; 1090 uint32_t lastContCE; 1091 1092 int32_t result = 0; 1093 uint32_t chOffset = 0; uint32_t chLen = 0; 1094 uint32_t exOffset = 0; uint32_t exLen = 0; 1095 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 1096 uint32_t oldOffset = 0; 1097 1098 /* uint32_t rExpsLen = 0; */ 1099 /* uint32_t firstLen = 0; */ 1100 uint16_t specs = 0; 1101 UBool varT = FALSE; UBool top_ = TRUE; 1102 UBool startOfRules = TRUE; 1103 UBool before = FALSE; 1104 UColTokenParser src; 1105 UColOptionSet opts; 1106 UParseError parseError; 1107 UChar *rulesCopy = NULL; 1108 collIterate *c = uprv_new_collIterate(status); 1109 UCAConstants *consts = NULL; 1110 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ 1111 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; 1112 const char *colLoc; 1113 UCollator *UCA = ucol_open("root", status); 1114 1115 if (U_FAILURE(*status)) { 1116 log_err("Could not open root collator %s\n", u_errorName(*status)); 1117 uprv_delete_collIterate(c); 1118 return; 1119 } 1120 1121 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); 1122 if (U_FAILURE(*status)) { 1123 log_err("Could not get collator name: %s\n", u_errorName(*status)); 1124 ucol_close(UCA); 1125 uprv_delete_collIterate(c); 1126 return; 1127 } 1128 1129 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1130 1131 consts 
= (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts); 1132 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0]; 1133 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */ 1134 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0]; 1135 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1]; 1136 1137 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND; 1138 1139 src.opts = &opts; 1140 1141 rules = ucol_getRules(coll, &ruleLen); 1142 1143 src.invUCA = ucol_initInverseUCA(status); 1144 1145 if(indirectBoundariesSet == FALSE) { 1146 /* UCOL_RESET_TOP_VALUE */ 1147 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1148 /* UCOL_FIRST_PRIMARY_IGNORABLE */ 1149 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0); 1150 /* UCOL_LAST_PRIMARY_IGNORABLE */ 1151 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0); 1152 /* UCOL_FIRST_SECONDARY_IGNORABLE */ 1153 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0); 1154 /* UCOL_LAST_SECONDARY_IGNORABLE */ 1155 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0); 1156 /* UCOL_FIRST_TERTIARY_IGNORABLE */ 1157 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0); 1158 /* UCOL_LAST_TERTIARY_IGNORABLE */ 1159 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0); 1160 /* UCOL_FIRST_VARIABLE */ 1161 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0); 1162 /* UCOL_LAST_VARIABLE */ 1163 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0); 1164 /* UCOL_FIRST_NON_VARIABLE */ 1165 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0); 1166 /* UCOL_LAST_NON_VARIABLE */ 1167 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1168 /* UCOL_FIRST_IMPLICIT */ 1169 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0); 1170 /* UCOL_LAST_IMPLICIT */ 1171 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, 
consts->UCA_FIRST_TRAILING); 1172 /* UCOL_FIRST_TRAILING */ 1173 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0); 1174 /* UCOL_LAST_TRAILING */ 1175 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0); 1176 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24); 1177 indirectBoundariesSet = TRUE; 1178 } 1179 1180 1181 if(U_SUCCESS(*status) && ruleLen > 0) { 1182 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 1183 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 1184 src.current = src.source = rulesCopy; 1185 src.end = rulesCopy+ruleLen; 1186 src.extraCurrent = src.end; 1187 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1188 1189 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1190 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1191 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 1192 strength = src.parsedToken.strength; 1193 chOffset = src.parsedToken.charsOffset; 1194 chLen = src.parsedToken.charsLen; 1195 exOffset = src.parsedToken.extensionOffset; 1196 exLen = src.parsedToken.extensionLen; 1197 prefixOffset = src.parsedToken.prefixOffset; 1198 prefixLen = src.parsedToken.prefixLen; 1199 specs = src.parsedToken.flags; 1200 1201 startOfRules = FALSE; 1202 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 1203 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 1204 1205 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status); 1206 1207 currCE = ucol_getNextCE(coll, c, status); 1208 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) { 1209 log_verbose("Thai prevowel detected. 
Will pick next CE\n"); 1210 currCE = ucol_getNextCE(coll, c, status); 1211 } 1212 1213 currContCE = ucol_getNextCE(coll, c, status); 1214 if(!isContinuation(currContCE)) { 1215 currContCE = 0; 1216 } 1217 1218 /* we need to repack CEs here */ 1219 1220 if(strength == UCOL_TOK_RESET) { 1221 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 1222 if(top_ == TRUE) { 1223 int32_t tokenIndex = src.parsedToken.indirectIndex; 1224 1225 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE; 1226 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE; 1227 } else { 1228 nextCE = baseCE = currCE; 1229 nextContCE = baseContCE = currContCE; 1230 } 1231 maxStrength = UCOL_IDENTICAL; 1232 } else { 1233 if(strength < maxStrength) { 1234 maxStrength = strength; 1235 if(baseCE == UCOL_RESET_TOP_VALUE) { 1236 log_verbose("Resetting to [top]\n"); 1237 nextCE = UCOL_NEXT_TOP_VALUE; 1238 nextContCE = UCOL_NEXT_TOP_CONT; 1239 } else { 1240 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength); 1241 } 1242 if(result < 0) { 1243 if(ucol_isTailored(coll, *(src.source+oldOffset), status)) { 1244 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset)); 1245 return; 1246 } else { 1247 log_err("%s: couldn't find the CE\n", colLoc); 1248 return; 1249 } 1250 } 1251 } 1252 1253 currCE &= 0xFFFFFF3F; 1254 currContCE &= 0xFFFFFFBF; 1255 1256 if(maxStrength == UCOL_IDENTICAL) { 1257 if(baseCE != currCE || baseContCE != currContCE) { 1258 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1259 } 1260 } else { 1261 if(strength == UCOL_IDENTICAL) { 1262 if(lastCE != currCE || lastContCE != currContCE) { 1263 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1264 } 1265 } else { 1266 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) { 1267 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= 
nextContCE)) {*/ 1268 log_err("%s: current CE is not less than base CE\n", colLoc); 1269 } 1270 if(!before) { 1271 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) { 1272 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1273 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1274 } 1275 } else { 1276 before = FALSE; 1277 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) { 1278 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1279 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1280 } 1281 } 1282 } 1283 } 1284 1285 } 1286 1287 oldOffset = chOffset; 1288 lastCE = currCE & 0xFFFFFF3F; 1289 lastContCE = currContCE & 0xFFFFFFBF; 1290 } 1291 uprv_free(src.source); 1292 uprv_free(src.reorderCodes); 1293 } 1294 ucol_close(UCA); 1295 uprv_delete_collIterate(c); 1296 } 1297 1298 #if 0 1299 /* these locales are now picked from index RB */ 1300 static const char* localesToTest[] = { 1301 "ar", "bg", "ca", "cs", "da", 1302 "el", "en_BE", "en_US_POSIX", 1303 "es", "et", "fi", "fr", "hi", 1304 "hr", "hu", "is", "iw", "ja", 1305 "ko", "lt", "lv", "mk", "mt", 1306 "nb", "nn", "nn_NO", "pl", "ro", 1307 "ru", "sh", "sk", "sl", "sq", 1308 "sr", "sv", "th", "tr", "uk", 1309 "vi", "zh", "zh_TW" 1310 }; 1311 #endif 1312 1313 static const char* rulesToTest[] = { 1314 /* Funky fa rule */ 1315 "&\\u0622 < \\u0627 << \\u0671 < \\u0621", 1316 /*"& Z < p, P",*/ 1317 /* Cui Mins rules */ 1318 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/ 1319 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1320 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/ 1321 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1322 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/ 1323 
"&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/ 1324 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/ 1325 }; 1326 1327 1328 static void TestCollations(void) { 1329 int32_t noOfLoc = uloc_countAvailable(); 1330 int32_t i = 0, j = 0; 1331 1332 UErrorCode status = U_ZERO_ERROR; 1333 char cName[256]; 1334 UChar name[256]; 1335 int32_t nameSize; 1336 1337 1338 const char *locName = NULL; 1339 UCollator *coll = NULL; 1340 UCollator *UCA = ucol_open("", &status); 1341 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status); 1342 if (U_FAILURE(status)) { 1343 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status)); 1344 return; 1345 } 1346 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 1347 1348 for(i = 0; i<noOfLoc; i++) { 1349 status = U_ZERO_ERROR; 1350 locName = uloc_getAvailable(i); 1351 if(uprv_strcmp("ja", locName) == 0) { 1352 log_verbose("Don't know how to test prefixes\n"); 1353 continue; 1354 } 1355 if(hasCollationElements(locName)) { 1356 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status); 1357 for(j = 0; j<nameSize; j++) { 1358 cName[j] = (char)name[j]; 1359 } 1360 cName[nameSize] = 0; 1361 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1362 coll = ucol_open(locName, &status); 1363 if(U_SUCCESS(status)) { 1364 testAgainstUCA(coll, UCA, "UCA", FALSE, &status); 1365 ucol_close(coll); 1366 } else { 1367 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status)); 1368 status = U_ZERO_ERROR; 1369 } 1370 } 1371 } 1372 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status); 1373 ucol_close(UCA); 1374 } 1375 1376 static void RamsRulesTest(void) { 1377 UErrorCode status = U_ZERO_ERROR; 1378 int32_t i = 0; 1379 UCollator *coll = NULL; 1380 UChar rule[2048]; 1381 uint32_t ruleLen; 1382 int32_t noOfLoc = 
uloc_countAvailable(); 1383 const char *locName = NULL; 1384 1385 log_verbose("RamsRulesTest\n"); 1386 1387 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) { 1388 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */ 1389 return; 1390 } 1391 1392 for(i = 0; i<noOfLoc; i++) { 1393 locName = uloc_getAvailable(i); 1394 if(hasCollationElements(locName)) { 1395 if (uprv_strcmp("ja", locName)==0) { 1396 log_verbose("Don't know how to test Japanese because of prefixes\n"); 1397 continue; 1398 } 1399 if (uprv_strcmp("de__PHONEBOOK", locName)==0) { 1400 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n"); 1401 continue; 1402 } 1403 if (uprv_strcmp("bn", locName)==0 || 1404 uprv_strcmp("bs", locName)==0 || /* Add due to import per cldrbug 5647 */ 1405 uprv_strcmp("bs_Cyrl", locName)==0 || /* Add due to import per cldrbug 5647 */ 1406 uprv_strcmp("en_US_POSIX", locName)==0 || 1407 uprv_strcmp("fa_AF", locName)==0 || /* Add due to import per cldrbug 5647 */ 1408 uprv_strcmp("he", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */ 1409 uprv_strcmp("he_IL", locName)==0 || /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */ 1410 uprv_strcmp("km", locName)==0 || 1411 uprv_strcmp("km_KH", locName)==0 || 1412 uprv_strcmp("my", locName)==0 || 1413 uprv_strcmp("si", locName)==0 || 1414 uprv_strcmp("si_LK", locName)==0 || 1415 uprv_strcmp("sr_Latn", locName)==0 || /* Add due to import per cldrbug 5647 */ 1416 uprv_strcmp("th", locName)==0 || 1417 uprv_strcmp("th_TH", locName)==0 || 1418 uprv_strcmp("zh", locName)==0 || 1419 uprv_strcmp("zh_Hant", locName)==0 1420 ) { 1421 log_verbose("Don't know how to test %s. 
" 1422 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName); 1423 continue; 1424 } 1425 log_verbose("Testing locale %s\n", locName); 1426 status = U_ZERO_ERROR; 1427 coll = ucol_open(locName, &status); 1428 if(U_SUCCESS(status)) { 1429 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) { 1430 if(coll->image->jamoSpecial == TRUE) { 1431 log_err("%s has special JAMOs\n", locName); 1432 } 1433 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); 1434 testCollator(coll, &status); 1435 testCEs(coll, &status); 1436 } else { 1437 log_verbose("Skipping %s: %s\n", locName, u_errorName(status)); 1438 } 1439 ucol_close(coll); 1440 } else { 1441 log_err("Could not open %s: %s\n", locName, u_errorName(status)); 1442 } 1443 } 1444 } 1445 1446 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) { 1447 log_verbose("Testing rule: %s\n", rulesToTest[i]); 1448 ruleLen = u_unescape(rulesToTest[i], rule, 2048); 1449 status = U_ZERO_ERROR; 1450 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1451 if(U_SUCCESS(status)) { 1452 testCollator(coll, &status); 1453 testCEs(coll, &status); 1454 ucol_close(coll); 1455 } else { 1456 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]); 1457 } 1458 } 1459 1460 } 1461 1462 static void IsTailoredTest(void) { 1463 UErrorCode status = U_ZERO_ERROR; 1464 uint32_t i = 0; 1465 UCollator *coll = NULL; 1466 UChar rule[2048]; 1467 UChar tailored[2048]; 1468 UChar notTailored[2048]; 1469 uint32_t ruleLen, tailoredLen, notTailoredLen; 1470 1471 log_verbose("IsTailoredTest\n"); 1472 1473 u_uastrcpy(rule, "&Z < A, B, C;c < d"); 1474 ruleLen = u_strlen(rule); 1475 1476 u_uastrcpy(tailored, "ABCcd"); 1477 tailoredLen = u_strlen(tailored); 1478 1479 u_uastrcpy(notTailored, "ZabD"); 1480 notTailoredLen = u_strlen(notTailored); 1481 1482 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1483 
if(U_SUCCESS(status)) { 1484 for(i = 0; i<tailoredLen; i++) { 1485 if(!ucol_isTailored(coll, tailored[i], &status)) { 1486 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]); 1487 } 1488 } 1489 for(i = 0; i<notTailoredLen; i++) { 1490 if(ucol_isTailored(coll, notTailored[i], &status)) { 1491 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]); 1492 } 1493 } 1494 ucol_close(coll); 1495 } 1496 else { 1497 log_err_status(status, "Can't tailor rules\n"); 1498 } 1499 /* Code coverage */ 1500 status = U_ZERO_ERROR; 1501 coll = ucol_open("ja", &status); 1502 if(!ucol_isTailored(coll, 0x4E9C, &status)) { 1503 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n"); 1504 } 1505 ucol_close(coll); 1506 } 1507 1508 1509 const static char chTest[][20] = { 1510 "c", 1511 "C", 1512 "ca", "cb", "cx", "cy", "CZ", 1513 "c\\u030C", "C\\u030C", 1514 "h", 1515 "H", 1516 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", 1517 "ch", "cH", "Ch", "CH", 1518 "cha", "charly", "che", "chh", "chch", "chr", 1519 "i", "I", "iarly", 1520 "r", "R", 1521 "r\\u030C", "R\\u030C", 1522 "s", 1523 "S", 1524 "s\\u030C", "S\\u030C", 1525 "z", "Z", 1526 "z\\u030C", "Z\\u030C" 1527 }; 1528 1529 static void TestChMove(void) { 1530 UChar t1[256] = {0}; 1531 UChar t2[256] = {0}; 1532 1533 uint32_t i = 0, j = 0; 1534 uint32_t size = 0; 1535 UErrorCode status = U_ZERO_ERROR; 1536 1537 UCollator *coll = ucol_open("cs", &status); 1538 1539 if(U_SUCCESS(status)) { 1540 size = sizeof(chTest)/sizeof(chTest[0]); 1541 for(i = 0; i < size-1; i++) { 1542 for(j = i+1; j < size; j++) { 1543 u_unescape(chTest[i], t1, 256); 1544 u_unescape(chTest[j], t2, 256); 1545 doTest(coll, t1, t2, UCOL_LESS); 1546 } 1547 } 1548 } 1549 else { 1550 log_data_err("Can't open collator"); 1551 } 1552 ucol_close(coll); 1553 } 1554 1555 1556 1557 1558 const static char impTest[][20] = { 1559 "\\u4e00", 1560 "a", 1561 "A", 1562 "b", 1563 "B", 1564 
"\\u4e01" 1565 }; 1566 1567 1568 static void TestImplicitTailoring(void) { 1569 static const struct { 1570 const char *rules; 1571 const char *data[10]; 1572 const uint32_t len; 1573 } tests[] = { 1574 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 }, 1575 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 }, 1576 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3}, 1577 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3} 1578 }; 1579 1580 int32_t i = 0; 1581 1582 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 1583 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 1584 } 1585 1586 /* 1587 UChar t1[256] = {0}; 1588 UChar t2[256] = {0}; 1589 1590 const char *rule = "&\\u4e00 < a <<< A < b <<< B"; 1591 1592 uint32_t i = 0, j = 0; 1593 uint32_t size = 0; 1594 uint32_t ruleLen = 0; 1595 UErrorCode status = U_ZERO_ERROR; 1596 UCollator *coll = NULL; 1597 ruleLen = u_unescape(rule, t1, 256); 1598 1599 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1600 1601 if(U_SUCCESS(status)) { 1602 size = sizeof(impTest)/sizeof(impTest[0]); 1603 for(i = 0; i < size-1; i++) { 1604 for(j = i+1; j < size; j++) { 1605 u_unescape(impTest[i], t1, 256); 1606 u_unescape(impTest[j], t2, 256); 1607 doTest(coll, t1, t2, UCOL_LESS); 1608 } 1609 } 1610 } 1611 else { 1612 log_err("Can't open collator"); 1613 } 1614 ucol_close(coll); 1615 */ 1616 } 1617 1618 static void TestFCDProblem(void) { 1619 UChar t1[256] = {0}; 1620 UChar t2[256] = {0}; 1621 1622 const char *s1 = "\\u0430\\u0306\\u0325"; 1623 const char *s2 = "\\u04D1\\u0325"; 1624 1625 UErrorCode status = U_ZERO_ERROR; 1626 UCollator *coll = ucol_open("", &status); 1627 u_unescape(s1, t1, 256); 1628 u_unescape(s2, t2, 256); 1629 1630 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 1631 doTest(coll, t1, t2, UCOL_EQUAL); 1632 1633 
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 1634 doTest(coll, t1, t2, UCOL_EQUAL); 1635 1636 ucol_close(coll); 1637 } 1638 1639 /* 1640 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC 1641 We're only using NFC/NFD in this test. 1642 */ 1643 #define NORM_BUFFER_TEST_LEN 18 1644 typedef struct { 1645 UChar32 u; 1646 UChar NFC[NORM_BUFFER_TEST_LEN]; 1647 UChar NFD[NORM_BUFFER_TEST_LEN]; 1648 } tester; 1649 1650 static void TestComposeDecompose(void) { 1651 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */ 1652 static const UChar UNICODESET_STR[] = { 1653 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61, 1654 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72, 1655 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0 1656 }; 1657 int32_t noOfLoc; 1658 int32_t i = 0, j = 0; 1659 1660 UErrorCode status = U_ZERO_ERROR; 1661 const char *locName = NULL; 1662 uint32_t nfcSize; 1663 uint32_t nfdSize; 1664 tester **t; 1665 uint32_t noCases = 0; 1666 UCollator *coll = NULL; 1667 UChar32 u = 0; 1668 UChar comp[NORM_BUFFER_TEST_LEN]; 1669 uint32_t len = 0; 1670 UCollationElements *iter; 1671 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status); 1672 int32_t charsToTestSize; 1673 1674 noOfLoc = uloc_countAvailable(); 1675 1676 coll = ucol_open("", &status); 1677 if (U_FAILURE(status)) { 1678 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status)); 1679 return; 1680 } 1681 charsToTestSize = uset_size(charsToTest); 1682 if (charsToTestSize <= 0) { 1683 log_err("Set was zero. 
Missing data?\n"); 1684 return; 1685 } 1686 t = (tester **)malloc(charsToTestSize * sizeof(tester *)); 1687 t[0] = (tester *)malloc(sizeof(tester)); 1688 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize); 1689 1690 for(u = 0; u < charsToTestSize; u++) { 1691 UChar32 ch = uset_charAt(charsToTest, u); 1692 len = 0; 1693 U16_APPEND_UNSAFE(comp, len, ch); 1694 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1695 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 1696 1697 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0) 1698 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) { 1699 t[noCases]->u = ch; 1700 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) { 1701 u_strncpy(t[noCases]->NFC, comp, len); 1702 t[noCases]->NFC[len] = 0; 1703 } 1704 noCases++; 1705 t[noCases] = (tester *)malloc(sizeof(tester)); 1706 uprv_memset(t[noCases], 0, sizeof(tester)); 1707 } 1708 } 1709 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize); 1710 uset_close(charsToTest); 1711 charsToTest = NULL; 1712 1713 for(u=0; u<(UChar32)noCases; u++) { 1714 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1715 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u); 1716 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1717 } 1718 } 1719 /* 1720 for(u = 0; u < charsToTestSize; u++) { 1721 if(!(u&0xFFFF)) { 1722 log_verbose("%08X ", u); 1723 } 1724 uprv_memset(t[noCases], 0, sizeof(tester)); 1725 t[noCases]->u = u; 1726 len = 0; 1727 U16_APPEND_UNSAFE(comp, len, u); 1728 comp[len] = 0; 1729 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1730 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, 
&status); 1731 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL); 1732 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL); 1733 } 1734 */ 1735 1736 ucol_close(coll); 1737 1738 log_verbose("Testing locales, number of cases = %i\n", noCases); 1739 for(i = 0; i<noOfLoc; i++) { 1740 status = U_ZERO_ERROR; 1741 locName = uloc_getAvailable(i); 1742 if(hasCollationElements(locName)) { 1743 char cName[256]; 1744 UChar name[256]; 1745 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status); 1746 1747 for(j = 0; j<nameSize; j++) { 1748 cName[j] = (char)name[j]; 1749 } 1750 cName[nameSize] = 0; 1751 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1752 1753 coll = ucol_open(locName, &status); 1754 ucol_setStrength(coll, UCOL_IDENTICAL); 1755 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1756 1757 for(u=0; u<(UChar32)noCases; u++) { 1758 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1759 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName); 1760 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1761 log_verbose("Testing NFC\n"); 1762 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status); 1763 backAndForth(iter); 1764 log_verbose("Testing NFD\n"); 1765 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1766 backAndForth(iter); 1767 } 1768 } 1769 ucol_closeElements(iter); 1770 ucol_close(coll); 1771 } 1772 } 1773 for(u = 0; u <= (UChar32)noCases; u++) { 1774 free(t[u]); 1775 } 1776 free(t); 1777 } 1778 1779 static void TestEmptyRule(void) { 1780 UErrorCode status = U_ZERO_ERROR; 1781 UChar rulez[] = { 0 }; 1782 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1783 1784 ucol_close(coll); 1785 } 1786 1787 static void TestUCARules(void) { 1788 UErrorCode status = U_ZERO_ERROR; 1789 UChar b[256]; 1790 UChar *rules = b; 1791 uint32_t ruleLen = 0; 1792 UCollator *UCAfromRules = NULL; 1793 UCollator *coll = ucol_open("", &status); 
1794 if(status == U_FILE_ACCESS_ERROR) { 1795 log_data_err("Is your data around?\n"); 1796 return; 1797 } else if(U_FAILURE(status)) { 1798 log_err("Error opening collator\n"); 1799 return; 1800 } 1801 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256); 1802 1803 log_verbose("TestUCARules\n"); 1804 if(ruleLen > 256) { 1805 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar)); 1806 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen); 1807 } 1808 log_verbose("Rules length is %d\n", ruleLen); 1809 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1810 if(U_SUCCESS(status)) { 1811 ucol_close(UCAfromRules); 1812 } else { 1813 log_verbose("Unable to create a collator from UCARules!\n"); 1814 } 1815 /* 1816 u_unescape(blah, b, 256); 1817 ucol_getSortKey(coll, b, 1, res, 256); 1818 */ 1819 ucol_close(coll); 1820 if(rules != b) { 1821 free(rules); 1822 } 1823 } 1824 1825 1826 /* Pinyin tonal order */ 1827 /* 1828 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0) 1829 (w/macron)< (w/acute)< (w/caron)< (w/grave) 1830 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8) 1831 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec) 1832 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2) 1833 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9) 1834 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) < 1835 .. (\u00fc) 1836 1837 However, in testing we got the following order: 1838 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101) 1839 (w/acute)< (w/grave)< (w/caron)< (w/macron) 1840 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) < 1841 .. (\u0113) 1842 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b) 1843 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d) 1844 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) < 1845 .. (\u01d8) 1846 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. 
(\u016b) 1847 */ 1848 1849 static void TestBefore(void) { 1850 const static char *data[] = { 1851 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A", 1852 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E", 1853 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I", 1854 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O", 1855 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U", 1856 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc" 1857 }; 1858 genericRulesStarter( 1859 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0" 1860 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8" 1861 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec" 1862 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2" 1863 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9" 1864 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc", 1865 data, sizeof(data)/sizeof(data[0])); 1866 } 1867 1868 #if 0 1869 /* superceded by TestBeforePinyin */ 1870 static void TestJ784(void) { 1871 const static char *data[] = { 1872 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", 1873 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", 1874 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", 1875 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", 1876 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", 1877 "\\u00fc", 1878 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc" 1879 }; 1880 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0])); 1881 } 1882 #endif 1883 1884 #if 0 1885 /* superceded by the changes to the lv locale */ 1886 static void TestJ831(void) { 1887 const static char *data[] = { 1888 "I", 1889 "i", 1890 "Y", 1891 "y" 1892 }; 1893 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0])); 1894 } 1895 #endif 1896 1897 static void TestJ815(void) { 1898 const static char *data[] = { 1899 "aa", 1900 "Aa", 1901 "ab", 1902 "Ab", 1903 "ad", 1904 "Ad", 1905 "ae", 1906 "Ae", 1907 "\\u00e6", 1908 "\\u00c6", 1909 "af", 1910 "Af", 1911 "b", 1912 "B" 1913 }; 1914 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); 1915 genericRulesStarter("[backwards 
2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0])); 1916 } 1917 1918 1919 /* 1920 "& a < b < c < d& r < c", "& a < b < d& r < c", 1921 "& a < b < c < d& c < m", "& a < b < c < m < d", 1922 "& a < b < c < d& a < m", "& a < m < b < c < d", 1923 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d", 1924 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d", 1925 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e", 1926 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e", 1927 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e", 1928 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g", 1929 */ 1930 static void TestRedundantRules(void) { 1931 int32_t i; 1932 1933 static const struct { 1934 const char *rules; 1935 const char *expectedRules; 1936 const char *testdata[8]; 1937 uint32_t testdatalen; 1938 } tests[] = { 1939 /* this test conflicts with positioning of CODAN placeholder */ 1940 /*{ 1941 "& a <<< b <<< c << d <<< e& [before 1] e <<< x", 1942 "&\\u2089<<<x", 1943 {"\\u2089", "x"}, 2 1944 }, */ 1945 /* this test conflicts with the [before x] syntax tightening */ 1946 /*{ 1947 "& b <<< c <<< d << e <<< f& [before 1] f <<< x", 1948 "&\\u0252<<<x", 1949 {"\\u0252", "x"}, 2 1950 }, */ 1951 /* this test conflicts with the [before x] syntax tightening */ 1952 /*{ 1953 "& a < b <<< c << d <<< e& [before 1] e <<< x", 1954 "& a <<< x < b <<< c << d <<< e", 1955 {"a", "x", "b", "c", "d", "e"}, 6 1956 }, */ 1957 { 1958 "& a < b < c < d& [before 1] c < m", 1959 "& a < b < m < c < d", 1960 {"a", "b", "m", "c", "d"}, 5 1961 }, 1962 { 1963 "& a < b <<< c << d <<< e& [before 3] e <<< x", 1964 "& a < b <<< c << d <<< x <<< e", 1965 {"a", "b", "c", "d", "x", "e"}, 6 1966 }, 1967 /* this test conflicts with the [before x] syntax tightening */ 1968 /* { 1969 "& a < b <<< c << d <<< e& [before 2] e <<< x", 1970 "& a < b <<< c <<< x << 
d <<< e", 1971 {"a", "b", "c", "x", "d", "e"},, 6 1972 }, */ 1973 { 1974 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", 1975 "& a < b <<< c << d <<< e <<< f < x < g", 1976 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8 1977 }, 1978 { 1979 "& a <<< b << c < d& a < m", 1980 "& a <<< b << c < m < d", 1981 {"a", "b", "c", "m", "d"}, 5 1982 }, 1983 { 1984 "&a<b<<b\\u0301 &z<b", 1985 "&a<b\\u0301 &z<b", 1986 {"a", "b\\u0301", "z", "b"}, 4 1987 }, 1988 { 1989 "&z<m<<<q<<<m", 1990 "&z<q<<<m", 1991 {"z", "q", "m"},3 1992 }, 1993 { 1994 "&z<<<m<q<<<m", 1995 "&z<q<<<m", 1996 {"z", "q", "m"}, 3 1997 }, 1998 { 1999 "& a < b < c < d& r < c", 2000 "& a < b < d& r < c", 2001 {"a", "b", "d"}, 3 2002 }, 2003 { 2004 "& a < b < c < d& r < c", 2005 "& a < b < d& r < c", 2006 {"r", "c"}, 2 2007 }, 2008 { 2009 "& a < b < c < d& c < m", 2010 "& a < b < c < m < d", 2011 {"a", "b", "c", "m", "d"}, 5 2012 }, 2013 { 2014 "& a < b < c < d& a < m", 2015 "& a < m < b < c < d", 2016 {"a", "m", "b", "c", "d"}, 5 2017 } 2018 }; 2019 2020 2021 UCollator *credundant = NULL; 2022 UCollator *cresulting = NULL; 2023 UErrorCode status = U_ZERO_ERROR; 2024 UChar rlz[2048] = { 0 }; 2025 uint32_t rlen = 0; 2026 2027 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) { 2028 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules); 2029 rlen = u_unescape(tests[i].rules, rlz, 2048); 2030 2031 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2032 if(status == U_FILE_ACCESS_ERROR) { 2033 log_data_err("Is your data around?\n"); 2034 return; 2035 } else if(U_FAILURE(status)) { 2036 log_err("Error opening collator\n"); 2037 return; 2038 } 2039 2040 rlen = u_unescape(tests[i].expectedRules, rlz, 2048); 2041 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2042 2043 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status); 2044 2045 ucol_close(credundant); 2046 ucol_close(cresulting); 2047 2048 
log_verbose("testing using data\n"); 2049 2050 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen); 2051 } 2052 2053 } 2054 2055 static void TestExpansionSyntax(void) { 2056 int32_t i; 2057 2058 const static char *rules[] = { 2059 "&AE <<< a << b <<< c &d <<< f", 2060 "&AE <<< a <<< b << c << d < e < f <<< g", 2061 "&AE <<< B <<< C / D <<< F" 2062 }; 2063 2064 const static char *expectedRules[] = { 2065 "&A <<< a / E << b / E <<< c /E &d <<< f", 2066 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g", 2067 "&A <<< B / E <<< C / ED <<< F / E" 2068 }; 2069 2070 const static char *testdata[][8] = { 2071 {"AE", "a", "b", "c"}, 2072 {"AE", "a", "b", "c", "d", "e", "f", "g"}, 2073 {"AE", "B", "C"} /* / ED <<< F / E"},*/ 2074 }; 2075 2076 const static uint32_t testdatalen[] = { 2077 4, 2078 8, 2079 3 2080 }; 2081 2082 2083 2084 UCollator *credundant = NULL; 2085 UCollator *cresulting = NULL; 2086 UErrorCode status = U_ZERO_ERROR; 2087 UChar rlz[2048] = { 0 }; 2088 uint32_t rlen = 0; 2089 2090 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) { 2091 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]); 2092 rlen = u_unescape(rules[i], rlz, 2048); 2093 2094 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2095 if(status == U_FILE_ACCESS_ERROR) { 2096 log_data_err("Is your data around?\n"); 2097 return; 2098 } else if(U_FAILURE(status)) { 2099 log_err("Error opening collator\n"); 2100 return; 2101 } 2102 rlen = u_unescape(expectedRules[i], rlz, 2048); 2103 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2104 2105 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */ 2106 /* as a hard error test, but only in information mode */ 2107 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status); 2108 2109 ucol_close(credundant); 2110 ucol_close(cresulting); 2111 2112 log_verbose("testing using data\n"); 2113 2114 
genericRulesStarter(rules[i], testdata[i], testdatalen[i]); 2115 } 2116 } 2117 2118 static void TestCase(void) 2119 { 2120 const static UChar gRules[MAX_TOKEN_LEN] = 2121 /*" & 0 < 1,\u2461<a,A"*/ 2122 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 }; 2123 2124 const static UChar testCase[][MAX_TOKEN_LEN] = 2125 { 2126 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, 2127 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, 2128 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000}, 2129 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000} 2130 }; 2131 2132 const static UCollationResult caseTestResults[][9] = 2133 { 2134 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2135 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }, 2136 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2137 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER } 2138 }; 2139 2140 const static UColAttributeValue caseTestAttributes[][2] = 2141 { 2142 { UCOL_LOWER_FIRST, UCOL_OFF}, 2143 { UCOL_UPPER_FIRST, UCOL_OFF}, 2144 { UCOL_LOWER_FIRST, UCOL_ON}, 2145 { UCOL_UPPER_FIRST, UCOL_ON} 2146 }; 2147 int32_t i,j,k; 2148 UErrorCode status = U_ZERO_ERROR; 2149 UCollationElements *iter; 2150 UCollator *myCollation; 2151 myCollation = ucol_open("en_US", &status); 2152 2153 if(U_FAILURE(status)){ 2154 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2155 return; 2156 } 2157 log_verbose("Testing different case settings\n"); 2158 ucol_setStrength(myCollation, UCOL_TERTIARY); 2159 2160 for(k = 0; k<4; k++) { 2161 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2162 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2163 
log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]); 2164 for (i = 0; i < 3 ; i++) { 2165 for(j = i+1; j<4; j++) { 2166 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2167 } 2168 } 2169 } 2170 ucol_close(myCollation); 2171 2172 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status); 2173 if(U_FAILURE(status)){ 2174 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2175 return; 2176 } 2177 log_verbose("Testing different case settings with custom rules\n"); 2178 ucol_setStrength(myCollation, UCOL_TERTIARY); 2179 2180 for(k = 0; k<4; k++) { 2181 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2182 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2183 for (i = 0; i < 3 ; i++) { 2184 for(j = i+1; j<4; j++) { 2185 log_verbose("k:%d, i:%d, j:%d\n", k, i, j); 2186 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2187 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status); 2188 backAndForth(iter); 2189 ucol_closeElements(iter); 2190 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status); 2191 backAndForth(iter); 2192 ucol_closeElements(iter); 2193 } 2194 } 2195 } 2196 ucol_close(myCollation); 2197 { 2198 const static char *lowerFirst[] = { 2199 "h", 2200 "H", 2201 "ch", 2202 "Ch", 2203 "CH", 2204 "cha", 2205 "chA", 2206 "Cha", 2207 "ChA", 2208 "CHa", 2209 "CHA", 2210 "i", 2211 "I" 2212 }; 2213 2214 const static char *upperFirst[] = { 2215 "H", 2216 "h", 2217 "CH", 2218 "Ch", 2219 "ch", 2220 "CHA", 2221 "CHa", 2222 "ChA", 2223 "Cha", 2224 "chA", 2225 "cha", 2226 "I", 2227 "i" 2228 }; 2229 log_verbose("mixed case test\n"); 2230 log_verbose("lower first, case level off\n"); 2231 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, 
sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2232 log_verbose("upper first, case level off\n"); 2233 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2234 log_verbose("lower first, case level on\n"); 2235 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2236 log_verbose("upper first, case level on\n"); 2237 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2238 } 2239 2240 } 2241 2242 static void TestIncrementalNormalize(void) { 2243 2244 /*UChar baseA =0x61;*/ 2245 UChar baseA =0x41; 2246 /* UChar baseB = 0x42;*/ 2247 static const UChar ccMix[] = {0x316, 0x321, 0x300}; 2248 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/ 2249 /* 2250 0x316 is combining grave accent below, cc=220 2251 0x321 is combining palatalized hook below, cc=202 2252 0x300 is combining grave accent, cc=230 2253 */ 2254 2255 #define MAXSLEN 2000 2256 /*int maxSLen = 64000;*/ 2257 int sLen; 2258 int i; 2259 2260 UCollator *coll; 2261 UErrorCode status = U_ZERO_ERROR; 2262 UCollationResult result; 2263 2264 int32_t myQ = getTestOption(QUICK_OPTION); 2265 2266 if(getTestOption(QUICK_OPTION) < 0) { 2267 setTestOption(QUICK_OPTION, 1); 2268 } 2269 2270 { 2271 /* Test 1. 
Run very long unnormalized strings, to force overflow of*/ 2272 /* most buffers along the way.*/ 2273 UChar strA[MAXSLEN+1]; 2274 UChar strB[MAXSLEN+1]; 2275 2276 coll = ucol_open("en_US", &status); 2277 if(status == U_FILE_ACCESS_ERROR) { 2278 log_data_err("Is your data around?\n"); 2279 return; 2280 } else if(U_FAILURE(status)) { 2281 log_err("Error opening collator\n"); 2282 return; 2283 } 2284 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 2285 2286 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/ 2287 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/ 2288 /*for (sLen = 1000; sLen<1001; sLen++) {*/ 2289 for (sLen = 500; sLen<501; sLen++) { 2290 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/ 2291 strA[0] = baseA; 2292 strB[0] = baseA; 2293 for (i=1; i<=sLen-1; i++) { 2294 strA[i] = ccMix[i % 3]; 2295 strB[sLen-i] = ccMix[i % 3]; 2296 } 2297 strA[sLen] = 0; 2298 strB[sLen] = 0; 2299 2300 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/ 2301 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/ 2302 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/ 2303 doTest(coll, strA, strB, UCOL_EQUAL); 2304 } 2305 } 2306 2307 setTestOption(QUICK_OPTION, myQ); 2308 2309 2310 /* Test 2: Non-normal sequence in a string that extends to the last character*/ 2311 /* of the string. Checks a couple of edge cases.*/ 2312 2313 { 2314 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0}; 2315 static const UChar strB[] = {0x41, 0xc0, 0x316, 0}; 2316 ucol_setStrength(coll, UCOL_TERTIARY); 2317 doTest(coll, strA, strB, UCOL_EQUAL); 2318 } 2319 2320 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/ 2321 2322 { 2323 /* New UCA 3.1.1. 
2324 * test below used a code point from Desseret, which sorts differently 2325 * than d800 dc00 2326 */ 2327 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/ 2328 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0}; 2329 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0}; 2330 ucol_setStrength(coll, UCOL_TERTIARY); 2331 doTest(coll, strA, strB, UCOL_GREATER); 2332 } 2333 2334 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/ 2335 2336 { 2337 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00}; 2338 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00}; 2339 char sortKeyA[50]; 2340 char sortKeyAz[50]; 2341 char sortKeyB[50]; 2342 char sortKeyBz[50]; 2343 int r; 2344 2345 /* there used to be -3 here. Hmmmm.... */ 2346 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/ 2347 result = ucol_strcoll(coll, strA, 3, strB, 3); 2348 if (result != UCOL_GREATER) { 2349 log_err("ERROR 1 in test 4\n"); 2350 } 2351 result = ucol_strcoll(coll, strA, -1, strB, -1); 2352 if (result != UCOL_EQUAL) { 2353 log_err("ERROR 2 in test 4\n"); 2354 } 2355 2356 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2357 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2358 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2359 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2360 2361 r = strcmp(sortKeyA, sortKeyAz); 2362 if (r <= 0) { 2363 log_err("Error 3 in test 4\n"); 2364 } 2365 r = strcmp(sortKeyA, sortKeyB); 2366 if (r <= 0) { 2367 log_err("Error 4 in test 4\n"); 2368 } 2369 r = strcmp(sortKeyAz, sortKeyBz); 2370 if (r != 0) { 2371 log_err("Error 5 in test 4\n"); 2372 } 2373 2374 ucol_setStrength(coll, UCOL_IDENTICAL); 2375 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2376 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2377 ucol_getSortKey(coll, strB, 3, (uint8_t 
*)sortKeyB, sizeof(sortKeyB)); 2378 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2379 2380 r = strcmp(sortKeyA, sortKeyAz); 2381 if (r <= 0) { 2382 log_err("Error 6 in test 4\n"); 2383 } 2384 r = strcmp(sortKeyA, sortKeyB); 2385 if (r <= 0) { 2386 log_err("Error 7 in test 4\n"); 2387 } 2388 r = strcmp(sortKeyAz, sortKeyBz); 2389 if (r != 0) { 2390 log_err("Error 8 in test 4\n"); 2391 } 2392 ucol_setStrength(coll, UCOL_TERTIARY); 2393 } 2394 2395 2396 /* Test 5: Null characters in non-normal source strings.*/ 2397 2398 { 2399 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00}; 2400 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00}; 2401 char sortKeyA[50]; 2402 char sortKeyAz[50]; 2403 char sortKeyB[50]; 2404 char sortKeyBz[50]; 2405 int r; 2406 2407 result = ucol_strcoll(coll, strA, 6, strB, 6); 2408 if (result != UCOL_GREATER) { 2409 log_err("ERROR 1 in test 5\n"); 2410 } 2411 result = ucol_strcoll(coll, strA, -1, strB, -1); 2412 if (result != UCOL_EQUAL) { 2413 log_err("ERROR 2 in test 5\n"); 2414 } 2415 2416 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2417 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2418 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2419 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2420 2421 r = strcmp(sortKeyA, sortKeyAz); 2422 if (r <= 0) { 2423 log_err("Error 3 in test 5\n"); 2424 } 2425 r = strcmp(sortKeyA, sortKeyB); 2426 if (r <= 0) { 2427 log_err("Error 4 in test 5\n"); 2428 } 2429 r = strcmp(sortKeyAz, sortKeyBz); 2430 if (r != 0) { 2431 log_err("Error 5 in test 5\n"); 2432 } 2433 2434 ucol_setStrength(coll, UCOL_IDENTICAL); 2435 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2436 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2437 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2438 
ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2439 2440 r = strcmp(sortKeyA, sortKeyAz); 2441 if (r <= 0) { 2442 log_err("Error 6 in test 5\n"); 2443 } 2444 r = strcmp(sortKeyA, sortKeyB); 2445 if (r <= 0) { 2446 log_err("Error 7 in test 5\n"); 2447 } 2448 r = strcmp(sortKeyAz, sortKeyBz); 2449 if (r != 0) { 2450 log_err("Error 8 in test 5\n"); 2451 } 2452 ucol_setStrength(coll, UCOL_TERTIARY); 2453 } 2454 2455 2456 /* Test 6: Null character as base of a non-normal combining sequence.*/ 2457 2458 { 2459 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00}; 2460 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00}; 2461 2462 result = ucol_strcoll(coll, strA, 5, strB, 5); 2463 if (result != UCOL_LESS) { 2464 log_err("Error 1 in test 6\n"); 2465 } 2466 result = ucol_strcoll(coll, strA, -1, strB, -1); 2467 if (result != UCOL_EQUAL) { 2468 log_err("Error 2 in test 6\n"); 2469 } 2470 } 2471 2472 ucol_close(coll); 2473 } 2474 2475 2476 2477 #if 0 2478 static void TestGetCaseBit(void) { 2479 static const char *caseBitData[] = { 2480 "a", "A", "ch", "Ch", "CH", 2481 "\\uFF9E", "\\u0009" 2482 }; 2483 2484 static const uint8_t results[] = { 2485 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE, 2486 UCOL_UPPER_CASE, UCOL_LOWER_CASE 2487 }; 2488 2489 uint32_t i, blen = 0; 2490 UChar b[256] = {0}; 2491 UErrorCode status = U_ZERO_ERROR; 2492 UCollator *UCA = ucol_open("", &status); 2493 uint8_t res = 0; 2494 2495 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) { 2496 blen = u_unescape(caseBitData[i], b, 256); 2497 res = ucol_uprv_getCaseBits(UCA, b, blen, &status); 2498 if(results[i] != res) { 2499 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]); 2500 } 2501 } 2502 } 2503 #endif 2504 2505 static void TestHangulTailoring(void) { 2506 static const char *koreanData[] = { 2507 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", 
"\\u53ef", "\\u5475", 2508 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef", 2509 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888", 2510 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5", 2511 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E", 2512 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C" 2513 }; 2514 2515 const char *rules = 2516 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 " 2517 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef " 2518 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 " 2519 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 " 2520 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E " 2521 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C"; 2522 2523 2524 UErrorCode status = U_ZERO_ERROR; 2525 UChar rlz[2048] = { 0 }; 2526 uint32_t rlen = u_unescape(rules, rlz, 2048); 2527 2528 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 2529 if(status == U_FILE_ACCESS_ERROR) { 2530 log_data_err("Is your data around?\n"); 2531 return; 2532 } else if(U_FAILURE(status)) { 2533 log_err("Error opening collator\n"); 2534 return; 2535 } 2536 2537 log_verbose("Using start of korean rules\n"); 2538 2539 if(U_SUCCESS(status)) { 2540 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2541 } else { 2542 log_err("Unable to open collator with rules %s\n", rules); 2543 } 2544 2545 log_verbose("Setting jamoSpecial to TRUE and testing once more\n"); 2546 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */ 2547 genericOrderingTest(coll, koreanData, 
sizeof(koreanData)/sizeof(koreanData[0])); 2548 2549 ucol_close(coll); 2550 2551 log_verbose("Using ko__LOTUS locale\n"); 2552 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2553 } 2554 2555 static void TestCompressOverlap(void) { 2556 UChar secstr[150]; 2557 UChar tertstr[150]; 2558 UErrorCode status = U_ZERO_ERROR; 2559 UCollator *coll; 2560 char result[200]; 2561 uint32_t resultlen; 2562 int count = 0; 2563 char *tempptr; 2564 2565 coll = ucol_open("", &status); 2566 2567 if (U_FAILURE(status)) { 2568 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status)); 2569 return; 2570 } 2571 while (count < 149) { 2572 secstr[count] = 0x0020; /* [06, 05, 05] */ 2573 tertstr[count] = 0x0020; 2574 count ++; 2575 } 2576 2577 /* top down compression ----------------------------------- */ 2578 secstr[count] = 0x0332; /* [, 87, 05] */ 2579 tertstr[count] = 0x3000; /* [06, 05, 07] */ 2580 2581 /* no compression secstr should have 150 secondary bytes, tertstr should 2582 have 150 tertiary bytes. 
2583 with correct overlapping compression, secstr should have 4 secondary 2584 bytes, tertstr should have > 2 tertiary bytes */ 2585 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2586 tempptr = uprv_strchr(result, 1) + 1; 2587 while (*(tempptr + 1) != 1) { 2588 /* the last secondary collation element is not checked since it is not 2589 part of the compression */ 2590 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { 2591 log_err("Secondary compression overlapped\n"); 2592 } 2593 tempptr ++; 2594 } 2595 2596 /* tertiary top/bottom/common for en_US is similar to the secondary 2597 top/bottom/common */ 2598 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2599 tempptr = uprv_strrchr(result, 1) + 1; 2600 while (*(tempptr + 1) != 0) { 2601 /* the last secondary collation element is not checked since it is not 2602 part of the compression */ 2603 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { 2604 log_err("Tertiary compression overlapped\n"); 2605 } 2606 tempptr ++; 2607 } 2608 2609 /* bottom up compression ------------------------------------- */ 2610 secstr[count] = 0; 2611 tertstr[count] = 0; 2612 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2613 tempptr = uprv_strchr(result, 1) + 1; 2614 while (*(tempptr + 1) != 1) { 2615 /* the last secondary collation element is not checked since it is not 2616 part of the compression */ 2617 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { 2618 log_err("Secondary compression overlapped\n"); 2619 } 2620 tempptr ++; 2621 } 2622 2623 /* tertiary top/bottom/common for en_US is similar to the secondary 2624 top/bottom/common */ 2625 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2626 tempptr = uprv_strrchr(result, 1) + 1; 2627 while (*(tempptr + 1) != 0) { 2628 /* the last secondary collation element is not checked since it is not 2629 part of the compression */ 2630 if (*tempptr > coll->tertiaryBottom + 
coll->tertiaryBottomCount) { 2631 log_err("Tertiary compression overlapped\n"); 2632 } 2633 tempptr ++; 2634 } 2635 2636 ucol_close(coll); 2637 } 2638 2639 static void TestCyrillicTailoring(void) { 2640 static const char *test[] = { 2641 "\\u0410b", 2642 "\\u0410\\u0306a", 2643 "\\u04d0A" 2644 }; 2645 2646 /* Russian overrides contractions, so this test is not valid anymore */ 2647 /*genericLocaleStarter("ru", test, 3);*/ 2648 2649 genericLocaleStarter("root", test, 3); 2650 genericRulesStarter("&\\u0410 = \\u0410", test, 3); 2651 genericRulesStarter("&Z < \\u0410", test, 3); 2652 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); 2653 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); 2654 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); 2655 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); 2656 } 2657 2658 static void TestSuppressContractions(void) { 2659 2660 static const char *testNoCont2[] = { 2661 "\\u0410\\u0302a", 2662 "\\u0410\\u0306b", 2663 "\\u0410c" 2664 }; 2665 static const char *testNoCont[] = { 2666 "a\\u0410", 2667 "A\\u0410\\u0306", 2668 "\\uFF21\\u0410\\u0302" 2669 }; 2670 2671 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3); 2672 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3); 2673 } 2674 2675 static void TestContraction(void) { 2676 const static char *testrules[] = { 2677 "&A = AB / B", 2678 "&A = A\\u0306/\\u0306", 2679 "&c = ch / h" 2680 }; 2681 const static UChar testdata[][2] = { 2682 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, 2683 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, 2684 {0x0063 /* 'c' */, 0x0068 /* 'h' */} 2685 }; 2686 const static UChar testdata2[][2] = { 2687 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, 2688 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, 2689 {0x0063 /* 'c' */, 0x006C /* 'l' */} 2690 }; 2691 const static char *testrules3[] = { 2692 "&z < xyz &xyzw << B", 2693 "&z < xyz &xyz << B / w", 2694 "&z < ch &achm << B", 2695 
"&z < ch &a << B / chm", 2696 "&\\ud800\\udc00w << B", 2697 "&\\ud800\\udc00 << B / w", 2698 "&a\\ud800\\udc00m << B", 2699 "&a << B / \\ud800\\udc00m", 2700 }; 2701 2702 UErrorCode status = U_ZERO_ERROR; 2703 UCollator *coll; 2704 UChar rule[256] = {0}; 2705 uint32_t rlen = 0; 2706 int i; 2707 2708 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2709 UCollationElements *iter1; 2710 int j = 0; 2711 log_verbose("Rule %s for testing\n", testrules[i]); 2712 rlen = u_unescape(testrules[i], rule, 32); 2713 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2714 if (U_FAILURE(status)) { 2715 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2716 return; 2717 } 2718 iter1 = ucol_openElements(coll, testdata[i], 2, &status); 2719 if (U_FAILURE(status)) { 2720 log_err("Collation iterator creation failed\n"); 2721 return; 2722 } 2723 while (j < 2) { 2724 UCollationElements *iter2 = ucol_openElements(coll, 2725 &(testdata[i][j]), 2726 1, &status); 2727 uint32_t ce; 2728 if (U_FAILURE(status)) { 2729 log_err("Collation iterator creation failed\n"); 2730 return; 2731 } 2732 ce = ucol_next(iter2, &status); 2733 while (ce != UCOL_NULLORDER) { 2734 if ((uint32_t)ucol_next(iter1, &status) != ce) { 2735 log_err("Collation elements in contraction split does not match\n"); 2736 return; 2737 } 2738 ce = ucol_next(iter2, &status); 2739 } 2740 j ++; 2741 ucol_closeElements(iter2); 2742 } 2743 if (ucol_next(iter1, &status) != UCOL_NULLORDER) { 2744 log_err("Collation elements not exhausted\n"); 2745 return; 2746 } 2747 ucol_closeElements(iter1); 2748 ucol_close(coll); 2749 } 2750 2751 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256); 2752 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2753 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) { 2754 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2755 testdata2[0][0], testdata2[0][1], 
testdata2[1][0], 2756 testdata2[1][1]); 2757 return; 2758 } 2759 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { 2760 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2761 testdata2[1][0], testdata2[1][1], testdata2[2][0], 2762 testdata2[2][1]); 2763 return; 2764 } 2765 ucol_close(coll); 2766 2767 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { 2768 UCollator *coll1, 2769 *coll2; 2770 UCollationElements *iter1, 2771 *iter2; 2772 UChar ch = 0x0042 /* 'B' */; 2773 uint32_t ce; 2774 rlen = u_unescape(testrules3[i], rule, 32); 2775 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2776 rlen = u_unescape(testrules3[i + 1], rule, 32); 2777 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2778 if (U_FAILURE(status)) { 2779 log_err("Collator creation failed %s\n", testrules[i]); 2780 return; 2781 } 2782 iter1 = ucol_openElements(coll1, &ch, 1, &status); 2783 iter2 = ucol_openElements(coll2, &ch, 1, &status); 2784 if (U_FAILURE(status)) { 2785 log_err("Collation iterator creation failed\n"); 2786 return; 2787 } 2788 ce = ucol_next(iter1, &status); 2789 if (U_FAILURE(status)) { 2790 log_err("Retrieving ces failed\n"); 2791 return; 2792 } 2793 while (ce != UCOL_NULLORDER) { 2794 if (ce != (uint32_t)ucol_next(iter2, &status)) { 2795 log_err("CEs does not match\n"); 2796 return; 2797 } 2798 ce = ucol_next(iter1, &status); 2799 if (U_FAILURE(status)) { 2800 log_err("Retrieving ces failed\n"); 2801 return; 2802 } 2803 } 2804 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 2805 log_err("CEs not exhausted\n"); 2806 return; 2807 } 2808 ucol_closeElements(iter1); 2809 ucol_closeElements(iter2); 2810 ucol_close(coll1); 2811 ucol_close(coll2); 2812 } 2813 } 2814 2815 static void TestExpansion(void) { 2816 const static char *testrules[] = { 2817 "&J << K / B & K << M", 2818 "&J << K / B << M" 2819 }; 2820 const static UChar testdata[][3] = { 2821 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, 
2822 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, 2823 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, 2824 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, 2825 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, 2826 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} 2827 }; 2828 2829 UErrorCode status = U_ZERO_ERROR; 2830 UCollator *coll; 2831 UChar rule[256] = {0}; 2832 uint32_t rlen = 0; 2833 int i; 2834 2835 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2836 int j = 0; 2837 log_verbose("Rule %s for testing\n", testrules[i]); 2838 rlen = u_unescape(testrules[i], rule, 32); 2839 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2840 if (U_FAILURE(status)) { 2841 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2842 return; 2843 } 2844 2845 for (j = 0; j < 5; j ++) { 2846 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS); 2847 } 2848 ucol_close(coll); 2849 } 2850 } 2851 2852 #if 0 2853 /* this test tests the current limitations of the engine */ 2854 /* it always fail, so it is disabled by default */ 2855 static void TestLimitations(void) { 2856 /* recursive expansions */ 2857 { 2858 static const char *rule = "&a=b/c&d=c/e"; 2859 static const char *tlimit01[] = {"add","b","adf"}; 2860 static const char *tlimit02[] = {"aa","b","af"}; 2861 log_verbose("recursive expansions\n"); 2862 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 2863 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 2864 } 2865 /* contractions spanning expansions */ 2866 { 2867 static const char *rule = "&a<<<c/e&g<<<eh"; 2868 static const char *tlimit01[] = {"ad","c","af","f","ch","h"}; 2869 static const char *tlimit02[] = {"ad","c","ch","af","f","h"}; 2870 log_verbose("contractions spanning expansions\n"); 2871 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 2872 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 2873 } 2874 /* normalization: nulls in 
contractions */ 2875 { 2876 static const char *rule = "&a<<<\\u0000\\u0302"; 2877 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 2878 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 2879 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 2880 static const UColAttributeValue valOn[] = { UCOL_ON }; 2881 static const UColAttributeValue valOff[] = { UCOL_OFF }; 2882 2883 log_verbose("NULL in contractions\n"); 2884 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 2885 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 2886 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 2887 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 2888 2889 } 2890 /* normalization: contractions spanning normalization */ 2891 { 2892 static const char *rule = "&a<<<\\u0000\\u0302"; 2893 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 2894 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 2895 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 2896 static const UColAttributeValue valOn[] = { UCOL_ON }; 2897 static const UColAttributeValue valOff[] = { UCOL_OFF }; 2898 2899 log_verbose("contractions spanning normalization\n"); 2900 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 2901 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 2902 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 2903 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 2904 2905 } 2906 /* variable top: */ 2907 { 2908 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/ 2909 static const char *rule = "&\\u2010<x<[variable top]=z"; 2910 /*static const char *rule3 = "&' '<x<[variable top]=z";*/ 2911 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" }; 2912 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"}; 2913 static const char 
*tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" }; 2914 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }; 2915 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY }; 2916 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY }; 2917 2918 log_verbose("variable top\n"); 2919 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2920 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2921 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2922 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0])); 2923 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0])); 2924 2925 } 2926 /* case level */ 2927 { 2928 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH"; 2929 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"}; 2930 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"}; 2931 static const UColAttribute att[] = { UCOL_CASE_FIRST}; 2932 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST}; 2933 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/ 2934 log_verbose("case level\n"); 2935 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2936 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2937 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 2938 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 
2939 } 2940 2941 } 2942 #endif 2943 2944 static void TestBocsuCoverage(void) { 2945 UErrorCode status = U_ZERO_ERROR; 2946 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041"; 2947 UChar test[256] = {0}; 2948 uint32_t tlen = u_unescape(testString, test, 32); 2949 uint8_t key[256] = {0}; 2950 uint32_t klen = 0; 2951 2952 UCollator *coll = ucol_open("", &status); 2953 if(U_SUCCESS(status)) { 2954 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 2955 2956 klen = ucol_getSortKey(coll, test, tlen, key, 256); 2957 2958 ucol_close(coll); 2959 } else { 2960 log_data_err("Couldn't open UCA\n"); 2961 } 2962 } 2963 2964 static void TestVariableTopSetting(void) { 2965 UErrorCode status = U_ZERO_ERROR; 2966 const UChar *current = NULL; 2967 uint32_t varTopOriginal = 0, varTop1, varTop2; 2968 UCollator *coll = ucol_open("", &status); 2969 if(U_SUCCESS(status)) { 2970 2971 uint32_t strength = 0; 2972 uint16_t specs = 0; 2973 uint32_t chOffset = 0; 2974 uint32_t chLen = 0; 2975 uint32_t exOffset = 0; 2976 uint32_t exLen = 0; 2977 uint32_t oldChOffset = 0; 2978 uint32_t oldChLen = 0; 2979 uint32_t oldExOffset = 0; 2980 uint32_t oldExLen = 0; 2981 uint32_t prefixOffset = 0; 2982 uint32_t prefixLen = 0; 2983 2984 UBool startOfRules = TRUE; 2985 UColTokenParser src; 2986 UColOptionSet opts; 2987 2988 UChar *rulesCopy = NULL; 2989 uint32_t rulesLen; 2990 2991 UCollationResult result; 2992 2993 UChar first[256] = { 0 }; 2994 UChar second[256] = { 0 }; 2995 UParseError parseError; 2996 int32_t myQ = getTestOption(QUICK_OPTION); 2997 2998 uprv_memset(&src, 0, sizeof(UColTokenParser)); 2999 3000 src.opts = &opts; 3001 3002 if(getTestOption(QUICK_OPTION) <= 0) { 3003 setTestOption(QUICK_OPTION, 1); 3004 } 3005 3006 /* this test will fail when normalization is turned on */ 3007 /* therefore we always turn off exhaustive mode for it */ 3008 { /* QUICK > 0*/ 3009 log_verbose("Slide variable top over UCARules\n"); 3010 rulesLen = 
ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); 3011 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 3012 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE); 3013 3014 if(U_SUCCESS(status) && rulesLen > 0) { 3015 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 3016 src.current = src.source = rulesCopy; 3017 src.end = rulesCopy+rulesLen; 3018 src.extraCurrent = src.end; 3019 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 3020 3021 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 3022 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 3023 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { 3024 strength = src.parsedToken.strength; 3025 chOffset = src.parsedToken.charsOffset; 3026 chLen = src.parsedToken.charsLen; 3027 exOffset = src.parsedToken.extensionOffset; 3028 exLen = src.parsedToken.extensionLen; 3029 prefixOffset = src.parsedToken.prefixOffset; 3030 prefixLen = src.parsedToken.prefixLen; 3031 specs = src.parsedToken.flags; 3032 3033 startOfRules = FALSE; 3034 { 3035 log_verbose("%04X %d ", *(src.source+chOffset), chLen); 3036 } 3037 if(strength == UCOL_PRIMARY) { 3038 status = U_ZERO_ERROR; 3039 varTopOriginal = ucol_getVariableTop(coll, &status); 3040 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status); 3041 if(U_FAILURE(status)) { 3042 char buffer[256]; 3043 char *buf = buffer; 3044 uint32_t i = 0, j; 3045 uint32_t CE = UCOL_NO_MORE_CES; 3046 3047 /* before we start screaming, let's see if there is a problem with the rules */ 3048 UErrorCode collIterateStatus = U_ZERO_ERROR; 3049 collIterate *s = uprv_new_collIterate(&collIterateStatus); 3050 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus); 3051 3052 CE = ucol_getNextCE(coll, s, 
&status); 3053 3054 for(i = 0; i < oldChLen; i++) { 3055 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i)); 3056 buf += j; 3057 } 3058 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3059 log_verbose("= Expected failure for %s =", buffer); 3060 } else { 3061 if(uprv_collIterateAtEnd(s)) { 3062 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n", 3063 oldChOffset, u_errorName(status), buffer); 3064 } else { 3065 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n", 3066 buffer); 3067 } 3068 } 3069 uprv_delete_collIterate(s); 3070 } 3071 varTop2 = ucol_getVariableTop(coll, &status); 3072 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { 3073 log_err("cannot retrieve set varTop value!\n"); 3074 continue; 3075 } 3076 3077 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) { 3078 3079 u_strncpy(first, src.source+oldChOffset, oldChLen); 3080 u_strncpy(first+oldChLen, src.source+chOffset, chLen); 3081 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen); 3082 first[2*oldChLen+chLen] = 0; 3083 3084 if(oldExLen == 0) { 3085 u_strncpy(second, src.source+chOffset, chLen); 3086 second[chLen] = 0; 3087 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */ 3088 u_strncpy(second, src.source+oldExOffset, oldExLen); 3089 u_strncpy(second+oldChLen, src.source+chOffset, chLen); 3090 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen); 3091 second[2*oldExLen+chLen] = 0; 3092 } 3093 result = ucol_strcoll(coll, first, -1, second, -1); 3094 if(result == UCOL_EQUAL) { 3095 doTest(coll, first, second, UCOL_EQUAL); 3096 } else { 3097 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset)); 3098 } 3099 } 3100 } 3101 if(strength != UCOL_TOK_RESET) { 3102 oldChOffset = chOffset; 3103 oldChLen = chLen; 3104 oldExOffset = exOffset; 3105 oldExLen = exLen; 3106 } 3107 } 
3108 status = U_ZERO_ERROR; 3109 } 3110 else { 3111 log_err("Unexpected failure getting rules %s\n", u_errorName(status)); 3112 return; 3113 } 3114 if (U_FAILURE(status)) { 3115 log_err("Error parsing rules %s\n", u_errorName(status)); 3116 return; 3117 } 3118 status = U_ZERO_ERROR; 3119 } 3120 3121 setTestOption(QUICK_OPTION, myQ); 3122 3123 log_verbose("Testing setting variable top to contractions\n"); 3124 { 3125 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos); 3126 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth; 3127 while(*conts != 0) { 3128 /* 3129 * A continuation is NUL-terminated and NUL-padded 3130 * except if it has the maximum length. 3131 */ 3132 int32_t contractionLength = maxUCAContractionLength; 3133 while(contractionLength > 0 && conts[contractionLength - 1] == 0) { 3134 --contractionLength; 3135 } 3136 if(*(conts+1)==0) { /* pre-context */ 3137 varTop1 = ucol_setVariableTop(coll, conts, 1, &status); 3138 } else { 3139 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status); 3140 } 3141 if(U_FAILURE(status)) { 3142 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3143 /* ucol_setVariableTop() is documented to not accept 3-byte primaries, 3144 * therefore it is not an error when it complains about them. 
*/ 3145 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n", 3146 *conts, *(conts+1), *(conts+2)); 3147 } else { 3148 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n", 3149 *conts, *(conts+1), *(conts+2), u_errorName(status)); 3150 } 3151 status = U_ZERO_ERROR; 3152 } 3153 conts+=maxUCAContractionLength; 3154 } 3155 3156 status = U_ZERO_ERROR; 3157 3158 first[0] = 0x0040; 3159 first[1] = 0x0050; 3160 first[2] = 0x0000; 3161 3162 ucol_setVariableTop(coll, first, -1, &status); 3163 3164 if(U_SUCCESS(status)) { 3165 log_err("Invalid contraction succeded in setting variable top!\n"); 3166 } 3167 3168 } 3169 3170 log_verbose("Test restoring variable top\n"); 3171 3172 status = U_ZERO_ERROR; 3173 ucol_restoreVariableTop(coll, varTopOriginal, &status); 3174 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { 3175 log_err("Couldn't restore old variable top\n"); 3176 } 3177 3178 log_verbose("Testing calling with error set\n"); 3179 3180 status = U_INTERNAL_PROGRAM_ERROR; 3181 varTop1 = ucol_setVariableTop(coll, first, 1, &status); 3182 varTop2 = ucol_getVariableTop(coll, &status); 3183 ucol_restoreVariableTop(coll, varTop2, &status); 3184 varTop1 = ucol_setVariableTop(NULL, first, 1, &status); 3185 varTop2 = ucol_getVariableTop(NULL, &status); 3186 ucol_restoreVariableTop(NULL, varTop2, &status); 3187 if(status != U_INTERNAL_PROGRAM_ERROR) { 3188 log_err("Bad reaction to passed error!\n"); 3189 } 3190 uprv_free(src.source); 3191 ucol_close(coll); 3192 } else { 3193 log_data_err("Couldn't open UCA collator\n"); 3194 } 3195 3196 } 3197 3198 static void TestNonChars(void) { 3199 static const char *test[] = { 3200 "\\u0000", /* ignorable */ 3201 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */ 3202 "\\uFDD0", "\\uFDEF", 3203 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */ 3204 "\\U0002FFFE", "\\U0002FFFF", /* not like 
ignorable. */ 3205 "\\U0003FFFE", "\\U0003FFFF", 3206 "\\U0004FFFE", "\\U0004FFFF", 3207 "\\U0005FFFE", "\\U0005FFFF", 3208 "\\U0006FFFE", "\\U0006FFFF", 3209 "\\U0007FFFE", "\\U0007FFFF", 3210 "\\U0008FFFE", "\\U0008FFFF", 3211 "\\U0009FFFE", "\\U0009FFFF", 3212 "\\U000AFFFE", "\\U000AFFFF", 3213 "\\U000BFFFE", "\\U000BFFFF", 3214 "\\U000CFFFE", "\\U000CFFFF", 3215 "\\U000DFFFE", "\\U000DFFFF", 3216 "\\U000EFFFE", "\\U000EFFFF", 3217 "\\U000FFFFE", "\\U000FFFFF", 3218 "\\U0010FFFE", "\\U0010FFFF", 3219 "\\uFFFF" /* special character with maximum primary weight */ 3220 }; 3221 UErrorCode status = U_ZERO_ERROR; 3222 UCollator *coll = ucol_open("en_US", &status); 3223 3224 log_verbose("Test non characters\n"); 3225 3226 if(U_SUCCESS(status)) { 3227 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS); 3228 } else { 3229 log_err_status(status, "Unable to open collator\n"); 3230 } 3231 3232 ucol_close(coll); 3233 } 3234 3235 static void TestExtremeCompression(void) { 3236 static char *test[4]; 3237 int32_t j = 0, i = 0; 3238 3239 for(i = 0; i<4; i++) { 3240 test[i] = (char *)malloc(2048*sizeof(char)); 3241 } 3242 3243 for(j = 20; j < 500; j++) { 3244 for(i = 0; i<4; i++) { 3245 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3246 test[i][j-1] = (char)('a'+i); 3247 test[i][j] = 0; 3248 } 3249 genericLocaleStarter("en_US", (const char **)test, 4); 3250 } 3251 3252 3253 for(i = 0; i<4; i++) { 3254 free(test[i]); 3255 } 3256 } 3257 3258 #if 0 3259 static void TestExtremeCompression(void) { 3260 static char *test[4]; 3261 int32_t j = 0, i = 0; 3262 UErrorCode status = U_ZERO_ERROR; 3263 UCollator *coll = ucol_open("en_US", status); 3264 for(i = 0; i<4; i++) { 3265 test[i] = (char *)malloc(2048*sizeof(char)); 3266 } 3267 for(j = 10; j < 2048; j++) { 3268 for(i = 0; i<4; i++) { 3269 uprv_memset(test[i], 'a', (j-2)*sizeof(char)); 3270 test[i][j-1] = (char)('a'+i); 3271 test[i][j] = 0; 3272 } 3273 } 3274 genericLocaleStarter("en_US", (const char **)test, 4); 3275 3276 for(j 
= 10; j < 2048; j++) { 3277 for(i = 0; i<1; i++) { 3278 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3279 test[i][j] = 0; 3280 } 3281 } 3282 for(i = 0; i<4; i++) { 3283 free(test[i]); 3284 } 3285 } 3286 #endif 3287 3288 static void TestSurrogates(void) { 3289 static const char *test[] = { 3290 "z","\\ud900\\udc25", "\\ud805\\udc50", 3291 "\\ud800\\udc00y", "\\ud800\\udc00r", 3292 "\\ud800\\udc00f", "\\ud800\\udc00", 3293 "\\ud800\\udc00c", "\\ud800\\udc00b", 3294 "\\ud800\\udc00fa", "\\ud800\\udc00fb", 3295 "\\ud800\\udc00a", 3296 "c", "b" 3297 }; 3298 3299 static const char *rule = 3300 "&z < \\ud900\\udc25 < \\ud805\\udc50" 3301 "< \\ud800\\udc00y < \\ud800\\udc00r" 3302 "< \\ud800\\udc00f << \\ud800\\udc00" 3303 "< \\ud800\\udc00fa << \\ud800\\udc00fb" 3304 "< \\ud800\\udc00a < c < b" ; 3305 3306 genericRulesStarter(rule, test, 14); 3307 } 3308 3309 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */ 3310 static void TestPrefix(void) { 3311 uint32_t i; 3312 3313 static const struct { 3314 const char *rules; 3315 const char *data[50]; 3316 const uint32_t len; 3317 } tests[] = { 3318 { "&z <<< z|a", 3319 {"zz", "za"}, 2 }, 3320 3321 { "&z <<< z| a", 3322 {"zz", "za"}, 2 }, 3323 { "[strength I]" 3324 "&a=\\ud900\\udc25" 3325 "&z<<<\\ud900\\udc25|a", 3326 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 }, 3327 }; 3328 3329 3330 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3331 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3332 } 3333 } 3334 3335 /* This test uses data suplied by Masashiko Maedera to test the implementation */ 3336 /* JIS X 4061 collation order implementation */ 3337 static void TestNewJapanese(void) { 3338 3339 static const char * const test1[] = { 3340 "\\u30b7\\u30e3\\u30fc\\u30ec", 3341 "\\u30b7\\u30e3\\u30a4", 3342 "\\u30b7\\u30e4\\u30a3", 3343 "\\u30b7\\u30e3\\u30ec", 3344 "\\u3061\\u3087\\u3053", 3345 "\\u3061\\u3088\\u3053", 3346 
"\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8", 3347 "\\u3066\\u30fc\\u305f", 3348 "\\u30c6\\u30fc\\u30bf", 3349 "\\u30c6\\u30a7\\u30bf", 3350 "\\u3066\\u3048\\u305f", 3351 "\\u3067\\u30fc\\u305f", 3352 "\\u30c7\\u30fc\\u30bf", 3353 "\\u30c7\\u30a7\\u30bf", 3354 "\\u3067\\u3048\\u305f", 3355 "\\u3066\\u30fc\\u305f\\u30fc", 3356 "\\u30c6\\u30fc\\u30bf\\u30a1", 3357 "\\u30c6\\u30a7\\u30bf\\u30fc", 3358 "\\u3066\\u3047\\u305f\\u3041", 3359 "\\u3066\\u3048\\u305f\\u30fc", 3360 "\\u3067\\u30fc\\u305f\\u30fc", 3361 "\\u30c7\\u30fc\\u30bf\\u30a1", 3362 "\\u3067\\u30a7\\u305f\\u30a1", 3363 "\\u30c7\\u3047\\u30bf\\u3041", 3364 "\\u30c7\\u30a8\\u30bf\\u30a2", 3365 "\\u3072\\u3086", 3366 "\\u3073\\u3085\\u3042", 3367 "\\u3074\\u3085\\u3042", 3368 "\\u3073\\u3085\\u3042\\u30fc", 3369 "\\u30d3\\u30e5\\u30a2\\u30fc", 3370 "\\u3074\\u3085\\u3042\\u30fc", 3371 "\\u30d4\\u30e5\\u30a2\\u30fc", 3372 "\\u30d2\\u30e5\\u30a6", 3373 "\\u30d2\\u30e6\\u30a6", 3374 "\\u30d4\\u30e5\\u30a6\\u30a2", 3375 "\\u3073\\u3085\\u30fc\\u3042\\u30fc", 3376 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc", 3377 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc", 3378 "\\u3072\\u3085\\u3093", 3379 "\\u3074\\u3085\\u3093", 3380 "\\u3075\\u30fc\\u308a", 3381 "\\u30d5\\u30fc\\u30ea", 3382 "\\u3075\\u3045\\u308a", 3383 "\\u3075\\u30a5\\u308a", 3384 "\\u3075\\u30a5\\u30ea", 3385 "\\u30d5\\u30a6\\u30ea", 3386 "\\u3076\\u30fc\\u308a", 3387 "\\u30d6\\u30fc\\u30ea", 3388 "\\u3076\\u3045\\u308a", 3389 "\\u30d6\\u30a5\\u308a", 3390 "\\u3077\\u3046\\u308a", 3391 "\\u30d7\\u30a6\\u30ea", 3392 "\\u3075\\u30fc\\u308a\\u30fc", 3393 "\\u30d5\\u30a5\\u30ea\\u30fc", 3394 "\\u3075\\u30a5\\u308a\\u30a3", 3395 "\\u30d5\\u3045\\u308a\\u3043", 3396 "\\u30d5\\u30a6\\u30ea\\u30fc", 3397 "\\u3075\\u3046\\u308a\\u3043", 3398 "\\u30d6\\u30a6\\u30ea\\u30a4", 3399 "\\u3077\\u30fc\\u308a\\u30fc", 3400 "\\u3077\\u30a5\\u308a\\u30a4", 3401 "\\u3077\\u3046\\u308a\\u30fc", 3402 "\\u30d7\\u30a6\\u30ea\\u30a4", 3403 "\\u30d5\\u30fd", 3404 "\\u3075\\u309e", 3405 
"\\u3076\\u309d", 3406 "\\u3076\\u3075", 3407 "\\u3076\\u30d5", 3408 "\\u30d6\\u3075", 3409 "\\u30d6\\u30d5", 3410 "\\u3076\\u309e", 3411 "\\u3076\\u3077", 3412 "\\u30d6\\u3077", 3413 "\\u3077\\u309d", 3414 "\\u30d7\\u30fd", 3415 "\\u3077\\u3075", 3416 }; 3417 3418 static const char *test2[] = { 3419 "\\u306f\\u309d", /* H\\u309d */ 3420 "\\u30cf\\u30fd", /* K\\u30fd */ 3421 "\\u306f\\u306f", /* HH */ 3422 "\\u306f\\u30cf", /* HK */ 3423 "\\u30cf\\u30cf", /* KK */ 3424 "\\u306f\\u309e", /* H\\u309e */ 3425 "\\u30cf\\u30fe", /* K\\u30fe */ 3426 "\\u306f\\u3070", /* HH\\u309b */ 3427 "\\u30cf\\u30d0", /* KK\\u309b */ 3428 "\\u306f\\u3071", /* HH\\u309c */ 3429 "\\u30cf\\u3071", /* KH\\u309c */ 3430 "\\u30cf\\u30d1", /* KK\\u309c */ 3431 "\\u3070\\u309d", /* H\\u309b\\u309d */ 3432 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */ 3433 "\\u3070\\u306f", /* H\\u309bH */ 3434 "\\u30d0\\u30cf", /* K\\u309bK */ 3435 "\\u3070\\u309e", /* H\\u309b\\u309e */ 3436 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */ 3437 "\\u3070\\u3070", /* H\\u309bH\\u309b */ 3438 "\\u30d0\\u3070", /* K\\u309bH\\u309b */ 3439 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */ 3440 "\\u3070\\u3071", /* H\\u309bH\\u309c */ 3441 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */ 3442 "\\u3071\\u309d", /* H\\u309c\\u309d */ 3443 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */ 3444 "\\u3071\\u306f", /* H\\u309cH */ 3445 "\\u30d1\\u30cf", /* K\\u309cK */ 3446 "\\u3071\\u3070", /* H\\u309cH\\u309b */ 3447 "\\u3071\\u30d0", /* H\\u309cK\\u309b */ 3448 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */ 3449 "\\u3071\\u3071", /* H\\u309cH\\u309c */ 3450 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */ 3451 }; 3452 /* 3453 static const char *test3[] = { 3454 "\\u221er\\u221e", 3455 "\\u221eR#", 3456 "\\u221et\\u221e", 3457 "#r\\u221e", 3458 "#R#", 3459 "#t%", 3460 "#T%", 3461 "8t\\u221e", 3462 "8T\\u221e", 3463 "8t#", 3464 "8T#", 3465 "8t%", 3466 "8T%", 3467 "8t8", 3468 "8T8", 3469 "\\u03c9r\\u221e", 3470 "\\u03a9R%", 3471 "rr\\u221e", 3472 "rR\\u221e", 3473 
"Rr\\u221e", 3474 "RR\\u221e", 3475 "RT%", 3476 "rt8", 3477 "tr\\u221e", 3478 "tr8", 3479 "TR8", 3480 "tt8", 3481 "\\u30b7\\u30e3\\u30fc\\u30ec", 3482 }; 3483 */ 3484 static const UColAttribute att[] = { UCOL_STRENGTH }; 3485 static const UColAttributeValue val[] = { UCOL_QUATERNARY }; 3486 3487 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING}; 3488 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED }; 3489 3490 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1); 3491 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1); 3492 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/ 3493 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2); 3494 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2); 3495 } 3496 3497 static void TestStrCollIdenticalPrefix(void) { 3498 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71"; 3499 const char* test[] = { 3500 "ab\\ud9b0\\udc70", 3501 "ab\\ud9b0\\udc71" 3502 }; 3503 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL); 3504 } 3505 /* Contractions should have all their canonically equivalent */ 3506 /* strings included */ 3507 static void TestContractionClosure(void) { 3508 static const struct { 3509 const char *rules; 3510 const char *data[10]; 3511 const uint32_t len; 3512 } tests[] = { 3513 { "&b=\\u00e4\\u00e4", 3514 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5}, 3515 { "&b=\\u00C5", 3516 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4}, 3517 }; 3518 uint32_t i; 3519 3520 3521 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3522 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL); 3523 } 3524 } 3525 3526 /* This tests also fails*/ 3527 static void 
TestBeforePrefixFailure(void) { 3528 static const struct { 3529 const char *rules; 3530 const char *data[10]; 3531 const uint32_t len; 3532 } tests[] = { 3533 { "&g <<< a" 3534 "&[before 3]\\uff41 <<< x", 3535 {"x", "\\uff41"}, 2 }, 3536 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3537 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3538 "&[before 3]\\u30a7<<<\\u30a9", 3539 {"\\u30a9", "\\u30a7"}, 2 }, 3540 { "&[before 3]\\u30a7<<<\\u30a9" 3541 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3542 "&\\u30A8=\\u30A8=\\u3048=\\uff74", 3543 {"\\u30a9", "\\u30a7"}, 2 }, 3544 }; 3545 uint32_t i; 3546 3547 3548 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3549 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3550 } 3551 3552 #if 0 3553 const char* rule1 = 3554 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3555 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3556 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"; 3557 const char* rule2 = 3558 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc" 3559 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3560 "&\\u30A8=\\u30A8=\\u3048=\\uff74"; 3561 const char* test[] = { 3562 "\\u30c6\\u30fc\\u30bf", 3563 "\\u30c6\\u30a7\\u30bf", 3564 }; 3565 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0])); 3566 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0])); 3567 /* this piece of code should be in some sort of verbose mode */ 3568 /* it gets the collation elements for elements and prints them */ 3569 /* This is useful when trying to see whether the problem is */ 3570 { 3571 UErrorCode status = U_ZERO_ERROR; 3572 uint32_t i = 0; 3573 UCollationElements *it = NULL; 3574 uint32_t CE; 3575 UChar string[256]; 3576 uint32_t uStringLen; 3577 UCollator *coll = NULL; 3578 3579 uStringLen = u_unescape(rule1, string, 256); 3580 3581 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3582 3583 /*coll = ucol_open("ja_JP_JIS", &status);*/ 3584 it = ucol_openElements(coll, string, 0, &status); 3585 3586 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) { 3587 
log_verbose("%s\n", test[i]); 3588 uStringLen = u_unescape(test[i], string, 256); 3589 ucol_setText(it, string, uStringLen, &status); 3590 3591 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) { 3592 log_verbose("%08X\n", CE); 3593 } 3594 log_verbose("\n"); 3595 3596 } 3597 3598 ucol_closeElements(it); 3599 ucol_close(coll); 3600 } 3601 #endif 3602 } 3603 3604 static void TestPrefixCompose(void) { 3605 const char* rule1 = 3606 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc"; 3607 /* 3608 const char* test[] = { 3609 "\\u30c6\\u30fc\\u30bf", 3610 "\\u30c6\\u30a7\\u30bf", 3611 }; 3612 */ 3613 { 3614 UErrorCode status = U_ZERO_ERROR; 3615 /*uint32_t i = 0;*/ 3616 /*UCollationElements *it = NULL;*/ 3617 /* uint32_t CE;*/ 3618 UChar string[256]; 3619 uint32_t uStringLen; 3620 UCollator *coll = NULL; 3621 3622 uStringLen = u_unescape(rule1, string, 256); 3623 3624 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3625 ucol_close(coll); 3626 } 3627 3628 3629 } 3630 3631 /* 3632 [last variable] last variable value 3633 [last primary ignorable] largest CE for primary ignorable 3634 [last secondary ignorable] largest CE for secondary ignorable 3635 [last tertiary ignorable] largest CE for tertiary ignorable 3636 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) 3637 */ 3638 3639 static void TestRuleOptions(void) { 3640 /* values here are hardcoded and are correct for the current UCA 3641 * when the UCA changes, one might be forced to change these 3642 * values. 3643 */ 3644 3645 /* 3646 * These strings contain the last character before [variable top] 3647 * and the first and second characters (by primary weights) after it. 3648 * See FractionalUCA.txt. 
For example: 3649 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR 3650 [variable top = 0C FE] 3651 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT 3652 and 3653 00B4; [0D 0C, 05, 05] 3654 * 3655 * Note: Starting with UCA 6.0, the [variable top] collation element 3656 * is not the weight of any character or string, 3657 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable]. 3658 */ 3659 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F" 3660 #define FIRST_REGULAR_CHAR_STRING "\\u0060" 3661 #define SECOND_REGULAR_CHAR_STRING "\\u00B4" 3662 3663 /* 3664 * This string has to match the character that has the [last regular] weight 3665 * which changes with each UCA version. 3666 * See the bottom of FractionalUCA.txt which says something like 3667 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032 3668 * 3669 * Note: Starting with UCA 6.0, the [last regular] collation element 3670 * is not the weight of any character or string, 3671 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. 
3672 */ 3673 #define LAST_REGULAR_CHAR_STRING "\\U0001342E" 3674 3675 static const struct { 3676 const char *rules; 3677 const char *data[10]; 3678 const uint32_t len; 3679 } tests[] = { 3680 /* - all befores here amount to zero */ 3681 { "&[before 3][first tertiary ignorable]<<<a", 3682 { "\\u0000", "a"}, 2 3683 }, /* you cannot go before first tertiary ignorable */ 3684 3685 { "&[before 3][last tertiary ignorable]<<<a", 3686 { "\\u0000", "a"}, 2 3687 }, /* you cannot go before last tertiary ignorable */ 3688 3689 { "&[before 3][first secondary ignorable]<<<a", 3690 { "\\u0000", "a"}, 2 3691 }, /* you cannot go before first secondary ignorable */ 3692 3693 { "&[before 3][last secondary ignorable]<<<a", 3694 { "\\u0000", "a"}, 2 3695 }, /* you cannot go before first secondary ignorable */ 3696 3697 /* 'normal' befores */ 3698 3699 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a", 3700 { "c", "b", "\\u0332", "a" }, 4 3701 }, 3702 3703 /* we don't have a code point that corresponds to 3704 * the last primary ignorable 3705 */ 3706 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", 3707 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 3708 }, 3709 3710 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", 3711 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 3712 }, 3713 3714 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", 3715 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5 3716 }, 3717 3718 { "&[first regular]<a" 3719 "&[before 1][first regular]<b", 3720 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 3721 }, 3722 3723 { "&[before 1][last regular]<b" 3724 "&[last regular]<a", 3725 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4 3726 }, 3727 3728 { "&[before 1][first implicit]<b" 3729 "&[first implicit]<a", 3730 { "b", "\\u4e00", "a", "\\u4e01"}, 4 3731 }, 3732 3733 { "&[before 1][last implicit]<b" 3734 
"&[last implicit]<a", 3735 { "b", "\\U0010FFFD", "a" }, 3 3736 }, 3737 3738 { "&[last variable]<z" 3739 "&[last primary ignorable]<x" 3740 "&[last secondary ignorable]<<y" 3741 "&[last tertiary ignorable]<<<w" 3742 "&[top]<u", 3743 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7 3744 } 3745 3746 }; 3747 uint32_t i; 3748 3749 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3750 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3751 } 3752 } 3753 3754 3755 static void TestOptimize(void) { 3756 /* this is not really a test - just trying out 3757 * whether copying of UCA contents will fail 3758 * Cannot really test, since the functionality 3759 * remains the same. 3760 */ 3761 static const struct { 3762 const char *rules; 3763 const char *data[10]; 3764 const uint32_t len; 3765 } tests[] = { 3766 /* - all befores here amount to zero */ 3767 { "[optimize [\\uAC00-\\uD7FF]]", 3768 { "a", "b"}, 2} 3769 }; 3770 uint32_t i; 3771 3772 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3773 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3774 } 3775 } 3776 3777 /* 3778 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator. 3779 weiv ucol_strcollIter? 3780 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021 3781 weiv these are the input strings? 3782 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2 3783 weiv will check - could be a problem with utf-8 iterator 3784 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2 3785 weiv hmmm 3786 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate 3787 weiv that doesn't sound right 3788 cycheng (at) ca.ibm.c... 
we got the same inconsistent results on AIX and Win2000 3789 weiv so you have two strings, you convert them to utf-8 and to utf-16BE 3790 cycheng (at) ca.ibm.c... yes 3791 weiv and then do the comparison 3792 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be 3793 weiv utf-16 strings look like a little endian ones in the example you sent me 3794 weiv It could be a bug - let me try to test it out 3795 cycheng (at) ca.ibm.c... ok 3796 cycheng (at) ca.ibm.c... we can wait till the conf. call 3797 cycheng (at) ca.ibm.c... next week 3798 weiv that would be great 3799 weiv hmmm 3800 weiv I might be wrong 3801 weiv let me play with it some more 3802 cycheng (at) ca.ibm.c... ok 3803 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be 3804 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2 3805 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be 3806 weiv ok 3807 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data 3808 weiv thanks 3809 cycheng (at) ca.ibm.c... 
the 4 strings we sent are just samples 3810 */ 3811 #if 0 3812 static void Alexis(void) { 3813 UErrorCode status = U_ZERO_ERROR; 3814 UCollator *coll = ucol_open("", &status); 3815 3816 3817 const char utf16be[2][4] = { 3818 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 }, 3819 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 } 3820 }; 3821 3822 const char utf8[2][4] = { 3823 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 }, 3824 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 }, 3825 }; 3826 3827 UCharIterator iterU161, iterU162; 3828 UCharIterator iterU81, iterU82; 3829 3830 UCollationResult resU16, resU8; 3831 3832 uiter_setUTF16BE(&iterU161, utf16be[0], 4); 3833 uiter_setUTF16BE(&iterU162, utf16be[1], 4); 3834 3835 uiter_setUTF8(&iterU81, utf8[0], 4); 3836 uiter_setUTF8(&iterU82, utf8[1], 4); 3837 3838 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3839 3840 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status); 3841 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status); 3842 3843 3844 if(resU16 != resU8) { 3845 log_err("different results\n"); 3846 } 3847 3848 ucol_close(coll); 3849 } 3850 #endif 3851 3852 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256 3853 static void Alexis2(void) { 3854 UErrorCode status = U_ZERO_ERROR; 3855 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3856 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3857 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3858 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0; 3859 3860 UConverter *conv = NULL; 3861 3862 UCharIterator U16BEItS, U16BEItT; 3863 UCharIterator U8ItS, U8ItT; 3864 3865 UCollationResult resU16, resU16BE, resU8; 3866 3867 static const char* const pairs[][2] = { 3868 { "\\ud800\\u0021", "\\uFFFC\\u0062"}, 3869 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" }, 3870 { "\\u0E40\\u0021", 
"\\u00A1\\u0021"}, 3871 { "\\u0E40\\u0021", "\\uFE57\\u0062"}, 3872 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"}, 3873 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"}, 3874 { "\\u0020", "\\u0020\\u0000"} 3875 /* 3876 5F20 (my result here) 3877 5F204E008E3F 3878 5F20 (your result here) 3879 */ 3880 }; 3881 3882 int32_t i = 0; 3883 3884 UCollator *coll = ucol_open("", &status); 3885 if(status == U_FILE_ACCESS_ERROR) { 3886 log_data_err("Is your data around?\n"); 3887 return; 3888 } else if(U_FAILURE(status)) { 3889 log_err("Error opening collator\n"); 3890 return; 3891 } 3892 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3893 conv = ucnv_open("UTF16BE", &status); 3894 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { 3895 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3896 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3897 3898 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT); 3899 3900 log_verbose("Result of strcoll is %i\n", resU16); 3901 3902 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status); 3903 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status); 3904 3905 /* use the original sizes, as the result from converter is in bytes */ 3906 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS); 3907 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT); 3908 3909 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status); 3910 3911 log_verbose("Result of U16BE is %i\n", resU16BE); 3912 3913 if(resU16 != resU16BE) { 3914 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]); 3915 } 3916 3917 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status); 3918 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status); 3919 3920 uiter_setUTF8(&U8ItS, U8Source, U8LenS); 3921 
uiter_setUTF8(&U8ItT, U8Target, U8LenT); 3922 3923 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status); 3924 3925 if(resU16 != resU8) { 3926 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]); 3927 } 3928 3929 } 3930 3931 ucol_close(coll); 3932 ucnv_close(conv); 3933 } 3934 3935 static void TestHebrewUCA(void) { 3936 UErrorCode status = U_ZERO_ERROR; 3937 static const char *first[] = { 3938 "d790d6b8d79cd795d6bcd7a9", 3939 "d790d79cd79ed7a7d799d799d7a1", 3940 "d790d6b4d79ed795d6bcd7a9", 3941 }; 3942 3943 char utf8String[3][256]; 3944 UChar utf16String[3][256]; 3945 3946 int32_t i = 0, j = 0; 3947 int32_t sizeUTF8[3]; 3948 int32_t sizeUTF16[3]; 3949 3950 UCollator *coll = ucol_open("", &status); 3951 if (U_FAILURE(status)) { 3952 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status)); 3953 return; 3954 } 3955 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/ 3956 3957 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) { 3958 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status); 3959 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status); 3960 log_verbose("%i: "); 3961 for(j = 0; j < sizeUTF16[i]; j++) { 3962 /*log_verbose("\\u%04X", utf16String[i][j]);*/ 3963 log_verbose("%04X", utf16String[i][j]); 3964 } 3965 log_verbose("\n"); 3966 } 3967 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) { 3968 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) { 3969 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS); 3970 } 3971 } 3972 3973 ucol_close(coll); 3974 3975 } 3976 3977 static void TestPartialSortKeyTermination(void) { 3978 static const char* cases[] = { 3979 "\\u1234\\u1234\\udc00", 3980 "\\udc00\\ud800\\ud800" 3981 }; 3982 3983 int32_t i = sizeof(UCollator); 3984 3985 UErrorCode status = U_ZERO_ERROR; 3986 3987 UCollator *coll = ucol_open("", &status); 3988 3989 UCharIterator iter; 3990 3991 UChar 
currCase[256]; 3992 int32_t length = 0; 3993 int32_t pKeyLen = 0; 3994 3995 uint8_t key[256]; 3996 3997 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 3998 uint32_t state[2] = {0, 0}; 3999 length = u_unescape(cases[i], currCase, 256); 4000 uiter_setString(&iter, currCase, length); 4001 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status); 4002 4003 log_verbose("Done\n"); 4004 4005 } 4006 ucol_close(coll); 4007 } 4008 4009 static void TestSettings(void) { 4010 static const char* cases[] = { 4011 "apple", 4012 "Apple" 4013 }; 4014 4015 static const char* locales[] = { 4016 "", 4017 "en" 4018 }; 4019 4020 UErrorCode status = U_ZERO_ERROR; 4021 4022 int32_t i = 0, j = 0; 4023 4024 UChar source[256], target[256]; 4025 int32_t sLen = 0, tLen = 0; 4026 4027 UCollator *collateObject = NULL; 4028 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) { 4029 collateObject = ucol_open(locales[i], &status); 4030 ucol_setStrength(collateObject, UCOL_PRIMARY); 4031 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status); 4032 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) { 4033 sLen = u_unescape(cases[j-1], source, 256); 4034 source[sLen] = 0; 4035 tLen = u_unescape(cases[j], target, 256); 4036 source[tLen] = 0; 4037 doTest(collateObject, source, target, UCOL_EQUAL); 4038 } 4039 ucol_close(collateObject); 4040 } 4041 } 4042 4043 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) { 4044 UErrorCode status = U_ZERO_ERROR; 4045 int32_t errorNo = 0; 4046 /*const UChar *sourceRules = NULL;*/ 4047 /*int32_t sourceRulesLen = 0;*/ 4048 UColAttributeValue french = UCOL_OFF; 4049 int32_t cloneSize = 0; 4050 4051 if(!ucol_equals(source, target)) { 4052 log_err("Same collators, different address not equal\n"); 4053 errorNo++; 4054 } 4055 ucol_close(target); 4056 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { 
4057 /* currently, safeClone is implemented through getRules/openRules 4058 * so it is the same as the test below - I will comment that test out. 4059 */ 4060 /* real thing */ 4061 target = ucol_safeClone(source, NULL, &cloneSize, &status); 4062 if(U_FAILURE(status)) { 4063 log_err("Error creating clone\n"); 4064 errorNo++; 4065 return errorNo; 4066 } 4067 if(!ucol_equals(source, target)) { 4068 log_err("Collator different from it's clone\n"); 4069 errorNo++; 4070 } 4071 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status); 4072 if(french == UCOL_ON) { 4073 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 4074 } else { 4075 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 4076 } 4077 if(U_FAILURE(status)) { 4078 log_err("Error setting attributes\n"); 4079 errorNo++; 4080 return errorNo; 4081 } 4082 if(ucol_equals(source, target)) { 4083 log_err("Collators same even when options changed\n"); 4084 errorNo++; 4085 } 4086 ucol_close(target); 4087 /* commented out since safeClone uses exactly the same technique */ 4088 /* 4089 sourceRules = ucol_getRules(source, &sourceRulesLen); 4090 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4091 if(U_FAILURE(status)) { 4092 log_err("Error instantiating target from rules\n"); 4093 errorNo++; 4094 return errorNo; 4095 } 4096 if(!ucol_equals(source, target)) { 4097 log_err("Collator different from collator that was created from the same rules\n"); 4098 errorNo++; 4099 } 4100 ucol_close(target); 4101 */ 4102 } 4103 return errorNo; 4104 } 4105 4106 4107 static void TestEquals(void) { 4108 /* ucol_equals is not currently a public API. 
There is a chance that it will become 4109 * something like this, but currently it is only used by RuleBasedCollator::operator== 4110 */ 4111 /* test whether the two collators instantiated from the same locale are equal */ 4112 UErrorCode status = U_ZERO_ERROR; 4113 UParseError parseError; 4114 int32_t noOfLoc = uloc_countAvailable(); 4115 const char *locName = NULL; 4116 UCollator *source = NULL, *target = NULL; 4117 int32_t i = 0; 4118 4119 const char* rules[] = { 4120 "&l < lj <<< Lj <<< LJ", 4121 "&n < nj <<< Nj <<< NJ", 4122 "&ae <<< \\u00e4", 4123 "&AE <<< \\u00c4" 4124 }; 4125 /* 4126 const char* badRules[] = { 4127 "&l <<< Lj", 4128 "&n < nj <<< nJ <<< NJ", 4129 "&a <<< \\u00e4", 4130 "&AE <<< \\u00c4 <<< x" 4131 }; 4132 */ 4133 4134 UChar sourceRules[1024], targetRules[1024]; 4135 int32_t sourceRulesSize = 0, targetRulesSize = 0; 4136 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]); 4137 4138 for(i = 0; i < rulesSize; i++) { 4139 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize); 4140 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize); 4141 } 4142 4143 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4144 if(status == U_FILE_ACCESS_ERROR) { 4145 log_data_err("Is your data around?\n"); 4146 return; 4147 } else if(U_FAILURE(status)) { 4148 log_err("Error opening collator\n"); 4149 return; 4150 } 4151 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4152 if(!ucol_equals(source, target)) { 4153 log_err("Equivalent collators not equal!\n"); 4154 } 4155 ucol_close(source); 4156 ucol_close(target); 4157 4158 source = ucol_open("root", &status); 4159 target = ucol_open("root", &status); 4160 log_verbose("Testing root\n"); 4161 if(!ucol_equals(source, source)) { 4162 log_err("Same collator not equal\n"); 4163 } 4164 if(TestEqualsForCollator(locName, source, 
target)) { 4165 log_err("Errors for root\n", locName); 4166 } 4167 ucol_close(source); 4168 4169 for(i = 0; i<noOfLoc; i++) { 4170 status = U_ZERO_ERROR; 4171 locName = uloc_getAvailable(i); 4172 /*if(hasCollationElements(locName)) {*/ 4173 log_verbose("Testing equality for locale %s\n", locName); 4174 source = ucol_open(locName, &status); 4175 target = ucol_open(locName, &status); 4176 if (U_FAILURE(status)) { 4177 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status)); 4178 continue; 4179 } 4180 if(TestEqualsForCollator(locName, source, target)) { 4181 log_err("Errors for locale %s\n", locName); 4182 } 4183 ucol_close(source); 4184 /*}*/ 4185 } 4186 } 4187 4188 static void TestJ2726(void) { 4189 UChar a[2] = { 0x61, 0x00 }; /*"a"*/ 4190 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/ 4191 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/ 4192 UErrorCode status = U_ZERO_ERROR; 4193 UCollator *coll = ucol_open("en", &status); 4194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 4195 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4196 doTest(coll, a, aSpace, UCOL_EQUAL); 4197 doTest(coll, aSpace, a, UCOL_EQUAL); 4198 doTest(coll, a, spaceA, UCOL_EQUAL); 4199 doTest(coll, spaceA, a, UCOL_EQUAL); 4200 doTest(coll, spaceA, aSpace, UCOL_EQUAL); 4201 doTest(coll, aSpace, spaceA, UCOL_EQUAL); 4202 ucol_close(coll); 4203 } 4204 4205 static void NullRule(void) { 4206 UChar r[3] = {0}; 4207 UErrorCode status = U_ZERO_ERROR; 4208 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4209 if(U_SUCCESS(status)) { 4210 log_err("This should have been an error!\n"); 4211 ucol_close(coll); 4212 } else { 4213 status = U_ZERO_ERROR; 4214 } 4215 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4216 if(U_FAILURE(status)) { 4217 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status)); 4218 } else { 4219 
ucol_close(coll); 4220 } 4221 } 4222 4223 /** 4224 * Test for CollationElementIterator previous and next for the whole set of 4225 * unicode characters with normalization on. 4226 */ 4227 static void TestNumericCollation(void) 4228 { 4229 UErrorCode status = U_ZERO_ERROR; 4230 4231 const static char *basicTestStrings[]={ 4232 "hello1", 4233 "hello2", 4234 "hello2002", 4235 "hello2003", 4236 "hello123456", 4237 "hello1234567", 4238 "hello10000000", 4239 "hello100000000", 4240 "hello1000000000", 4241 "hello10000000000", 4242 }; 4243 4244 const static char *preZeroTestStrings[]={ 4245 "avery10000", 4246 "avery010000", 4247 "avery0010000", 4248 "avery00010000", 4249 "avery000010000", 4250 "avery0000010000", 4251 "avery00000010000", 4252 "avery000000010000", 4253 }; 4254 4255 const static char *thirtyTwoBitNumericStrings[]={ 4256 "avery42949672960", 4257 "avery42949672961", 4258 "avery42949672962", 4259 "avery429496729610" 4260 }; 4261 4262 const static char *longNumericStrings[]={ 4263 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings. 4264 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that 4265 are treated as multiple collation elements. 
*/ 4266 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */ 4267 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */ 4268 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */ 4269 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */ 4270 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */ 4271 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */ 4272 
"num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */ 4273 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */ 4274 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */ 4275 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */ 4276 }; 4277 4278 const static char *supplementaryDigits[] = { 4279 "\\uD835\\uDFCE", /* 0 */ 4280 "\\uD835\\uDFCF", /* 1 */ 4281 "\\uD835\\uDFD0", /* 2 */ 4282 "\\uD835\\uDFD1", /* 3 */ 4283 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */ 4284 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */ 4285 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */ 4286 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */ 4287 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */ 4288 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */ 4289 }; 4290 4291 const static char *foreignDigits[] = { 4292 "\\u0661", 4293 "\\u0662", 4294 "\\u0663", 4295 "\\u0661\\u0660", 4296 "\\u0661\\u0662", 4297 "\\u0661\\u0663", 4298 "\\u0662\\u0660", 4299 "\\u0662\\u0662", 4300 
"\\u0662\\u0663", 4301 "\\u0663\\u0660", 4302 "\\u0663\\u0662", 4303 "\\u0663\\u0663" 4304 }; 4305 4306 const static char *evenZeroes[] = { 4307 "2000", 4308 "2001", 4309 "2002", 4310 "2003" 4311 }; 4312 4313 UColAttribute att = UCOL_NUMERIC_COLLATION; 4314 UColAttributeValue val = UCOL_ON; 4315 4316 /* Open our collator. */ 4317 UCollator* coll = ucol_open("root", &status); 4318 if (U_FAILURE(status)){ 4319 log_err_status(status, "ERROR: in using ucol_open() -> %s\n", 4320 myErrorName(status)); 4321 return; 4322 } 4323 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1); 4324 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1); 4325 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1); 4326 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1); 4327 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1); 4328 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1); 4329 4330 /* Setting up our collator to do digits. */ 4331 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 4332 if (U_FAILURE(status)){ 4333 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n", 4334 myErrorName(status)); 4335 return; 4336 } 4337 4338 /* 4339 Testing that prepended zeroes still yield the correct collation behavior. 4340 We expect that every element in our strings array will be equal. 
4341 */ 4342 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL); 4343 4344 ucol_close(coll); 4345 } 4346 4347 static void TestTibetanConformance(void) 4348 { 4349 const char* test[] = { 4350 "\\u0FB2\\u0591\\u0F71\\u0061", 4351 "\\u0FB2\\u0F71\\u0061" 4352 }; 4353 4354 UErrorCode status = U_ZERO_ERROR; 4355 UCollator *coll = ucol_open("", &status); 4356 UChar source[100]; 4357 UChar target[100]; 4358 int result; 4359 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4360 if (U_SUCCESS(status)) { 4361 u_unescape(test[0], source, 100); 4362 u_unescape(test[1], target, 100); 4363 doTest(coll, source, target, UCOL_EQUAL); 4364 result = ucol_strcoll(coll, source, -1, target, -1); 4365 log_verbose("result %d\n", result); 4366 if (UCOL_EQUAL != result) { 4367 log_err("Tibetan comparison error\n"); 4368 } 4369 } 4370 ucol_close(coll); 4371 4372 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); 4373 } 4374 4375 static void TestPinyinProblem(void) { 4376 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" }; 4377 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); 4378 } 4379 4380 #define TST_UCOL_MAX_INPUT 0x220001 4381 #define topByte 0xFF000000; 4382 #define bottomByte 0xFF; 4383 #define fourBytes 0xFFFFFFFF; 4384 4385 4386 static void showImplicit(UChar32 i) { 4387 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) { 4388 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i)); 4389 } 4390 } 4391 4392 static void TestImplicitGeneration(void) { 4393 UErrorCode status = U_ZERO_ERROR; 4394 UChar32 last = 0; 4395 UChar32 current; 4396 UChar32 i = 0, j = 0; 4397 UChar32 roundtrip = 0; 4398 UChar32 lastBottom = 0; 4399 UChar32 currentBottom = 0; 4400 UChar32 lastTop = 0; 4401 UChar32 currentTop = 0; 4402 4403 UCollator *coll = ucol_open("root", &status); 4404 if(U_FAILURE(status)) { 4405 log_err_status(status, "Couldn't open UCA -> 
%s\n", u_errorName(status)); 4406 return; 4407 } 4408 4409 uprv_uca_getRawFromImplicit(0xE20303E7); 4410 4411 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) { 4412 current = uprv_uca_getImplicitFromRaw(i) & fourBytes; 4413 4414 /* check that it round-trips AND that all intervening ones are illegal*/ 4415 roundtrip = uprv_uca_getRawFromImplicit(current); 4416 if (roundtrip != i) { 4417 log_err("No roundtrip %08X\n", i); 4418 } 4419 if (last != 0) { 4420 for (j = last + 1; j < current; ++j) { 4421 roundtrip = uprv_uca_getRawFromImplicit(j); 4422 /* raise an error if it *doesn't* find an error*/ 4423 if (roundtrip != -1) { 4424 log_err("Fails to recognize illegal %08X\n", j); 4425 } 4426 } 4427 } 4428 /* now do other consistency checks*/ 4429 lastBottom = last & bottomByte; 4430 currentBottom = current & bottomByte; 4431 lastTop = last & topByte; 4432 currentTop = current & topByte; 4433 4434 /* print out some values for spot-checking*/ 4435 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) { 4436 showImplicit(i-3); 4437 showImplicit(i-2); 4438 showImplicit(i-1); 4439 showImplicit(i); 4440 showImplicit(i+1); 4441 showImplicit(i+2); 4442 } 4443 last = current; 4444 4445 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) { 4446 log_err("No raw <-> code point roundtrip for 0x%08X\n", i); 4447 } 4448 } 4449 showImplicit(TST_UCOL_MAX_INPUT-2); 4450 showImplicit(TST_UCOL_MAX_INPUT-1); 4451 showImplicit(TST_UCOL_MAX_INPUT); 4452 ucol_close(coll); 4453 } 4454 4455 /** 4456 * Iterate through the given iterator, checking to see that all the strings 4457 * in the expected array are present. 
4458 * @param expected array of strings we expect to see, or NULL 4459 * @param expectedCount number of elements of expected, or 0 4460 */ 4461 static int32_t checkUEnumeration(const char* msg, 4462 UEnumeration* iter, 4463 const char** expected, 4464 int32_t expectedCount) { 4465 UErrorCode ec = U_ZERO_ERROR; 4466 int32_t i = 0, n, j, bit; 4467 int32_t seenMask = 0; 4468 4469 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */ 4470 n = uenum_count(iter, &ec); 4471 if (!assertSuccess("count", &ec)) return -1; 4472 log_verbose("%s = [", msg); 4473 for (;; ++i) { 4474 const char* s = uenum_next(iter, NULL, &ec); 4475 if (!assertSuccess("snext", &ec) || s == NULL) break; 4476 if (i != 0) log_verbose(","); 4477 log_verbose("%s", s); 4478 /* check expected list */ 4479 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4480 if ((seenMask&bit) == 0 && 4481 uprv_strcmp(s, expected[j]) == 0) { 4482 seenMask |= bit; 4483 break; 4484 } 4485 } 4486 } 4487 log_verbose("] (%d)\n", i); 4488 assertTrue("count verified", i==n); 4489 /* did we see all expected strings? */ 4490 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4491 if ((seenMask&bit)!=0) { 4492 log_verbose("Ok: \"%s\" seen\n", expected[j]); 4493 } else { 4494 log_err("FAIL: \"%s\" not seen\n", expected[j]); 4495 } 4496 } 4497 return n; 4498 } 4499 4500 /** 4501 * Test new API added for separate collation tree. 
4502 */ 4503 static void TestSeparateTrees(void) { 4504 UErrorCode ec = U_ZERO_ERROR; 4505 UEnumeration *e = NULL; 4506 int32_t n = -1; 4507 UBool isAvailable; 4508 char loc[256]; 4509 4510 static const char* AVAIL[] = { "en", "de" }; 4511 4512 static const char* KW[] = { "collation" }; 4513 4514 static const char* KWVAL[] = { "phonebook", "stroke" }; 4515 4516 #if !UCONFIG_NO_SERVICE 4517 e = ucol_openAvailableLocales(&ec); 4518 if (e != NULL) { 4519 assertSuccess("ucol_openAvailableLocales", &ec); 4520 assertTrue("ucol_openAvailableLocales!=0", e!=0); 4521 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)); 4522 /* Don't need to check n because we check list */ 4523 uenum_close(e); 4524 } else { 4525 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec)); 4526 } 4527 #endif 4528 4529 e = ucol_getKeywords(&ec); 4530 if (e != NULL) { 4531 assertSuccess("ucol_getKeywords", &ec); 4532 assertTrue("ucol_getKeywords!=0", e!=0); 4533 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW)); 4534 /* Don't need to check n because we check list */ 4535 uenum_close(e); 4536 } else { 4537 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec)); 4538 } 4539 4540 e = ucol_getKeywordValues(KW[0], &ec); 4541 if (e != NULL) { 4542 assertSuccess("ucol_getKeywordValues", &ec); 4543 assertTrue("ucol_getKeywordValues!=0", e!=0); 4544 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL)); 4545 /* Don't need to check n because we check list */ 4546 uenum_close(e); 4547 } else { 4548 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec)); 4549 } 4550 4551 /* Try setting a warning before calling ucol_getKeywordValues */ 4552 ec = U_USING_FALLBACK_WARNING; 4553 e = ucol_getKeywordValues(KW[0], &ec); 4554 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) { 4555 
assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0); 4556 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL)); 4557 /* Don't need to check n because we check list */ 4558 uenum_close(e); 4559 } 4560 4561 /* 4562 U_DRAFT int32_t U_EXPORT2 4563 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 4564 const char* locale, UBool* isAvailable, 4565 UErrorCode* status); 4566 } 4567 */ 4568 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de", 4569 &isAvailable, &ec); 4570 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4571 assertEquals("getFunctionalEquivalent(de)", "de", loc); 4572 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", 4573 isAvailable == TRUE); 4574 } 4575 4576 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", 4577 &isAvailable, &ec); 4578 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4579 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc); 4580 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE", 4581 isAvailable == TRUE); 4582 } 4583 } 4584 4585 /* supercedes TestJ784 */ 4586 static void TestBeforePinyin(void) { 4587 const static char rules[] = { 4588 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0" 4589 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8" 4590 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC" 4591 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2" 4592 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9" 4593 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC" 4594 }; 4595 4596 const static char *test[] = { 4597 "l\\u0101", 4598 "la", 4599 "l\\u0101n", 4600 "lan ", 4601 "l\\u0113", 4602 "le", 4603 "l\\u0113n", 4604 "len" 4605 }; 4606 4607 const static char 
*test2[] = { 4608 "x\\u0101", 4609 "x\\u0100", 4610 "X\\u0101", 4611 "X\\u0100", 4612 "x\\u00E1", 4613 "x\\u00C1", 4614 "X\\u00E1", 4615 "X\\u00C1", 4616 "x\\u01CE", 4617 "x\\u01CD", 4618 "X\\u01CE", 4619 "X\\u01CD", 4620 "x\\u00E0", 4621 "x\\u00C0", 4622 "X\\u00E0", 4623 "X\\u00C0", 4624 "xa", 4625 "xA", 4626 "Xa", 4627 "XA", 4628 "x\\u0101x", 4629 "x\\u0100x", 4630 "x\\u00E1x", 4631 "x\\u00C1x", 4632 "x\\u01CEx", 4633 "x\\u01CDx", 4634 "x\\u00E0x", 4635 "x\\u00C0x", 4636 "xax", 4637 "xAx" 4638 }; 4639 4640 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4641 genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0])); 4642 genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0])); 4643 genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0])); 4644 } 4645 4646 static void TestBeforeTightening(void) { 4647 static const struct { 4648 const char *rules; 4649 UErrorCode expectedStatus; 4650 } tests[] = { 4651 { "&[before 1]a<x", U_ZERO_ERROR }, 4652 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR }, 4653 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR }, 4654 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR }, 4655 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR }, 4656 { "&[before 2]a<<x",U_ZERO_ERROR }, 4657 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR }, 4658 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR }, 4659 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR }, 4660 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR }, 4661 { "&[before 3]a<<<x",U_ZERO_ERROR }, 4662 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR }, 4663 { "&[before I]a = x",U_INVALID_FORMAT_ERROR } 4664 }; 4665 4666 int32_t i = 0; 4667 4668 UErrorCode status = U_ZERO_ERROR; 4669 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4670 uint32_t rlen = 0; 4671 4672 UCollator *coll = NULL; 4673 4674 4675 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4676 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN); 4677 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4678 if(status != 
tests[i].expectedStatus) { 4679 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n", 4680 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus)); 4681 } 4682 ucol_close(coll); 4683 status = U_ZERO_ERROR; 4684 } 4685 4686 } 4687 4688 /* 4689 &m < a 4690 &[before 1] a < x <<< X << q <<< Q < z 4691 assert: m <<< M < x <<< X << q <<< Q < z < a < n 4692 4693 &m < a 4694 &[before 2] a << x <<< X << q <<< Q < z 4695 assert: m <<< M < x <<< X << q <<< Q << a < z < n 4696 4697 &m < a 4698 &[before 3] a <<< x <<< X << q <<< Q < z 4699 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n 4700 4701 4702 &m << a 4703 &[before 1] a < x <<< X << q <<< Q < z 4704 assert: x <<< X << q <<< Q < z < m <<< M << a < n 4705 4706 &m << a 4707 &[before 2] a << x <<< X << q <<< Q < z 4708 assert: m <<< M << x <<< X << q <<< Q << a < z < n 4709 4710 &m << a 4711 &[before 3] a <<< x <<< X << q <<< Q < z 4712 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n 4713 4714 4715 &m <<< a 4716 &[before 1] a < x <<< X << q <<< Q < z 4717 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M 4718 4719 &m <<< a 4720 &[before 2] a << x <<< X << q <<< Q < z 4721 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n 4722 4723 &m <<< a 4724 &[before 3] a <<< x <<< X << q <<< Q < z 4725 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n 4726 4727 4728 &[before 1] s < x <<< X << q <<< Q < z 4729 assert: r <<< R < x <<< X << q <<< Q < z < s < n 4730 4731 &[before 2] s << x <<< X << q <<< Q < z 4732 assert: r <<< R < x <<< X << q <<< Q << s < z < n 4733 4734 &[before 3] s <<< x <<< X << q <<< Q < z 4735 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n 4736 4737 4738 &[before 1] \u24DC < x <<< X << q <<< Q < z 4739 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M 4740 4741 &[before 2] \u24DC << x <<< X << q <<< Q < z 4742 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n 4743 4744 &[before 3] \u24DC <<< x <<< X << q <<< Q < z 4745 
assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n 4746 */ 4747 4748 4749 #if 0 4750 /* requires features not yet supported */ 4751 static void TestMoreBefore(void) { 4752 static const struct { 4753 const char* rules; 4754 const char* order[16]; 4755 int32_t size; 4756 } tests[] = { 4757 { "&m < a &[before 1] a < x <<< X << q <<< Q < z", 4758 { "m","M","x","X","q","Q","z","a","n" }, 9}, 4759 { "&m < a &[before 2] a << x <<< X << q <<< Q < z", 4760 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4761 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z", 4762 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4763 { "&m << a &[before 1] a < x <<< X << q <<< Q < z", 4764 { "x","X","q","Q","z","m","M","a","n" }, 9}, 4765 { "&m << a &[before 2] a << x <<< X << q <<< Q < z", 4766 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4767 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z", 4768 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4769 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z", 4770 { "x","X","q","Q","z","n","m","a","M" }, 9}, 4771 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z", 4772 { "x","X","q","Q","m","a","M","z","n" }, 9}, 4773 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z", 4774 { "m","x","X","a","M","q","Q","z","n" }, 9}, 4775 { "&[before 1] s < x <<< X << q <<< Q < z", 4776 { "r","R","x","X","q","Q","z","s","n" }, 9}, 4777 { "&[before 2] s << x <<< X << q <<< Q < z", 4778 { "r","R","x","X","q","Q","s","z","n" }, 9}, 4779 { "&[before 3] s <<< x <<< X << q <<< Q < z", 4780 { "r","R","x","X","s","q","Q","z","n" }, 9}, 4781 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z", 4782 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9}, 4783 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z", 4784 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9}, 4785 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z", 4786 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9} 4787 }; 4788 4789 int32_t i = 0; 4790 4791 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4792 
genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size); 4793 } 4794 } 4795 #endif 4796 4797 static void TestTailorNULL( void ) { 4798 const static char* rule = "&a <<< '\\u0000'"; 4799 UErrorCode status = U_ZERO_ERROR; 4800 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4801 uint32_t rlen = 0; 4802 UChar a = 1, null = 0; 4803 UCollationResult res = UCOL_EQUAL; 4804 4805 UCollator *coll = NULL; 4806 4807 4808 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN); 4809 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4810 4811 if(U_FAILURE(status)) { 4812 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status)); 4813 } else { 4814 res = ucol_strcoll(coll, &a, 1, &null, 1); 4815 4816 if(res != UCOL_LESS) { 4817 log_err("NULL was not tailored properly!\n"); 4818 } 4819 } 4820 4821 ucol_close(coll); 4822 } 4823 4824 static void 4825 TestUpperFirstQuaternary(void) 4826 { 4827 const char* tests[] = { "B", "b", "Bb", "bB" }; 4828 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST }; 4829 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST }; 4830 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4831 } 4832 4833 static void 4834 TestJ4960(void) 4835 { 4836 const char* tests[] = { "\\u00e2T", "aT" }; 4837 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL }; 4838 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON }; 4839 const char* tests2[] = { "a", "A" }; 4840 const char* rule = "&[first tertiary ignorable]=A=a"; 4841 UColAttribute att2[] = { UCOL_CASE_LEVEL }; 4842 UColAttributeValue attVals2[] = { UCOL_ON }; 4843 /* Test whether we correctly ignore primary ignorables on case level when */ 4844 /* we have only primary & case level */ 4845 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL); 4846 /* Test whether ICU4J will make case 
level for sortkeys that have primary strength */ 4847 /* and case level */ 4848 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4849 /* Test whether completely ignorable letters have case level info (they shouldn't) */ 4850 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL); 4851 } 4852 4853 static void 4854 TestJ5223(void) 4855 { 4856 static const char *test = "this is a test string"; 4857 UChar ustr[256]; 4858 int32_t ustr_length = u_unescape(test, ustr, 256); 4859 unsigned char sortkey[256]; 4860 int32_t sortkey_length; 4861 UErrorCode status = U_ZERO_ERROR; 4862 static UCollator *coll = NULL; 4863 coll = ucol_open("root", &status); 4864 if(U_FAILURE(status)) { 4865 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 4866 return; 4867 } 4868 ucol_setStrength(coll, UCOL_PRIMARY); 4869 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4870 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4871 if (U_FAILURE(status)) { 4872 log_err("Failed setting atributes\n"); 4873 return; 4874 } 4875 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0); 4876 if (sortkey_length > 256) return; 4877 4878 /* we mark the position where the null byte should be written in advance */ 4879 sortkey[sortkey_length-1] = 0xAA; 4880 4881 /* we set the buffer size one byte higher than needed */ 4882 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 4883 sortkey_length+1); 4884 4885 /* no error occurs (for me) */ 4886 if (sortkey[sortkey_length-1] == 0xAA) { 4887 log_err("Hit bug at first try\n"); 4888 } 4889 4890 /* we mark the position where the null byte should be written again */ 4891 sortkey[sortkey_length-1] = 0xAA; 4892 4893 /* this time we set the buffer size to the exact amount needed */ 4894 sortkey_length = ucol_getSortKey(coll, 
ustr, ustr_length, sortkey, 4895 sortkey_length); 4896 4897 /* now the trailing null byte is not written */ 4898 if (sortkey[sortkey_length-1] == 0xAA) { 4899 log_err("Hit bug at second try\n"); 4900 } 4901 4902 ucol_close(coll); 4903 } 4904 4905 /* Regression test for Thai partial sort key problem */ 4906 static void 4907 TestJ5232(void) 4908 { 4909 const static char *test[] = { 4910 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21", 4911 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21" 4912 }; 4913 4914 genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0])); 4915 } 4916 4917 static void 4918 TestJ5367(void) 4919 { 4920 const static char *test[] = { "a", "y" }; 4921 const char* rules = "&Ny << Y &[first secondary ignorable] <<< a"; 4922 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4923 } 4924 4925 static void 4926 TestVI5913(void) 4927 { 4928 UErrorCode status = U_ZERO_ERROR; 4929 int32_t i, j; 4930 UCollator *coll =NULL; 4931 uint8_t resColl[100], expColl[100]; 4932 int32_t rLen, tLen, ruleLen, sLen, kLen; 4933 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/ 4934 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ 4935 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/ 4936 static const UChar tData[][20]={ 4937 {0x1EAC, 0}, 4938 {0x0041, 0x0323, 0x0302, 0}, 4939 {0x1EA0, 0x0302, 0}, 4940 {0x00C2, 0x0323, 0}, 4941 {0x1ED8, 0}, /* O with dot and circumflex */ 4942 {0x1ECC, 0x0302, 0}, 4943 {0x1EB7, 0}, 4944 {0x1EA1, 0x0306, 0}, 4945 }; 4946 static const UChar tailorData[][20]={ 4947 {0x1FA2, 0}, /* Omega with 3 combining marks */ 4948 {0x03C9, 0x0313, 0x0300, 0x0345, 0}, 4949 {0x1FF3, 0x0313, 0x0300, 0}, 4950 {0x1F60, 0x0300, 0x0345, 0}, 4951 {0x1F62, 0x0345, 0}, 4952 {0x1FA0, 0x0300, 0}, 4953 }; 4954 static const UChar tailorData2[][20]={ 4955 {0x1E63, 0x030C, 0}, /* s with dot below + caron */ 4956 {0x0073, 0x0323, 0x030C, 0}, 
4957 {0x0073, 0x030C, 0x0323, 0}, 4958 }; 4959 static const UChar tailorData3[][20]={ 4960 {0x007a, 0}, /* z */ 4961 {0x0061, 0x0065, 0}, /* a + e */ 4962 {0x0061, 0x00ea, 0}, /* a + e with circumflex */ 4963 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */ 4964 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */ 4965 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */ 4966 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */ 4967 {0x00EA, 0}, /* e with circumflex */ 4968 }; 4969 4970 /* Test Vietnamese sort. */ 4971 coll = ucol_open("vi", &status); 4972 if(U_FAILURE(status)) { 4973 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 4974 return; 4975 } 4976 log_verbose("\n\nVI collation:"); 4977 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) { 4978 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4979 } 4980 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) { 4981 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 4982 } 4983 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) { 4984 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n"); 4985 } 4986 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) { 4987 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 4988 } 4989 4990 for (j=0; j<8; j++) { 4991 tLen = u_strlen(tData[j]); 4992 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 4993 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 4994 for(i = 0; i<rLen; i++) { 4995 log_verbose(" %02X", resColl[i]); 4996 } 4997 } 4998 4999 ucol_close(coll); 5000 5001 /* Test Romanian sort. 
*/ 5002 coll = ucol_open("ro", &status); 5003 log_verbose("\n\nRO collation:"); 5004 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) { 5005 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 5006 } 5007 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) { 5008 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 5009 } 5010 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) { 5011 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 5012 } 5013 5014 for (j=4; j<8; j++) { 5015 tLen = u_strlen(tData[j]); 5016 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 5017 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 5018 for(i = 0; i<rLen; i++) { 5019 log_verbose(" %02X", resColl[i]); 5020 } 5021 } 5022 ucol_close(coll); 5023 5024 /* Test the precomposed Greek character with 3 combining marks. */ 5025 log_verbose("\n\nTailoring test: Greek character with 3 combining marks"); 5026 ruleLen = u_strlen(rule); 5027 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5028 if (U_FAILURE(status)) { 5029 log_err("ucol_openRules failed with %s\n", u_errorName(status)); 5030 return; 5031 } 5032 sLen = u_strlen(tailorData[0]); 5033 for (j=1; j<6; j++) { 5034 tLen = u_strlen(tailorData[j]); 5035 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) { 5036 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]); 5037 } 5038 } 5039 /* Test getSortKey. 
*/ 5040 tLen = u_strlen(tailorData[0]); 5041 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100); 5042 for (j=0; j<6; j++) { 5043 tLen = u_strlen(tailorData[j]); 5044 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100); 5045 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5046 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5047 for(i = 0; i<rLen; i++) { 5048 log_err(" %02X", resColl[i]); 5049 } 5050 } 5051 } 5052 ucol_close(coll); 5053 5054 log_verbose("\n\nTailoring test for s with caron:"); 5055 ruleLen = u_strlen(rule2); 5056 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5057 tLen = u_strlen(tailorData2[0]); 5058 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100); 5059 for (j=1; j<3; j++) { 5060 tLen = u_strlen(tailorData2[j]); 5061 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100); 5062 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5063 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5064 for(i = 0; i<rLen; i++) { 5065 log_err(" %02X", resColl[i]); 5066 } 5067 } 5068 } 5069 ucol_close(coll); 5070 5071 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); 5072 ruleLen = u_strlen(rule3); 5073 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5074 tLen = u_strlen(tailorData3[3]); 5075 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); 5076 for (j=4; j<6; j++) { 5077 tLen = u_strlen(tailorData3[j]); 5078 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); 5079 5080 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5081 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5082 for(i = 0; i<rLen; i++) { 5083 log_err(" %02X", resColl[i]); 5084 } 5085 } 5086 5087 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5088 for(i = 
0; i<rLen; i++) { 5089 log_verbose(" %02X", resColl[i]); 5090 } 5091 } 5092 ucol_close(coll); 5093 } 5094 5095 static void 5096 TestTailor6179(void) 5097 { 5098 UErrorCode status = U_ZERO_ERROR; 5099 int32_t i; 5100 UCollator *coll =NULL; 5101 uint8_t resColl[100]; 5102 int32_t rLen, tLen, ruleLen; 5103 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */ 5104 static const UChar rule1[]={ 5105 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79, 5106 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20, 5107 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20, 5108 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0}; 5109 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */ 5110 static const UChar rule2[]={ 5111 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61, 5112 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C, 5113 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E, 5114 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C, 5115 0x3C,0x3C,0x20,0x62,0}; 5116 5117 static const UChar tData1[][4]={ 5118 {0x61, 0}, 5119 {0x62, 0}, 5120 { 0xFDD0,0x009E, 0} 5121 }; 5122 static const UChar tData2[][4]={ 5123 {0x61, 0}, 5124 {0x62, 0}, 5125 { 0xFDD0,0x009E, 0} 5126 }; 5127 5128 /* 5129 * These values from FractionalUCA.txt will change, 5130 * and need to be updated here. 
5131 */ 5132 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0}; 5133 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0}; 5134 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; 5135 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; 5136 5137 /* Test [Last Primary ignorable] */ 5138 5139 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n"); 5140 ruleLen = u_strlen(rule1); 5141 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5142 if (U_FAILURE(status)) { 5143 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status)); 5144 return; 5145 } 5146 tLen = u_strlen(tData1[0]); 5147 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100); 5148 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) { 5149 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen); 5150 for(i = 0; i<rLen; i++) { 5151 log_err(" %02X", resColl[i]); 5152 } 5153 log_err("\n"); 5154 } 5155 tLen = u_strlen(tData1[1]); 5156 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100); 5157 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) { 5158 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen); 5159 for(i = 0; i<rLen; i++) { 5160 log_err(" %02X", resColl[i]); 5161 } 5162 log_err("\n"); 5163 } 5164 ucol_close(coll); 5165 5166 5167 /* Test [Last Secondary ignorable] */ 5168 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n"); 5169 ruleLen = u_strlen(rule1); 5170 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5171 if (U_FAILURE(status)) { 5172 log_err("Tailoring test: &[last secondary ignorable] failed! 
-> %s\n", u_errorName(status)); 5173 return; 5174 } 5175 tLen = u_strlen(tData2[0]); 5176 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); 5177 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) { 5178 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 5179 for(i = 0; i<rLen; i++) { 5180 log_err(" %02X", resColl[i]); 5181 } 5182 log_err("\n"); 5183 } 5184 if(isICUVersionAtLeast(52, 0, 1)) { /* TODO: debug & fix, see ticket #8982 */ 5185 tLen = u_strlen(tData2[1]); 5186 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); 5187 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) { 5188 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 5189 for(i = 0; i<rLen; i++) { 5190 log_err(" %02X", resColl[i]); 5191 } 5192 log_err("\n"); 5193 } 5194 } 5195 ucol_close(coll); 5196 } 5197 5198 static void 5199 TestUCAPrecontext(void) 5200 { 5201 UErrorCode status = U_ZERO_ERROR; 5202 int32_t i, j; 5203 UCollator *coll =NULL; 5204 uint8_t resColl[100], prevColl[100]; 5205 int32_t rLen, tLen, ruleLen; 5206 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */ 5207 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0}; 5208 /* & l middle-dot << a a is an expansion. 
*/ 5209 5210 UChar tData1[][20]={ 5211 { 0xb7, 0}, /* standalone middle dot(0xb7) */ 5212 { 0x387, 0}, /* standalone middle dot(0x387) */ 5213 { 0x61, 0}, /* a */ 5214 { 0x6C, 0}, /* l */ 5215 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */ 5216 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */ 5217 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */ 5218 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */ 5219 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */ 5220 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */ 5221 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */ 5222 }; 5223 5224 log_verbose("\n\nEN collation:"); 5225 coll = ucol_open("en", &status); 5226 if (U_FAILURE(status)) { 5227 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status)); 5228 return; 5229 } 5230 for (j=0; j<11; j++) { 5231 tLen = u_strlen(tData1[j]); 5232 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5233 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5234 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5235 j, tData1[j]); 5236 } 5237 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5238 for(i = 0; i<rLen; i++) { 5239 log_verbose(" %02X", resColl[i]); 5240 } 5241 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5242 } 5243 ucol_close(coll); 5244 5245 5246 log_verbose("\n\nJA collation:"); 5247 coll = ucol_open("ja", &status); 5248 if (U_FAILURE(status)) { 5249 log_err("Tailoring test: &z <<a|- failed!"); 5250 return; 5251 } 5252 for (j=0; j<11; j++) { 5253 tLen = u_strlen(tData1[j]); 5254 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5255 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5256 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5257 j, tData1[j]); 5258 } 5259 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5260 for(i = 0; i<rLen; i++) { 5261 log_verbose(" %02X", resColl[i]); 
5262 } 5263 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5264 } 5265 ucol_close(coll); 5266 5267 5268 log_verbose("\n\nTailoring test: & middle dot < a "); 5269 ruleLen = u_strlen(rule1); 5270 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5271 if (U_FAILURE(status)) { 5272 log_err("Tailoring test: & middle dot < a failed!"); 5273 return; 5274 } 5275 for (j=0; j<11; j++) { 5276 tLen = u_strlen(tData1[j]); 5277 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5278 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5279 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5280 j, tData1[j]); 5281 } 5282 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5283 for(i = 0; i<rLen; i++) { 5284 log_verbose(" %02X", resColl[i]); 5285 } 5286 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5287 } 5288 ucol_close(coll); 5289 5290 5291 log_verbose("\n\nTailoring test: & l middle-dot << a "); 5292 ruleLen = u_strlen(rule2); 5293 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5294 if (U_FAILURE(status)) { 5295 log_err("Tailoring test: & l middle-dot << a failed!"); 5296 return; 5297 } 5298 for (j=0; j<11; j++) { 5299 tLen = u_strlen(tData1[j]); 5300 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5301 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5302 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5303 j, tData1[j]); 5304 } 5305 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) { 5306 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.", 5307 j, tData1[j]); 5308 } 5309 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5310 for(i = 0; i<rLen; i++) { 5311 log_verbose(" %02X", resColl[i]); 5312 } 5313 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5314 } 5315 ucol_close(coll); 5316 } 5317 5318 static void 5319 
TestOutOfBuffer5468(void) 5320 { 5321 static const char *test = "\\u4e00"; 5322 UChar ustr[256]; 5323 int32_t ustr_length = u_unescape(test, ustr, 256); 5324 unsigned char shortKeyBuf[1]; 5325 int32_t sortkey_length; 5326 UErrorCode status = U_ZERO_ERROR; 5327 static UCollator *coll = NULL; 5328 5329 coll = ucol_open("root", &status); 5330 if(U_FAILURE(status)) { 5331 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 5332 return; 5333 } 5334 ucol_setStrength(coll, UCOL_PRIMARY); 5335 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 5336 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5337 if (U_FAILURE(status)) { 5338 log_err("Failed setting atributes\n"); 5339 return; 5340 } 5341 5342 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf)); 5343 if (sortkey_length != 4) { 5344 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length); 5345 } 5346 log_verbose("length of sortKey is %d", sortkey_length); 5347 ucol_close(coll); 5348 } 5349 5350 #define TSKC_DATA_SIZE 5 5351 #define TSKC_BUF_SIZE 50 5352 static void 5353 TestSortKeyConsistency(void) 5354 { 5355 UErrorCode icuRC = U_ZERO_ERROR; 5356 UCollator* ucol; 5357 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD}; 5358 5359 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5360 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5361 int32_t i, j, i2; 5362 5363 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC); 5364 if (U_FAILURE(icuRC)) 5365 { 5366 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC)); 5367 return; 5368 } 5369 5370 for (i = 0; i < TSKC_DATA_SIZE; i++) 5371 { 5372 UCharIterator uiter; 5373 uint32_t state[2] = { 0, 0 }; 5374 int32_t dataLen = i+1; 5375 for (j=0; j<TSKC_BUF_SIZE; j++) 5376 bufFull[i][j] = bufPart[i][j] = 0; 5377 5378 /* Full sort key */ 5379 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE); 5380 5381 /* Partial sort key 
*/ 5382 uiter_setString(&uiter, data, dataLen); 5383 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC); 5384 if (U_FAILURE(icuRC)) 5385 { 5386 log_err("ucol_nextSortKeyPart failed\n"); 5387 ucol_close(ucol); 5388 return; 5389 } 5390 5391 for (i2=0; i2<i; i2++) 5392 { 5393 UBool fullMatch = TRUE; 5394 UBool partMatch = TRUE; 5395 for (j=0; j<TSKC_BUF_SIZE; j++) 5396 { 5397 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]); 5398 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]); 5399 } 5400 if (fullMatch != partMatch) { 5401 log_err(fullMatch ? "full key was consistent, but partial key changed\n" 5402 : "partial key was consistent, but full key changed\n"); 5403 ucol_close(ucol); 5404 return; 5405 } 5406 } 5407 } 5408 5409 /*=============================================*/ 5410 ucol_close(ucol); 5411 } 5412 5413 /* ticket: 6101 */ 5414 static void TestCroatianSortKey(void) { 5415 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3"; 5416 UErrorCode status = U_ZERO_ERROR; 5417 UCollator *ucol; 5418 UCharIterator iter; 5419 5420 static const UChar text[] = { 0x0044, 0xD81A }; 5421 5422 size_t length = sizeof(text)/sizeof(*text); 5423 5424 uint8_t textSortKey[32]; 5425 size_t lenSortKey = 32; 5426 size_t actualSortKeyLen; 5427 uint32_t uStateInfo[2] = { 0, 0 }; 5428 5429 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status); 5430 if (U_FAILURE(status)) { 5431 log_err_status(status, "ucol_openFromShortString error in Craotian test. 
-> %s\n", u_errorName(status)); 5432 return; 5433 } 5434 5435 uiter_setString(&iter, text, length); 5436 5437 actualSortKeyLen = ucol_nextSortKeyPart( 5438 ucol, &iter, (uint32_t*)uStateInfo, 5439 textSortKey, lenSortKey, &status 5440 ); 5441 5442 if (actualSortKeyLen == lenSortKey) { 5443 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n"); 5444 } 5445 5446 ucol_close(ucol); 5447 } 5448 5449 /* ticket: 6140 */ 5450 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since 5451 * they are both Hiragana and Katakana 5452 */ 5453 #define SORTKEYLEN 50 5454 static void TestHiragana(void) { 5455 UErrorCode status = U_ZERO_ERROR; 5456 UCollator* ucol; 5457 UCollationResult strcollresult; 5458 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */ 5459 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 }; 5460 int32_t data1Len = sizeof(data1)/sizeof(*data1); 5461 int32_t data2Len = sizeof(data2)/sizeof(*data2); 5462 int32_t i, j; 5463 uint8_t sortKey1[SORTKEYLEN]; 5464 uint8_t sortKey2[SORTKEYLEN]; 5465 5466 UCharIterator uiter1; 5467 UCharIterator uiter2; 5468 uint32_t state1[2] = { 0, 0 }; 5469 uint32_t state2[2] = { 0, 0 }; 5470 int32_t keySize1; 5471 int32_t keySize2; 5472 5473 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL, 5474 &status); 5475 if (U_FAILURE(status)) { 5476 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status)); 5477 return; 5478 } 5479 5480 /* Start of full sort keys */ 5481 /* Full sort key1 */ 5482 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN); 5483 /* Full sort key2 */ 5484 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN); 5485 if (keySize1 == keySize2) { 5486 for (i = 0; i < keySize1; i++) { 5487 if (sortKey1[i] != sortKey2[i]) { 5488 log_err("Full sort keys are different. 
Should be equal."); 5489 } 5490 } 5491 } else { 5492 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2); 5493 } 5494 /* End of full sort keys */ 5495 5496 /* Start of partial sort keys */ 5497 /* Partial sort key1 */ 5498 uiter_setString(&uiter1, data1, data1Len); 5499 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status); 5500 /* Partial sort key2 */ 5501 uiter_setString(&uiter2, data2, data2Len); 5502 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status); 5503 if (U_SUCCESS(status) && keySize1 == keySize2) { 5504 for (j = 0; j < keySize1; j++) { 5505 if (sortKey1[j] != sortKey2[j]) { 5506 log_err("Partial sort keys are different. Should be equal"); 5507 } 5508 } 5509 } else { 5510 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2); 5511 } 5512 /* End of partial sort keys */ 5513 5514 /* Start of strcoll */ 5515 /* Use ucol_strcoll() to determine ordering */ 5516 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len); 5517 if (strcollresult != UCOL_EQUAL) { 5518 log_err("Result from ucol_strcoll() should be UCOL_EQUAL."); 5519 } 5520 5521 ucol_close(ucol); 5522 } 5523 5524 /* Convenient struct for running collation tests */ 5525 typedef struct { 5526 const UChar source[MAX_TOKEN_LEN]; /* String on left */ 5527 const UChar target[MAX_TOKEN_LEN]; /* String on right */ 5528 UCollationResult result; /* -1, 0 or +1, depending on collation */ 5529 } OneTestCase; 5530 5531 /* 5532 * Utility function to test one collation test case. 5533 * @param testcases Array of test cases. 5534 * @param n_testcases Size of the array testcases. 5535 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats. 5536 * @param n_rules Size of the array str_rules. 
5537 */ 5538 static void doTestOneTestCase(const OneTestCase testcases[], 5539 int n_testcases, 5540 const char* str_rules[], 5541 int n_rules) 5542 { 5543 int rule_no, testcase_no; 5544 UChar rule[500]; 5545 int32_t length = 0; 5546 UErrorCode status = U_ZERO_ERROR; 5547 UParseError parse_error; 5548 UCollator *myCollation; 5549 5550 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 5551 5552 length = u_unescape(str_rules[rule_no], rule, 500); 5553 if (length == 0) { 5554 log_err("ERROR: The rule cannot be unescaped: %s\n"); 5555 return; 5556 } 5557 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 5558 if(U_FAILURE(status)){ 5559 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5560 return; 5561 } 5562 log_verbose("Testing the <<* syntax\n"); 5563 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5564 ucol_setStrength(myCollation, UCOL_TERTIARY); 5565 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { 5566 doTest(myCollation, 5567 testcases[testcase_no].source, 5568 testcases[testcase_no].target, 5569 testcases[testcase_no].result 5570 ); 5571 } 5572 ucol_close(myCollation); 5573 } 5574 } 5575 5576 const static OneTestCase rangeTestcases[] = { 5577 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */ 5578 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */ 5579 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */ 5580 5581 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */ 5582 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */ 5583 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */ 5584 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */ 5585 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */ 5586 5587 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */ 5588 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */ 5589 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */ 5590 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */ 5591 5592 { {0x0061}, 
{0x0031}, UCOL_EQUAL }, /* "a" = "1" */ 5593 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */ 5594 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */ 5595 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */ 5596 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */ 5597 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */ 5598 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */ 5599 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */ 5600 }; 5601 5602 static int nRangeTestcases = LEN(rangeTestcases); 5603 5604 const static OneTestCase rangeTestcasesSupplemental[] = { 5605 { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */ 5606 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */ 5607 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */ 5608 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */ 5609 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ 5610 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ 5611 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */ 5612 }; 5613 5614 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); 5615 5616 const static OneTestCase rangeTestcasesQwerty[] = { 5617 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */ 5618 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */ 5619 5620 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */ 5621 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */ 5622 5623 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */ 5624 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */ 5625 5626 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */ 5627 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */ 5628 5629 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, 5630 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */ 5631 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b}, 5632 {0x0071, 0x0075, 
0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */ 5633 }; 5634 5635 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty); 5636 5637 static void TestSameStrengthList(void) 5638 { 5639 const char* strRules[] = { 5640 /* Normal */ 5641 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3", 5642 5643 /* Lists */ 5644 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123", 5645 }; 5646 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5647 } 5648 5649 static void TestSameStrengthListQuoted(void) 5650 { 5651 const char* strRules[] = { 5652 /* Lists with quoted characters */ 5653 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123", 5654 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123", 5655 5656 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033", 5657 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'", 5658 5659 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033", 5660 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'", 5661 }; 5662 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5663 } 5664 5665 static void TestSameStrengthListSupplemental(void) 5666 { 5667 const char* strRules[] = { 5668 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002", 5669 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", 5670 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002", 5671 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", 5672 }; 5673 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 5674 } 5675 5676 static void TestSameStrengthListQwerty(void) 5677 { 5678 const char* strRules[] = { 5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 5680 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 5681 "&\\u0071<\\u0077<\\u0065<\\u0072 
&\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", 5682 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064", 5683 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064", 5684 5685 /* Quoted characters also will work if two quoted characters are not consecutive. */ 5686 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064", 5687 5688 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */ 5689 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/ 5690 5691 }; 5692 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 5693 } 5694 5695 static void TestSameStrengthListQuotedQwerty(void) 5696 { 5697 const char* strRules[] = { 5698 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 5699 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 5700 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */ 5701 5702 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. 
*/ 5703 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */ 5704 }; 5705 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 5706 } 5707 5708 static void TestSameStrengthListRanges(void) 5709 { 5710 const char* strRules[] = { 5711 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", 5712 }; 5713 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5714 } 5715 5716 static void TestSameStrengthListSupplementalRanges(void) 5717 { 5718 const char* strRules[] = { 5719 "&\\ufffe<*\\uffff-\\U00010002", 5720 }; 5721 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 5722 } 5723 5724 static void TestSpecialCharacters(void) 5725 { 5726 const char* strRules[] = { 5727 /* Normal */ 5728 "&';'<'+'<','<'-'<'&'<'*'", 5729 5730 /* List */ 5731 "&';'<*'+,-&*'", 5732 5733 /* Range */ 5734 "&';'<*'+'-'-&*'", 5735 }; 5736 5737 const static OneTestCase specialCharacterStrings[] = { 5738 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */ 5739 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */ 5740 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */ 5741 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */ 5742 }; 5743 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules)); 5744 } 5745 5746 static void TestPrivateUseCharacters(void) 5747 { 5748 const char* strRules[] = { 5749 /* Normal */ 5750 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'", 5751 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d", 5752 }; 5753 5754 const static OneTestCase privateUseCharacterStrings[] = { 5755 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5756 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5757 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5758 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5759 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5760 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5761 }; 5762 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, 
LEN(strRules)); 5763 } 5764 5765 static void TestPrivateUseCharactersInList(void) 5766 { 5767 const char* strRules[] = { 5768 /* List */ 5769 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'", 5770 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */ 5771 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d", 5772 }; 5773 5774 const static OneTestCase privateUseCharacterStrings[] = { 5775 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5776 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5777 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5778 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5779 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5780 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5781 }; 5782 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5783 } 5784 5785 static void TestPrivateUseCharactersInRange(void) 5786 { 5787 const char* strRules[] = { 5788 /* Range */ 5789 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'", 5790 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d", 5791 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */ 5792 }; 5793 5794 const static OneTestCase privateUseCharacterStrings[] = { 5795 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5796 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5797 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5798 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5799 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5800 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5801 }; 5802 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5803 } 5804 5805 static void TestInvalidListsAndRanges(void) 5806 { 5807 const char* invalidRules[] = { 5808 /* Range not in starred expression */ 5809 "&\\ufffe<\\uffff-\\U00010002", 5810 5811 /* Range without start */ 5812 "&a<*-c", 5813 5814 /* Range without end */ 5815 "&a<*b-", 5816 5817 /* More than one hyphen */ 5818 "&a<*b-g-l", 5819 5820 /* Range in the wrong order */ 5821 "&a<*k-b", 5822 5823 }; 5824 5825 UChar rule[500]; 5826 UErrorCode status = U_ZERO_ERROR; 5827 UParseError parse_error; 
5828 int n_rules = LEN(invalidRules); 5829 int rule_no; 5830 int length; 5831 UCollator *myCollation; 5832 5833 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 5834 5835 length = u_unescape(invalidRules[rule_no], rule, 500); 5836 if (length == 0) { 5837 log_err("ERROR: The rule cannot be unescaped: %s\n"); 5838 return; 5839 } 5840 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 5841 if(!U_FAILURE(status)){ 5842 log_err("ERROR: Could not cause a failure as expected: \n"); 5843 } 5844 status = U_ZERO_ERROR; 5845 } 5846 } 5847 5848 /* 5849 * This test ensures that characters placed before a character in a different script have the same lead byte 5850 * in their collation key before and after script reordering. 5851 */ 5852 static void TestBeforeRuleWithScriptReordering(void) 5853 { 5854 UParseError error; 5855 UErrorCode status = U_ZERO_ERROR; 5856 UCollator *myCollation; 5857 char srules[500] = "&[before 1]\\u03b1 < \\u0e01"; 5858 UChar rules[500]; 5859 uint32_t rulesLength = 0; 5860 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 5861 UCollationResult collResult; 5862 5863 uint8_t baseKey[256]; 5864 uint32_t baseKeyLength; 5865 uint8_t beforeKey[256]; 5866 uint32_t beforeKeyLength; 5867 5868 UChar base[] = { 0x03b1 }; /* base */ 5869 int32_t baseLen = sizeof(base)/sizeof(*base); 5870 5871 UChar before[] = { 0x0e01 }; /* ko kai */ 5872 int32_t beforeLen = sizeof(before)/sizeof(*before); 5873 5874 /*UChar *data[] = { before, base }; 5875 genericRulesStarter(srules, data, 2);*/ 5876 5877 log_verbose("Testing the &[before 1] rule with [reorder grek]\n"); 5878 5879 5880 /* build collator */ 5881 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n"); 5882 5883 rulesLength = u_unescape(srules, rules, LEN(rules)); 5884 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status); 5885 if(U_FAILURE(status)) { 5886 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", 
myErrorName(status)); 5887 return; 5888 } 5889 5890 /* check collation results - before rule applied but not script reordering */ 5891 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 5892 if (collResult != UCOL_GREATER) { 5893 log_err("Collation result not correct before script reordering = %d\n", collResult); 5894 } 5895 5896 /* check the lead byte of the collation keys before script reordering */ 5897 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 5898 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 5899 if (baseKey[0] != beforeKey[0]) { 5900 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 5901 } 5902 5903 /* reorder the scripts */ 5904 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status); 5905 if(U_FAILURE(status)) { 5906 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 5907 return; 5908 } 5909 5910 /* check collation results - before rule applied and after script reordering */ 5911 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 5912 if (collResult != UCOL_GREATER) { 5913 log_err("Collation result not correct after script reordering = %d\n", collResult); 5914 } 5915 5916 /* check the lead byte of the collation keys after script reordering */ 5917 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 5918 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 5919 if (baseKey[0] != beforeKey[0]) { 5920 log_err("Different lead byte for sort keys using before fule and after script reordering. 
base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 5921 } 5922 5923 ucol_close(myCollation); 5924 } 5925 5926 /* 5927 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering. 5928 */ 5929 static void TestNonLeadBytesDuringCollationReordering(void) 5930 { 5931 UErrorCode status = U_ZERO_ERROR; 5932 UCollator *myCollation; 5933 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 5934 5935 uint8_t baseKey[256]; 5936 uint32_t baseKeyLength; 5937 uint8_t reorderKey[256]; 5938 uint32_t reorderKeyLength; 5939 5940 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 }; 5941 5942 uint32_t i; 5943 5944 5945 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 5946 5947 /* build collator tertiary */ 5948 myCollation = ucol_open("", &status); 5949 ucol_setStrength(myCollation, UCOL_TERTIARY); 5950 if(U_FAILURE(status)) { 5951 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5952 return; 5953 } 5954 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 5955 5956 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5957 if(U_FAILURE(status)) { 5958 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5959 return; 5960 } 5961 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 5962 5963 if (baseKeyLength != reorderKeyLength) { 5964 log_err("Key lengths not the same during reordering.\n"); 5965 return; 5966 } 5967 5968 for (i = 1; i < baseKeyLength; i++) { 5969 if (baseKey[i] != reorderKey[i]) { 5970 log_err("Collation key bytes not the same at position %d.\n", i); 5971 return; 5972 } 5973 } 5974 ucol_close(myCollation); 5975 5976 /* build collator quaternary */ 5977 myCollation = ucol_open("", &status); 5978 ucol_setStrength(myCollation, UCOL_QUATERNARY); 5979 if(U_FAILURE(status)) { 5980 
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5981 return; 5982 } 5983 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 5984 5985 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5986 if(U_FAILURE(status)) { 5987 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5988 return; 5989 } 5990 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 5991 5992 if (baseKeyLength != reorderKeyLength) { 5993 log_err("Key lengths not the same during reordering.\n"); 5994 return; 5995 } 5996 5997 for (i = 1; i < baseKeyLength; i++) { 5998 if (baseKey[i] != reorderKey[i]) { 5999 log_err("Collation key bytes not the same at position %d.\n", i); 6000 return; 6001 } 6002 } 6003 ucol_close(myCollation); 6004 } 6005 6006 /* 6007 * Test reordering API. 6008 */ 6009 static void TestReorderingAPI(void) 6010 { 6011 UErrorCode status = U_ZERO_ERROR; 6012 UCollator *myCollation; 6013 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6014 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; 6015 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6016 UCollationResult collResult; 6017 int32_t retrievedReorderCodesLength; 6018 int32_t retrievedReorderCodes[10]; 6019 UChar greekString[] = { 0x03b1 }; 6020 UChar punctuationString[] = { 0x203e }; 6021 int loopIndex; 6022 6023 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6024 6025 /* build collator tertiary */ 6026 myCollation = ucol_open("", &status); 6027 ucol_setStrength(myCollation, UCOL_TERTIARY); 6028 if(U_FAILURE(status)) { 6029 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6030 return; 6031 } 6032 
6033 /* set the reorderding */ 6034 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6035 if (U_FAILURE(status)) { 6036 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6037 return; 6038 } 6039 6040 /* get the reordering */ 6041 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6042 if (status != U_BUFFER_OVERFLOW_ERROR) { 6043 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 6044 return; 6045 } 6046 status = U_ZERO_ERROR; 6047 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6048 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6049 return; 6050 } 6051 /* now let's really get it */ 6052 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6053 if (U_FAILURE(status)) { 6054 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6055 return; 6056 } 6057 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6058 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6059 return; 6060 } 6061 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6062 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 6063 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6064 return; 6065 } 6066 } 6067 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6068 if (collResult != UCOL_LESS) { 6069 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 6070 return; 6071 } 6072 6073 /* clear the reordering */ 6074 
ucol_setReorderCodes(myCollation, NULL, 0, &status); 6075 if (U_FAILURE(status)) { 6076 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status)); 6077 return; 6078 } 6079 6080 /* get the reordering again */ 6081 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6082 if (retrievedReorderCodesLength != 0) { 6083 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 6084 return; 6085 } 6086 6087 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6088 if (collResult != UCOL_GREATER) { 6089 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 6090 return; 6091 } 6092 6093 /* test for error condition on duplicate reorder codes */ 6094 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status); 6095 if (!U_FAILURE(status)) { 6096 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n"); 6097 return; 6098 } 6099 6100 status = U_ZERO_ERROR; 6101 /* test for reorder codes after a reset code */ 6102 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status); 6103 if (!U_FAILURE(status)) { 6104 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n"); 6105 return; 6106 } 6107 6108 ucol_close(myCollation); 6109 } 6110 6111 /* 6112 * Test reordering API. 
6113 */ 6114 static void TestReorderingAPIWithRuleCreatedCollator(void) 6115 { 6116 UErrorCode status = U_ZERO_ERROR; 6117 UCollator *myCollation; 6118 UChar rules[90]; 6119 int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK}; 6120 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6121 UCollationResult collResult; 6122 int32_t retrievedReorderCodesLength; 6123 int32_t retrievedReorderCodes[10]; 6124 UChar greekString[] = { 0x03b1 }; 6125 UChar punctuationString[] = { 0x203e }; 6126 UChar hanString[] = { 0x65E5, 0x672C }; 6127 int loopIndex; 6128 6129 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6130 6131 /* build collator from rules */ 6132 u_uastrcpy(rules, "[reorder Hani Grek]"); 6133 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status); 6134 if(U_FAILURE(status)) { 6135 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6136 return; 6137 } 6138 6139 /* get the reordering */ 6140 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6141 if (U_FAILURE(status)) { 6142 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6143 return; 6144 } 6145 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) { 6146 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes)); 6147 return; 6148 } 6149 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6150 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { 6151 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6152 return; 6153 } 6154 } 6155 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString)); 6156 if (collResult != 
UCOL_GREATER) { 6157 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 6158 return; 6159 } 6160 6161 6162 /* set the reorderding */ 6163 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6164 if (U_FAILURE(status)) { 6165 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6166 return; 6167 } 6168 6169 /* get the reordering */ 6170 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6171 if (status != U_BUFFER_OVERFLOW_ERROR) { 6172 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 6173 return; 6174 } 6175 status = U_ZERO_ERROR; 6176 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6177 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6178 return; 6179 } 6180 /* now let's really get it */ 6181 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6182 if (U_FAILURE(status)) { 6183 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6184 return; 6185 } 6186 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6187 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6188 return; 6189 } 6190 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6191 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 6192 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6193 return; 6194 } 6195 } 6196 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6197 if (collResult != UCOL_LESS) { 6198 log_err_status(status, "ERROR: collation 
/*
 * qsort() comparator for arrays of int32_t script codes.
 *
 * Returns a negative value, zero, or a positive value when *a is less than,
 * equal to, or greater than *b. The result comes from comparisons rather than
 * from "*a - *b", because that subtraction overflows (undefined behaviour) when
 * the operands are far apart, e.g. INT32_MIN and a positive code.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t lhs = *(const int32_t *)a;
    const int32_t rhs = *(const int32_t *)b;

    return (lhs > rhs) - (lhs < rhs);
}
qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes); 6265 6266 /* UScript.GOTHIC */ 6267 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); 6268 if (U_FAILURE(status)) { 6269 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); 6270 return; 6271 } 6272 /* 6273 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); 6274 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength); 6275 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 6276 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]); 6277 } 6278 */ 6279 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 6280 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); 6281 return; 6282 } 6283 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 6284 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 6285 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); 6286 return; 6287 } 6288 } 6289 6290 /* UScript.SHAVIAN */ 6291 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status); 6292 if (U_FAILURE(status)) { 6293 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); 6294 return; 6295 } 6296 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 6297 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); 6298 return; 6299 } 6300 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 6301 if 
(equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 6302 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); 6303 return; 6304 } 6305 } 6306 } 6307 6308 static void TestReorderingAcrossCloning(void) 6309 { 6310 UErrorCode status = U_ZERO_ERROR; 6311 UCollator *myCollation; 6312 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6313 UCollator *clonedCollation; 6314 int32_t bufferSize; 6315 int32_t retrievedReorderCodesLength; 6316 int32_t retrievedReorderCodes[10]; 6317 int loopIndex; 6318 6319 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6320 6321 /* build collator tertiary */ 6322 myCollation = ucol_open("", &status); 6323 ucol_setStrength(myCollation, UCOL_TERTIARY); 6324 if(U_FAILURE(status)) { 6325 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6326 return; 6327 } 6328 6329 /* set the reorderding */ 6330 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6331 if (U_FAILURE(status)) { 6332 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6333 return; 6334 } 6335 6336 /* clone the collator */ 6337 clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status); 6338 if (U_FAILURE(status)) { 6339 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status)); 6340 return; 6341 } 6342 6343 /* get the reordering */ 6344 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6345 if (U_FAILURE(status)) { 6346 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6347 return; 6348 } 6349 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6350 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", 
retrievedReorderCodesLength, LEN(reorderCodes)); 6351 return; 6352 } 6353 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6354 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 6355 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6356 return; 6357 } 6358 } 6359 6360 /*uprv_free(buffer);*/ 6361 ucol_close(myCollation); 6362 ucol_close(clonedCollation); 6363 } 6364 6365 /* 6366 * Utility function to test one collation reordering test case set. 6367 * @param testcases Array of test cases. 6368 * @param n_testcases Size of the array testcases. 6369 * @param reorderTokens Array of reordering codes. 6370 * @param reorderTokensLen Size of the array reorderTokens. 6371 */ 6372 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen) 6373 { 6374 uint32_t testCaseNum; 6375 UErrorCode status = U_ZERO_ERROR; 6376 UCollator *myCollation; 6377 6378 myCollation = ucol_open("", &status); 6379 if (U_FAILURE(status)) { 6380 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6381 return; 6382 } 6383 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status); 6384 if(U_FAILURE(status)) { 6385 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 6386 return; 6387 } 6388 6389 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) { 6390 doTest(myCollation, 6391 testCases[testCaseNum].source, 6392 testCases[testCaseNum].target, 6393 testCases[testCaseNum].result 6394 ); 6395 } 6396 ucol_close(myCollation); 6397 } 6398 6399 static void TestGreekFirstReorder(void) 6400 { 6401 const char* strRules[] = { 6402 "[reorder Grek]" 6403 }; 6404 6405 const int32_t apiRules[] = { 6406 USCRIPT_GREEK 6407 }; 6408 6409 const static OneTestCase privateUseCharacterStrings[] = { 6410 { {0x0391}, {0x0391}, 
UCOL_EQUAL }, 6411 { {0x0041}, {0x0391}, UCOL_GREATER }, 6412 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER }, 6413 { {0x0060}, {0x0391}, UCOL_LESS }, 6414 { {0x0391}, {0xe2dc}, UCOL_LESS }, 6415 { {0x0391}, {0x0060}, UCOL_GREATER }, 6416 }; 6417 6418 /* Test rules creation */ 6419 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6420 6421 /* Test collation reordering API */ 6422 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6423 } 6424 6425 static void TestGreekLastReorder(void) 6426 { 6427 const char* strRules[] = { 6428 "[reorder Zzzz Grek]" 6429 }; 6430 6431 const int32_t apiRules[] = { 6432 USCRIPT_UNKNOWN, USCRIPT_GREEK 6433 }; 6434 6435 const static OneTestCase privateUseCharacterStrings[] = { 6436 { {0x0391}, {0x0391}, UCOL_EQUAL }, 6437 { {0x0041}, {0x0391}, UCOL_LESS }, 6438 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS }, 6439 { {0x0060}, {0x0391}, UCOL_LESS }, 6440 { {0x0391}, {0xe2dc}, UCOL_GREATER }, 6441 }; 6442 6443 /* Test rules creation */ 6444 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6445 6446 /* Test collation reordering API */ 6447 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6448 } 6449 6450 static void TestNonScriptReorder(void) 6451 { 6452 const char* strRules[] = { 6453 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]" 6454 }; 6455 6456 const int32_t apiRules[] = { 6457 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, 6458 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN, 6459 UCOL_REORDER_CODE_CURRENCY 6460 }; 6461 6462 const static OneTestCase privateUseCharacterStrings[] = { 6463 { {0x0391}, {0x0041}, UCOL_LESS }, 6464 { {0x0041}, {0x0391}, UCOL_GREATER }, 6465 { {0x0060}, {0x0041}, UCOL_LESS }, 6466 { 
{0x0060}, {0x0391}, UCOL_GREATER }, 6467 { {0x0024}, {0x0041}, UCOL_GREATER }, 6468 }; 6469 6470 /* Test rules creation */ 6471 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6472 6473 /* Test collation reordering API */ 6474 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6475 } 6476 6477 static void TestHaniReorder(void) 6478 { 6479 const char* strRules[] = { 6480 "[reorder Hani]" 6481 }; 6482 const int32_t apiRules[] = { 6483 USCRIPT_HAN 6484 }; 6485 6486 const static OneTestCase privateUseCharacterStrings[] = { 6487 { {0x4e00}, {0x0041}, UCOL_LESS }, 6488 { {0x4e00}, {0x0060}, UCOL_GREATER }, 6489 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 6490 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 6491 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 6492 { {0xfa27}, {0x0041}, UCOL_LESS }, 6493 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 6494 }; 6495 6496 /* Test rules creation */ 6497 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6498 6499 /* Test collation reordering API */ 6500 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6501 } 6502 6503 static void TestHaniReorderWithOtherRules(void) 6504 { 6505 const char* strRules[] = { 6506 "[reorder Hani] &b<a" 6507 }; 6508 /*const int32_t apiRules[] = { 6509 USCRIPT_HAN 6510 };*/ 6511 6512 const static OneTestCase privateUseCharacterStrings[] = { 6513 { {0x4e00}, {0x0041}, UCOL_LESS }, 6514 { {0x4e00}, {0x0060}, UCOL_GREATER }, 6515 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 6516 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 6517 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 6518 { {0xfa27}, {0x0041}, UCOL_LESS }, 6519 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 6520 { {0x0062}, {0x0061}, UCOL_LESS }, 6521 }; 6522 6523 /* Test rules creation */ 6524 
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6525 } 6526 6527 static void TestMultipleReorder(void) 6528 { 6529 const char* strRules[] = { 6530 "[reorder Grek Zzzz DIGIT Latn Hani]" 6531 }; 6532 6533 const int32_t apiRules[] = { 6534 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN 6535 }; 6536 6537 const static OneTestCase collationTestCases[] = { 6538 { {0x0391}, {0x0041}, UCOL_LESS}, 6539 { {0x0031}, {0x0041}, UCOL_LESS}, 6540 { {0x0041}, {0x4e00}, UCOL_LESS}, 6541 }; 6542 6543 /* Test rules creation */ 6544 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules)); 6545 6546 /* Test collation reordering API */ 6547 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules)); 6548 } 6549 6550 /* 6551 * Test that covers issue reported in ticket 8814 6552 */ 6553 static void TestReorderWithNumericCollation(void) 6554 { 6555 UErrorCode status = U_ZERO_ERROR; 6556 UCollator *myCollation; 6557 UCollator *myReorderCollation; 6558 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS}; 6559 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 }; 6560 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */ 6561 UChar fortyS[] = { 0x0053 }; 6562 UChar fortyThreeP[] = { 0x0050 }; 6563 uint8_t fortyS_sortKey[128]; 6564 int32_t fortyS_sortKey_Length; 6565 uint8_t fortyThreeP_sortKey[128]; 6566 int32_t fortyThreeP_sortKey_Length; 6567 uint8_t fortyS_sortKey_reorder[128]; 6568 int32_t fortyS_sortKey_reorder_Length; 6569 uint8_t fortyThreeP_sortKey_reorder[128]; 6570 int32_t fortyThreeP_sortKey_reorder_Length; 6571 UCollationResult collResult; 6572 UCollationResult collResultReorder; 6573 6574 log_verbose("Testing reordering with and without numeric collation\n"); 6575 6576 /* 
build collator tertiary with numeric */ 6577 myCollation = ucol_open("", &status); 6578 /* 6579 ucol_setStrength(myCollation, UCOL_TERTIARY); 6580 */ 6581 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 6582 if(U_FAILURE(status)) { 6583 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6584 return; 6585 } 6586 6587 /* build collator tertiary with numeric and reordering */ 6588 myReorderCollation = ucol_open("", &status); 6589 /* 6590 ucol_setStrength(myReorderCollation, UCOL_TERTIARY); 6591 */ 6592 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 6593 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status); 6594 if(U_FAILURE(status)) { 6595 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6596 return; 6597 } 6598 6599 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128); 6600 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128); 6601 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128); 6602 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128); 6603 6604 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) { 6605 log_err_status(status, "ERROR: couldn't generate sort keys\n"); 6606 return; 6607 } 6608 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP)); 6609 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP)); 6610 /* 6611 fprintf(stderr, "\tcollResult = %x\n", collResult); 6612 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder); 6613 fprintf(stderr, "\nfortyS\n"); 6614 
/**
 * Orders two zero-terminated byte strings (such as collation sort keys).
 *
 * @param a first byte string, terminated by a 0 byte
 * @param b second byte string, terminated by a 0 byte
 * @return 0 when both strings are identical up to and including the
 *         terminator, -1 when a sorts before b, 1 when a sorts after b
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    /* Skip the common prefix, but never step past a's terminator. */
    while (*a != 0 && *a == *b) {
        ++a;
        ++b;
    }

    /* Equal bytes here can only mean that both strings ended together. */
    if (*a == *b) {
        return 0;
    }
    return (*a < *b) ? -1 : 1;
}
*/ 6675 }; 6676 6677 const OneTestCase defaultTests[] = { 6678 { {0x0110}, {0x00F0}, UCOL_LESS}, 6679 { {0x00a3}, {0x00a5}, UCOL_LESS}, 6680 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS}, 6681 }; 6682 6683 /* European Ordering rules: ignore currency characters. */ 6684 const char* eorRules[] = { 6685 "[import root-u-co-eor]", 6686 }; 6687 6688 const OneTestCase eorTests[] = { 6689 { {0x0110}, {0x00F0}, UCOL_LESS}, 6690 { {0x00a3}, {0x00a5}, UCOL_EQUAL}, 6691 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL}, 6692 }; 6693 6694 const char* fiStdRules[] = { 6695 "[import fi-u-co-standard]", 6696 }; 6697 6698 const OneTestCase fiStdTests[] = { 6699 { {0x0110}, {0x00F0}, UCOL_GREATER}, 6700 { {0x00a3}, {0x00a5}, UCOL_LESS}, 6701 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS}, 6702 }; 6703 6704 /* Both European Ordering Rules and Fi Standard Rules. */ 6705 const char* eorFiStdRules[] = { 6706 "[import root-u-co-eor][import fi-u-co-standard]", 6707 }; 6708 6709 /* This is essentially same as the one before once fi.txt is updated with import. 
*/ 6710 const char* fiEorRules[] = { 6711 "[import fi-u-co-eor]", 6712 }; 6713 6714 const OneTestCase fiEorTests[] = { 6715 { {0x0110}, {0x00F0}, UCOL_GREATER}, 6716 { {0x00a3}, {0x00a5}, UCOL_EQUAL}, 6717 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL}, 6718 }; 6719 6720 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules)); 6721 doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules)); 6722 doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules)); 6723 doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules)); 6724 6725 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule: 6726 eor{ 6727 Sequence{ 6728 "[import root-u-co-eor][import fi-u-co-standard]" 6729 } 6730 Version{"21.0"} 6731 } 6732 */ 6733 /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */ 6734 6735 } 6736 #endif 6737 6738 #if 0 6739 /* 6740 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless 6741 * the resource files are built with -includeUnihanColl option. 6742 * TODO: Uncomment this function and make it work when unihan rules are built by default. 6743 */ 6744 static void TestImportRulesCJKWithUnihan(void) 6745 { 6746 /* DUCET. */ 6747 const char* defaultRules[] = { 6748 "&a<b", /* Dummy rule. */ 6749 }; 6750 6751 const OneTestCase defaultTests[] = { 6752 { {0x3402}, {0x4e1e}, UCOL_GREATER}, 6753 }; 6754 6755 /* European Ordering rules: ignore currency characters. 
*/ 6756 const char* unihanRules[] = { 6757 "[import ko-u-co-unihan]", 6758 }; 6759 6760 const OneTestCase unihanTests[] = { 6761 { {0x3402}, {0x4e1e}, UCOL_LESS}, 6762 }; 6763 6764 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules)); 6765 doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules)); 6766 6767 } 6768 #endif 6769 6770 static void TestImport(void) 6771 { 6772 UCollator* vicoll; 6773 UCollator* escoll; 6774 UCollator* viescoll; 6775 UCollator* importviescoll; 6776 UParseError error; 6777 UErrorCode status = U_ZERO_ERROR; 6778 UChar* virules; 6779 int32_t viruleslength; 6780 UChar* esrules; 6781 int32_t esruleslength; 6782 UChar* viesrules; 6783 int32_t viesruleslength; 6784 char srules[500] = "[import vi][import es]"; 6785 UChar rules[500]; 6786 uint32_t length = 0; 6787 int32_t itemCount; 6788 int32_t i, k; 6789 UChar32 start; 6790 UChar32 end; 6791 UChar str[500]; 6792 int32_t strLength; 6793 6794 uint8_t sk1[500]; 6795 uint8_t sk2[500]; 6796 6797 UBool b; 6798 USet* tailoredSet; 6799 USet* importTailoredSet; 6800 6801 6802 vicoll = ucol_open("vi", &status); 6803 if(U_FAILURE(status)){ 6804 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status)); 6805 return; 6806 } 6807 6808 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); 6809 escoll = ucol_open("es", &status); 6810 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); 6811 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*)); 6812 viesrules[0] = 0; 6813 u_strcat(viesrules, virules); 6814 u_strcat(viesrules, esrules); 6815 viesruleslength = viruleslength + esruleslength; 6816 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 6817 6818 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 6819 length = u_unescape(srules, rules, 500); 6820 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, 
&error, &status); 6821 if(U_FAILURE(status)){ 6822 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6823 return; 6824 } 6825 6826 tailoredSet = ucol_getTailoredSet(viescoll, &status); 6827 importTailoredSet = ucol_getTailoredSet(importviescoll, &status); 6828 6829 if(!uset_equals(tailoredSet, importTailoredSet)){ 6830 log_err("Tailored sets not equal"); 6831 } 6832 6833 uset_close(importTailoredSet); 6834 6835 itemCount = uset_getItemCount(tailoredSet); 6836 6837 for( i = 0; i < itemCount; i++){ 6838 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 6839 if(strLength < 2){ 6840 for (; start <= end; start++){ 6841 k = 0; 6842 U16_APPEND(str, k, 500, start, b); 6843 ucol_getSortKey(viescoll, str, 1, sk1, 500); 6844 ucol_getSortKey(importviescoll, str, 1, sk2, 500); 6845 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6846 log_err("Sort key for %s not equal\n", str); 6847 break; 6848 } 6849 } 6850 }else{ 6851 ucol_getSortKey(viescoll, str, strLength, sk1, 500); 6852 ucol_getSortKey(importviescoll, str, strLength, sk2, 500); 6853 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6854 log_err("ZZSort key for %s not equal\n", str); 6855 break; 6856 } 6857 6858 } 6859 } 6860 6861 uset_close(tailoredSet); 6862 6863 uprv_free(viesrules); 6864 6865 ucol_close(vicoll); 6866 ucol_close(escoll); 6867 ucol_close(viescoll); 6868 ucol_close(importviescoll); 6869 } 6870 6871 static void TestImportWithType(void) 6872 { 6873 UCollator* vicoll; 6874 UCollator* decoll; 6875 UCollator* videcoll; 6876 UCollator* importvidecoll; 6877 UParseError error; 6878 UErrorCode status = U_ZERO_ERROR; 6879 const UChar* virules; 6880 int32_t viruleslength; 6881 const UChar* derules; 6882 int32_t deruleslength; 6883 UChar* viderules; 6884 int32_t videruleslength; 6885 const char srules[500] = "[import vi][import de-u-co-phonebk]"; 6886 UChar rules[500]; 6887 uint32_t length = 0; 6888 int32_t itemCount; 6889 int32_t i, k; 6890 UChar32 start; 
6891 UChar32 end; 6892 UChar str[500]; 6893 int32_t strLength; 6894 6895 uint8_t sk1[500]; 6896 uint8_t sk2[500]; 6897 6898 USet* tailoredSet; 6899 USet* importTailoredSet; 6900 6901 vicoll = ucol_open("vi", &status); 6902 if(U_FAILURE(status)){ 6903 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6904 return; 6905 } 6906 virules = ucol_getRules(vicoll, &viruleslength); 6907 /* decoll = ucol_open("de@collation=phonebook", &status); */ 6908 decoll = ucol_open("de-u-co-phonebk", &status); 6909 if(U_FAILURE(status)){ 6910 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6911 return; 6912 } 6913 6914 6915 derules = ucol_getRules(decoll, &deruleslength); 6916 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*)); 6917 viderules[0] = 0; 6918 u_strcat(viderules, virules); 6919 u_strcat(viderules, derules); 6920 videruleslength = viruleslength + deruleslength; 6921 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 6922 6923 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 6924 length = u_unescape(srules, rules, 500); 6925 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status); 6926 if(U_FAILURE(status)){ 6927 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6928 return; 6929 } 6930 6931 tailoredSet = ucol_getTailoredSet(videcoll, &status); 6932 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status); 6933 6934 if(!uset_equals(tailoredSet, importTailoredSet)){ 6935 log_err("Tailored sets not equal"); 6936 } 6937 6938 uset_close(importTailoredSet); 6939 6940 itemCount = uset_getItemCount(tailoredSet); 6941 6942 for( i = 0; i < itemCount; i++){ 6943 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 6944 if(strLength < 2){ 6945 for (; start <= end; start++){ 6946 k = 0; 6947 
U16_APPEND_UNSAFE(str, k, start); 6948 ucol_getSortKey(videcoll, str, 1, sk1, 500); 6949 ucol_getSortKey(importvidecoll, str, 1, sk2, 500); 6950 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6951 log_err("Sort key for %s not equal\n", str); 6952 break; 6953 } 6954 } 6955 }else{ 6956 ucol_getSortKey(videcoll, str, strLength, sk1, 500); 6957 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500); 6958 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6959 log_err("Sort key for %s not equal\n", str); 6960 break; 6961 } 6962 6963 } 6964 } 6965 6966 uset_close(tailoredSet); 6967 6968 uprv_free(viderules); 6969 6970 ucol_close(videcoll); 6971 ucol_close(importvidecoll); 6972 ucol_close(vicoll); 6973 ucol_close(decoll); 6974 } 6975 6976 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */ 6977 static const UChar longUpperStr1[]= { /* 155 chars */ 6978 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 6979 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52, 6980 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E, 6981 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C, 6982 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E, 6983 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20, 6984 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45, 6985 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32, 6986 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62, 6987 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61 6988 }; 6989 6990 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics 
on vowels, repeated 5 times */ 6991 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */ 6992 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6993 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6994 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6995 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6996 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20 6997 }; 6998 6999 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */ 7000 static const UChar longUpperStr3[]= { /* 324 chars */ 7001 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7002 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7003 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7004 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7005 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7006 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7007 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7008 
0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7009 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7010 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7011 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7012 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20 7013 }; 7014 7015 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0])) 7016 7017 typedef struct { 7018 const UChar * longUpperStrPtr; 7019 int32_t longUpperStrLen; 7020 } LongUpperStrItem; 7021 7022 /* String pointers must be in reverse collation order of the corresponding strings */ 7023 static const LongUpperStrItem longUpperStrItems[] = { 7024 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, 7025 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, 7026 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, 7027 { NULL, 0 } 7028 }; 7029 7030 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */ 7031 7032 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */ 7033 static void TestCaseLevelBufferOverflow(void) 7034 { 7035 UErrorCode status = U_ZERO_ERROR; 7036 UCollator * ucol = ucol_open("root", &status); 7037 if ( U_SUCCESS(status) ) { 7038 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); 7039 if ( U_SUCCESS(status) ) { 7040 const LongUpperStrItem * itemPtr; 7041 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax]; 7042 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) { 7043 int32_t sortKeyLen; 7044 if (itemPtr > longUpperStrItems) { 7045 
uprv_strcpy((char *)sortKeyB, (char *)sortKeyA); 7046 } 7047 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax); 7048 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) { 7049 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen); 7050 break; 7051 } 7052 if ( itemPtr > longUpperStrItems ) { 7053 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB); 7054 if (compareResult >= 0) { 7055 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult); 7056 } 7057 } 7058 } 7059 } else { 7060 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status)); 7061 } 7062 ucol_close(ucol); 7063 } else { 7064 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status)); 7065 } 7066 } 7067 7068 7069 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) 7070 7071 void addMiscCollTest(TestNode** root) 7072 { 7073 TEST(TestRuleOptions); 7074 TEST(TestBeforePrefixFailure); 7075 TEST(TestContractionClosure); 7076 TEST(TestPrefixCompose); 7077 TEST(TestStrCollIdenticalPrefix); 7078 TEST(TestPrefix); 7079 TEST(TestNewJapanese); 7080 /*TEST(TestLimitations);*/ 7081 TEST(TestNonChars); 7082 TEST(TestExtremeCompression); 7083 TEST(TestSurrogates); 7084 /* BEGIN android-removed 7085 To save space, Android does not include the collation tailoring rules. 7086 We skip the tailing tests for collations. 
*/ 7087 /* TEST(TestVariableTopSetting); */ 7088 /* END android-removed */ 7089 TEST(TestBocsuCoverage); 7090 TEST(TestCyrillicTailoring); 7091 TEST(TestCase); 7092 TEST(IncompleteCntTest); 7093 TEST(BlackBirdTest); 7094 TEST(FunkyATest); 7095 TEST(BillFairmanTest); 7096 TEST(RamsRulesTest); 7097 TEST(IsTailoredTest); 7098 TEST(TestCollations); 7099 TEST(TestChMove); 7100 TEST(TestImplicitTailoring); 7101 TEST(TestFCDProblem); 7102 TEST(TestEmptyRule); 7103 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */ 7104 TEST(TestJ815); 7105 /*TEST(TestJ831);*/ /* we changed lv locale */ 7106 TEST(TestBefore); 7107 TEST(TestRedundantRules); 7108 TEST(TestExpansionSyntax); 7109 TEST(TestHangulTailoring); 7110 TEST(TestUCARules); 7111 TEST(TestIncrementalNormalize); 7112 TEST(TestComposeDecompose); 7113 TEST(TestCompressOverlap); 7114 TEST(TestContraction); 7115 TEST(TestExpansion); 7116 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */ 7117 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */ 7118 TEST(TestOptimize); 7119 TEST(TestSuppressContractions); 7120 TEST(Alexis2); 7121 TEST(TestHebrewUCA); 7122 TEST(TestPartialSortKeyTermination); 7123 TEST(TestSettings); 7124 TEST(TestEquals); 7125 TEST(TestJ2726); 7126 TEST(NullRule); 7127 TEST(TestNumericCollation); 7128 TEST(TestTibetanConformance); 7129 TEST(TestPinyinProblem); 7130 TEST(TestImplicitGeneration); 7131 TEST(TestSeparateTrees); 7132 TEST(TestBeforePinyin); 7133 TEST(TestBeforeTightening); 7134 /*TEST(TestMoreBefore);*/ 7135 TEST(TestTailorNULL); 7136 TEST(TestUpperFirstQuaternary); 7137 TEST(TestJ4960); 7138 TEST(TestJ5223); 7139 TEST(TestJ5232); 7140 TEST(TestJ5367); 7141 TEST(TestHiragana); 7142 TEST(TestSortKeyConsistency); 7143 TEST(TestVI5913); /* VI, RO tailored rules */ 7144 TEST(TestCroatianSortKey); 7145 TEST(TestTailor6179); 7146 TEST(TestUCAPrecontext); 7147 TEST(TestOutOfBuffer5468); 7148 
TEST(TestSameStrengthList); 7149 7150 TEST(TestSameStrengthListQuoted); 7151 TEST(TestSameStrengthListSupplemental); 7152 TEST(TestSameStrengthListQwerty); 7153 TEST(TestSameStrengthListQuotedQwerty); 7154 TEST(TestSameStrengthListRanges); 7155 TEST(TestSameStrengthListSupplementalRanges); 7156 TEST(TestSpecialCharacters); 7157 TEST(TestPrivateUseCharacters); 7158 TEST(TestPrivateUseCharactersInList); 7159 TEST(TestPrivateUseCharactersInRange); 7160 TEST(TestInvalidListsAndRanges); 7161 TEST(TestImportRulesDeWithPhonebook); 7162 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */ 7163 /* TEST(TestImportRulesCJKWithUnihan); */ 7164 /* BEGIN android-removed: Due to Android does not include reverse UCA table. 7165 TEST(TestImport); 7166 TEST(TestImportWithType); 7167 END android-removed */ 7168 7169 TEST(TestBeforeRuleWithScriptReordering); 7170 TEST(TestNonLeadBytesDuringCollationReordering); 7171 TEST(TestReorderingAPI); 7172 TEST(TestReorderingAPIWithRuleCreatedCollator); 7173 TEST(TestEquivalentReorderingScripts); 7174 TEST(TestGreekFirstReorder); 7175 TEST(TestGreekLastReorder); 7176 TEST(TestNonScriptReorder); 7177 TEST(TestHaniReorder); 7178 TEST(TestHaniReorderWithOtherRules); 7179 TEST(TestMultipleReorder); 7180 TEST(TestReorderingAcrossCloning); 7181 TEST(TestReorderWithNumericCollation); 7182 7183 TEST(TestCaseLevelBufferOverflow); 7184 } 7185 7186 #endif /* #if !UCONFIG_NO_COLLATION */ 7187