1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2001-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File cmsccoll.C 9 * 10 *******************************************************************************/ 11 /** 12 * These are the tests specific to ICU 1.8 and above, that I didn't know where 13 * to fit. 14 */ 15 16 #include <stdio.h> 17 18 #include "unicode/utypes.h" 19 20 #if !UCONFIG_NO_COLLATION 21 22 #include "unicode/ucol.h" 23 #include "unicode/ucoleitr.h" 24 #include "unicode/uloc.h" 25 #include "cintltst.h" 26 #include "ccolltst.h" 27 #include "callcoll.h" 28 #include "unicode/ustring.h" 29 #include "string.h" 30 #include "ucol_imp.h" 31 #include "ucol_tok.h" 32 #include "cmemory.h" 33 #include "cstring.h" 34 #include "uassert.h" 35 #include "unicode/parseerr.h" 36 #include "unicode/ucnv.h" 37 #include "unicode/ures.h" 38 #include "uparse.h" 39 #include "putilimp.h" 40 41 42 #define LEN(a) (sizeof(a)/sizeof(a[0])) 43 44 #define MAX_TOKEN_LEN 16 45 46 typedef UCollationResult tst_strcoll(void *collator, const int object, 47 const UChar *source, const int sLen, 48 const UChar *target, const int tLen); 49 50 51 52 const static char cnt1[][10] = { 53 54 "AA", 55 "AC", 56 "AZ", 57 "AQ", 58 "AB", 59 "ABZ", 60 "ABQ", 61 "Z", 62 "ABC", 63 "Q", 64 "B" 65 }; 66 67 const static char cnt2[][10] = { 68 "DA", 69 "DAD", 70 "DAZ", 71 "MAR", 72 "Z", 73 "DAVIS", 74 "MARK", 75 "DAV", 76 "DAVI" 77 }; 78 79 static void IncompleteCntTest(void) 80 { 81 UErrorCode status = U_ZERO_ERROR; 82 UChar temp[90]; 83 UChar t1[90]; 84 UChar t2[90]; 85 86 UCollator *coll = NULL; 87 uint32_t i = 0, j = 0; 88 uint32_t size = 0; 89 90 u_uastrcpy(temp, " & Z < ABC < Q < B"); 91 92 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 93 94 if(U_SUCCESS(status)) { 95 size = sizeof(cnt1)/sizeof(cnt1[0]); 96 for(i = 0; i < size-1; i++) { 97 for(j = i+1; j < size; j++) { 98 UCollationElements *iter; 99 u_uastrcpy(t1, cnt1[i]); 100 u_uastrcpy(t2, cnt1[j]); 101 doTest(coll, t1, t2, UCOL_LESS); 102 /* synwee : added collation element iterator test */ 103 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 104 if (U_FAILURE(status)) { 105 log_err("Creation of iterator failed\n"); 106 break; 107 } 108 backAndForth(iter); 109 ucol_closeElements(iter); 110 } 111 } 112 } 113 114 ucol_close(coll); 115 116 117 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV"); 118 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 119 120 if(U_SUCCESS(status)) { 121 size = sizeof(cnt2)/sizeof(cnt2[0]); 122 for(i = 0; i < size-1; i++) { 123 for(j = i+1; j < size; j++) { 124 UCollationElements *iter; 125 u_uastrcpy(t1, cnt2[i]); 126 u_uastrcpy(t2, cnt2[j]); 127 doTest(coll, t1, t2, UCOL_LESS); 128 129 /* synwee : added collation element iterator test */ 130 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 131 if (U_FAILURE(status)) { 132 log_err("Creation of iterator failed\n"); 133 break; 134 } 135 backAndForth(iter); 136 ucol_closeElements(iter); 137 } 138 } 139 } 140 141 ucol_close(coll); 142 143 144 } 145 146 const static char shifted[][20] = { 147 "black bird", 148 "black-bird", 149 "blackbird", 150 "black Bird", 151 "black-Bird", 152 "blackBird", 153 "black birds", 154 "black-birds", 155 "blackbirds" 156 }; 157 158 const static UCollationResult shiftedTert[] = { 159 UCOL_EQUAL, 160 UCOL_EQUAL, 161 UCOL_EQUAL, 162 UCOL_LESS, 163 UCOL_EQUAL, 164 UCOL_EQUAL, 165 UCOL_LESS, 166 UCOL_EQUAL, 167 UCOL_EQUAL 168 }; 169 170 const static char nonignorable[][20] = { 171 "black bird", 172 "black Bird", 173 "black birds", 174 "black-bird", 175 "black-Bird", 176 "black-birds", 177 "blackbird", 178 "blackBird", 179 "blackbirds" 180 }; 181 182 static void BlackBirdTest(void) { 183 UErrorCode status = U_ZERO_ERROR; 184 UChar t1[90]; 185 UChar t2[90]; 186 187 uint32_t i = 0, j = 0; 188 uint32_t size = 0; 189 UCollator *coll = ucol_open("en_US", &status); 190 191 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 192 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); 193 194 if(U_SUCCESS(status)) { 195 size = sizeof(nonignorable)/sizeof(nonignorable[0]); 196 for(i = 0; i < size-1; i++) { 197 for(j = i+1; j < size; j++) { 198 u_uastrcpy(t1, nonignorable[i]); 199 u_uastrcpy(t2, nonignorable[j]); 200 doTest(coll, t1, t2, UCOL_LESS); 201 } 202 } 203 } 204 205 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 206 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 207 208 if(U_SUCCESS(status)) { 209 size = sizeof(shifted)/sizeof(shifted[0]); 210 for(i = 0; i < size-1; i++) { 211 for(j = i+1; j < size; j++) { 212 u_uastrcpy(t1, shifted[i]); 213 u_uastrcpy(t2, shifted[j]); 214 doTest(coll, t1, t2, UCOL_LESS); 215 } 216 } 217 } 218 219 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); 220 if(U_SUCCESS(status)) { 221 size = sizeof(shifted)/sizeof(shifted[0]); 222 for(i = 1; i < size; i++) { 223 u_uastrcpy(t1, shifted[i-1]); 224 u_uastrcpy(t2, shifted[i]); 225 doTest(coll, t1, t2, shiftedTert[i]); 226 } 227 } 228 229 ucol_close(coll); 230 } 231 232 const static UChar testSourceCases[][MAX_TOKEN_LEN] = { 233 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000}, 234 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000}, 235 {0x0041/*'A'*/, 0x0300, 0x0000}, 236 {0x00C0, 0x0301, 0x0000}, 237 /* this would work with forced normalization */ 238 {0x00C0, 0x0316, 0x0000} 239 }; 240 241 const static UChar testTargetCases[][MAX_TOKEN_LEN] = { 242 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 243 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}, 244 {0x00C0, 0}, 245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 246 /* this would work with forced normalization */ 247 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000} 248 }; 249 250 const static UCollationResult results[] = { 251 UCOL_GREATER, 252 UCOL_EQUAL, 253 UCOL_EQUAL, 254 UCOL_GREATER, 255 UCOL_EQUAL 256 }; 257 258 static void FunkyATest(void) 259 { 260 261 int32_t i; 262 UErrorCode status = U_ZERO_ERROR; 263 UCollator *myCollation; 264 myCollation = ucol_open("en_US", &status); 265 if(U_FAILURE(status)){ 266 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 267 return; 268 } 269 log_verbose("Testing some A letters, for some reason\n"); 270 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 271 ucol_setStrength(myCollation, UCOL_TERTIARY); 272 for (i = 0; i < 4 ; i++) 273 { 274 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); 275 } 276 ucol_close(myCollation); 277 } 278 279 UColAttributeValue caseFirst[] = { 280 UCOL_OFF, 281 UCOL_LOWER_FIRST, 282 UCOL_UPPER_FIRST 283 }; 284 285 286 UColAttributeValue alternateHandling[] = { 287 UCOL_NON_IGNORABLE, 288 UCOL_SHIFTED 289 }; 290 291 UColAttributeValue caseLevel[] = { 292 UCOL_OFF, 293 UCOL_ON 294 }; 295 296 UColAttributeValue strengths[] = { 297 UCOL_PRIMARY, 298 UCOL_SECONDARY, 299 UCOL_TERTIARY, 300 UCOL_QUATERNARY, 301 UCOL_IDENTICAL 302 }; 303 304 #if 0 305 static const char * strengthsC[] = { 306 "UCOL_PRIMARY", 307 "UCOL_SECONDARY", 308 "UCOL_TERTIARY", 309 "UCOL_QUATERNARY", 310 "UCOL_IDENTICAL" 311 }; 312 313 static const char * caseFirstC[] = { 314 "UCOL_OFF", 315 "UCOL_LOWER_FIRST", 316 "UCOL_UPPER_FIRST" 317 }; 318 319 320 static const char * alternateHandlingC[] = { 321 "UCOL_NON_IGNORABLE", 322 "UCOL_SHIFTED" 323 }; 324 325 static const char * caseLevelC[] = { 326 "UCOL_OFF", 327 "UCOL_ON" 328 }; 329 330 /* not used currently - does not test only prints */ 331 static void PrintMarkDavis(void) 332 { 333 UErrorCode status = U_ZERO_ERROR; 334 UChar m[256]; 335 uint8_t sortkey[256]; 336 UCollator *coll = ucol_open("en_US", &status); 337 uint32_t h,i,j,k, sortkeysize; 338 uint32_t sizem = 0; 339 char buffer[512]; 340 uint32_t len = 512; 341 342 log_verbose("PrintMarkDavis"); 343 344 u_uastrcpy(m, "Mark Davis"); 345 sizem = u_strlen(m); 346 347 348 m[1] = 0xe4; 349 350 for(i = 0; i<sizem; i++) { 351 fprintf(stderr, "\\u%04X ", m[i]); 352 } 353 fprintf(stderr, "\n"); 354 355 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) { 356 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status); 357 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]); 358 359 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) { 360 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status); 361 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]); 362 363 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) { 364 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status); 365 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]); 366 367 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) { 368 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status); 369 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256); 370 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]); 371 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len)); 372 } 373 374 } 375 376 } 377 378 } 379 } 380 #endif 381 382 static void BillFairmanTest(void) { 383 /* 384 ** check for actual locale via ICU resource bundles 385 ** 386 ** lp points to the original locale ("fr_FR_....") 387 */ 388 389 UResourceBundle *lr,*cr; 390 UErrorCode lec = U_ZERO_ERROR; 391 const char *lp = "fr_FR_you_ll_never_find_this_locale"; 392 393 log_verbose("BillFairmanTest\n"); 394 395 lr = ures_open(NULL,lp,&lec); 396 if (lr) { 397 cr = ures_getByKey(lr,"collations",0,&lec); 398 if (cr) { 399 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec); 400 if (lp) { 401 if (U_SUCCESS(lec)) { 402 if(strcmp(lp, "fr") != 0) { 403 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp); 404 } 405 } 406 } 407 ures_close(cr); 408 } 409 ures_close(lr); 410 } 411 } 412 413 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){ 414 UChar source[256] = { '\0'}; 415 UChar target[256] = { '\0'}; 416 UChar preP = 0x31a3; 417 UChar preQ = 0x310d; 418 /* 419 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491; 420 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413; 421 */ 422 /*log_verbose("Testing primary\n");*/ 423 424 doTest(col, p, q, UCOL_LESS); 425 /* 426 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q)); 427 428 if(result!=UCOL_LESS){ 429 aescstrdup(p,utfSource,256); 430 aescstrdup(q,utfTarget,256); 431 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 432 } 433 */ 434 source[0] = preP; 435 u_strcpy(source+1,p); 436 target[0] = preQ; 437 u_strcpy(target+1,q); 438 doTest(col, source, target, UCOL_LESS); 439 /* 440 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget); 441 */ 442 } 443 444 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){ 445 UChar source[256] = { '\0'}; 446 UChar target[256] = { '\0'}; 447 448 /*log_verbose("Testing secondary\n");*/ 449 450 doTest(col, p, q, UCOL_LESS); 451 /* 452 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget); 453 */ 454 source[0] = 0x0053; 455 u_strcpy(source+1,p); 456 target[0]= 0x0073; 457 u_strcpy(target+1,q); 458 459 doTest(col, source, target, UCOL_LESS); 460 /* 461 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget); 462 */ 463 464 465 u_strcpy(source,p); 466 source[u_strlen(p)] = 0x62; 467 source[u_strlen(p)+1] = 0; 468 469 470 u_strcpy(target,q); 471 target[u_strlen(q)] = 0x61; 472 target[u_strlen(q)+1] = 0; 473 474 doTest(col, source, target, UCOL_GREATER); 475 476 /* 477 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget); 478 */ 479 } 480 481 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){ 482 UChar source[256] = { '\0'}; 483 UChar target[256] = { '\0'}; 484 485 /*log_verbose("Testing tertiary\n");*/ 486 487 doTest(col, p, q, UCOL_LESS); 488 /* 489 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget); 490 */ 491 source[0] = 0x0020; 492 u_strcpy(source+1,p); 493 target[0]= 0x002D; 494 u_strcpy(target+1,q); 495 496 doTest(col, source, target, UCOL_LESS); 497 /* 498 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget); 499 */ 500 501 u_strcpy(source,p); 502 source[u_strlen(p)] = 0xE0; 503 source[u_strlen(p)+1] = 0; 504 505 u_strcpy(target,q); 506 target[u_strlen(q)] = 0x61; 507 target[u_strlen(q)+1] = 0; 508 509 doTest(col, source, target, UCOL_GREATER); 510 511 /* 512 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget); 513 */ 514 } 515 516 static void testEquality(UCollator* col, const UChar* p,const UChar* q){ 517 /* 518 UChar source[256] = { '\0'}; 519 UChar target[256] = { '\0'}; 520 */ 521 522 doTest(col, p, q, UCOL_EQUAL); 523 /* 524 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 525 */ 526 } 527 528 static void testCollator(UCollator *coll, UErrorCode *status) { 529 const UChar *rules = NULL, *current = NULL; 530 int32_t ruleLen = 0; 531 uint32_t strength = 0; 532 uint32_t chOffset = 0; uint32_t chLen = 0; 533 uint32_t exOffset = 0; uint32_t exLen = 0; 534 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 535 uint32_t firstEx = 0; 536 /* uint32_t rExpsLen = 0; */ 537 uint32_t firstLen = 0; 538 UBool varT = FALSE; UBool top_ = TRUE; 539 uint16_t specs = 0; 540 UBool startOfRules = TRUE; 541 UBool lastReset = FALSE; 542 UBool before = FALSE; 543 uint32_t beforeStrength = 0; 544 UColTokenParser src; 545 UColOptionSet opts; 546 547 UChar first[256]; 548 UChar second[256]; 549 UChar tempB[256]; 550 uint32_t tempLen; 551 UChar *rulesCopy = NULL; 552 UParseError parseError; 553 554 src.opts = &opts; 555 556 rules = ucol_getRules(coll, &ruleLen); 557 if(U_SUCCESS(*status) && ruleLen > 0) { 558 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 559 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 560 src.current = src.source = rulesCopy; 561 src.end = rulesCopy+ruleLen; 562 src.extraCurrent = src.end; 563 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 564 *first = *second = 0; 565 566 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) { 567 strength = src.parsedToken.strength; 568 chOffset = src.parsedToken.charsOffset; 569 chLen = src.parsedToken.charsLen; 570 exOffset = src.parsedToken.extensionOffset; 571 exLen = src.parsedToken.extensionLen; 572 prefixOffset = src.parsedToken.prefixOffset; 573 prefixLen = src.parsedToken.prefixLen; 574 specs = src.parsedToken.flags; 575 576 startOfRules = FALSE; 577 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 578 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 579 if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */ 580 second[0] = 0; 581 } else { 582 u_strncpy(second,rulesCopy+chOffset, chLen); 583 second[chLen] = 0; 584 585 if(exLen > 0 && firstEx == 0) { 586 u_strncat(first, rulesCopy+exOffset, exLen); 587 first[firstLen+exLen] = 0; 588 } 589 590 if(lastReset == TRUE && prefixLen != 0) { 591 u_strncpy(first+prefixLen, first, firstLen); 592 u_strncpy(first, rulesCopy+prefixOffset, prefixLen); 593 first[firstLen+prefixLen] = 0; 594 firstLen = firstLen+prefixLen; 595 } 596 597 if(before == TRUE) { /* swap first and second */ 598 u_strcpy(tempB, first); 599 u_strcpy(first, second); 600 u_strcpy(second, tempB); 601 602 tempLen = firstLen; 603 firstLen = chLen; 604 chLen = tempLen; 605 606 tempLen = firstEx; 607 firstEx = exLen; 608 exLen = tempLen; 609 if(beforeStrength < strength) { 610 strength = beforeStrength; 611 } 612 } 613 } 614 lastReset = FALSE; 615 616 switch(strength){ 617 case UCOL_IDENTICAL: 618 testEquality(coll,first,second); 619 break; 620 case UCOL_PRIMARY: 621 testPrimary(coll,first,second); 622 break; 623 case UCOL_SECONDARY: 624 testSecondary(coll,first,second); 625 break; 626 case UCOL_TERTIARY: 627 testTertiary(coll,first,second); 628 break; 629 case UCOL_TOK_RESET: 630 lastReset = TRUE; 631 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 632 if(before) { 633 beforeStrength = (specs & UCOL_TOK_BEFORE)-1; 634 } 635 break; 636 default: 637 break; 638 } 639 640 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */ 641 before = FALSE; 642 } else { 643 firstLen = chLen; 644 firstEx = exLen; 645 u_strcpy(first, second); 646 } 647 } 648 free(rulesCopy); 649 } 650 } 651 652 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 653 UCollator *UCA = (UCollator *)collator; 654 return ucol_strcoll(UCA, source, sLen, target, tLen); 655 } 656 657 /* 658 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 659 #ifdef U_WINDOWS 660 LCID lcid = (LCID)collator; 661 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen); 662 #else 663 return 0; 664 #endif 665 } 666 */ 667 668 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts, 669 UChar s1, UChar s2, 670 const UChar *s, const uint32_t sLen, 671 const UChar *t, const uint32_t tLen) { 672 UChar source[256] = {0}; 673 UChar target[256] = {0}; 674 675 source[0] = s1; 676 u_strcpy(source+1, s); 677 target[0] = s2; 678 u_strcpy(target+1, t); 679 680 return func(collator, opts, source, sLen+1, target, tLen+1); 681 } 682 683 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts, 684 UChar s1, UChar s2, 685 const UChar *s, const uint32_t sLen, 686 const UChar *t, const uint32_t tLen) { 687 UChar source[256] = {0}; 688 UChar target[256] = {0}; 689 690 u_strcpy(source, s); 691 source[sLen] = s1; 692 u_strcpy(target, t); 693 target[tLen] = s2; 694 695 return func(collator, opts, source, sLen+1, target, tLen+1); 696 } 697 698 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts, 699 const UChar *s, const uint32_t sLen, 700 const UChar *t, const uint32_t tLen, 701 UCollationResult result) { 702 /*UChar fPrimary = 0x6d;*/ 703 /*UChar sPrimary = 0x6e;*/ 704 UChar fSecondary = 0x310d; 705 UChar sSecondary = 0x31a3; 706 UChar fTertiary = 0x310f; 707 UChar sTertiary = 0x31b7; 708 709 UCollationResult oposite; 710 if(result == UCOL_EQUAL) { 711 return UCOL_IDENTICAL; 712 } else if(result == UCOL_GREATER) { 713 oposite = UCOL_LESS; 714 } else { 715 oposite = UCOL_GREATER; 716 } 717 718 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) { 719 return UCOL_PRIMARY; 720 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) && 721 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) { 722 return UCOL_SECONDARY; 723 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) && 724 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) { 725 return UCOL_TERTIARY; 726 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) && 727 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) { 728 return UCOL_QUATERNARY; 729 } else { 730 return UCOL_IDENTICAL; 731 } 732 } 733 734 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) { 735 uint32_t i = 0; 736 737 if(res == UCOL_EQUAL || strength == 0xdeadbeef) { 738 buffer[0] = '='; 739 buffer[1] = '='; 740 buffer[2] = '\0'; 741 } else if(res == UCOL_GREATER) { 742 for(i = 0; i<strength+1; i++) { 743 buffer[i] = '>'; 744 } 745 buffer[strength+1] = '\0'; 746 } else { 747 for(i = 0; i<strength+1; i++) { 748 buffer[i] = '<'; 749 } 750 buffer[strength+1] = '\0'; 751 } 752 753 return buffer; 754 } 755 756 757 758 static void logFailure (const char *platform, const char *test, 759 const UChar *source, const uint32_t sLen, 760 const UChar *target, const uint32_t tLen, 761 UCollationResult realRes, uint32_t realStrength, 762 UCollationResult expRes, uint32_t expStrength, UBool error) { 763 764 uint32_t i = 0; 765 766 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256]; 767 static int32_t maxOutputLength = 0; 768 int32_t outputLength; 769 770 *sEsc = *tEsc = *s = *t = 0; 771 if(error == TRUE) { 772 log_err("Difference between expected and generated order. Run test with -v for more info\n"); 773 } else if(VERBOSITY == 0) { 774 return; 775 } 776 for(i = 0; i<sLen; i++) { 777 sprintf(b, "%04X", source[i]); 778 strcat(sEsc, "\\u"); 779 strcat(sEsc, b); 780 strcat(s, b); 781 strcat(s, " "); 782 if(source[i] < 0x80) { 783 sprintf(b, "(%c)", source[i]); 784 strcat(sEsc, b); 785 } 786 } 787 for(i = 0; i<tLen; i++) { 788 sprintf(b, "%04X", target[i]); 789 strcat(tEsc, "\\u"); 790 strcat(tEsc, b); 791 strcat(t, b); 792 strcat(t, " "); 793 if(target[i] < 0x80) { 794 sprintf(b, "(%c)", target[i]); 795 strcat(tEsc, b); 796 } 797 } 798 /* 799 strcpy(output, "[[ "); 800 strcat(output, sEsc); 801 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 802 strcat(output, tEsc); 803 804 strcat(output, " : "); 805 806 strcat(output, sEsc); 807 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 808 strcat(output, tEsc); 809 strcat(output, " ]] "); 810 811 log_verbose("%s", output); 812 */ 813 814 815 strcpy(output, "DIFF: "); 816 817 strcat(output, s); 818 strcat(output, " : "); 819 strcat(output, t); 820 821 strcat(output, test); 822 strcat(output, ": "); 823 824 strcat(output, sEsc); 825 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 826 strcat(output, tEsc); 827 828 strcat(output, " "); 829 830 strcat(output, platform); 831 strcat(output, ": "); 832 833 strcat(output, sEsc); 834 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 835 strcat(output, tEsc); 836 837 outputLength = (int32_t)strlen(output); 838 if(outputLength > maxOutputLength) { 839 maxOutputLength = outputLength; 840 U_ASSERT(outputLength < sizeof(output)); 841 } 842 843 log_verbose("%s\n", output); 844 845 } 846 847 /* 848 static void printOutRules(const UChar *rules) { 849 uint32_t len = u_strlen(rules); 850 uint32_t i = 0; 851 char toPrint; 852 uint32_t line = 0; 853 854 fprintf(stdout, "Rules:"); 855 856 for(i = 0; i<len; i++) { 857 if(rules[i]<0x7f && rules[i]>=0x20) { 858 toPrint = (char)rules[i]; 859 if(toPrint == '&') { 860 line = 1; 861 fprintf(stdout, "\n&"); 862 } else if(toPrint == ';') { 863 fprintf(stdout, "<<"); 864 line+=2; 865 } else if(toPrint == ',') { 866 fprintf(stdout, "<<<"); 867 line+=3; 868 } else { 869 fprintf(stdout, "%c", toPrint); 870 line++; 871 } 872 } else if(rules[i]<0x3400 || rules[i]>=0xa000) { 873 fprintf(stdout, "\\u%04X", rules[i]); 874 line+=6; 875 } 876 if(line>72) { 877 fprintf(stdout, "\n"); 878 line = 0; 879 } 880 } 881 882 log_verbose("\n"); 883 884 } 885 */ 886 887 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) { 888 uint32_t diffs = 0; 889 UCollationResult realResult; 890 uint32_t realStrength; 891 892 uint32_t sLen = u_strlen(first); 893 uint32_t tLen = u_strlen(second); 894 895 realResult = func(collator, opts, first, sLen, second, tLen); 896 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult); 897 898 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) { 899 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error); 900 diffs++; 901 } else if(realResult != UCOL_LESS || realStrength != strength) { 902 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error); 903 diffs++; 904 } 905 return diffs; 906 } 907 908 909 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) { 910 const UChar *rules = NULL, *current = NULL; 911 int32_t ruleLen = 0; 912 uint32_t strength = 0; 913 uint32_t chOffset = 0; uint32_t chLen = 0; 914 uint32_t exOffset = 0; uint32_t exLen = 0; 915 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 916 /* uint32_t rExpsLen = 0; */ 917 uint32_t firstLen = 0, secondLen = 0; 918 UBool varT = FALSE; UBool top_ = TRUE; 919 uint16_t specs = 0; 920 UBool startOfRules = TRUE; 921 UColTokenParser src; 922 UColOptionSet opts; 923 924 UChar first[256]; 925 UChar second[256]; 926 UChar *rulesCopy = NULL; 927 928 uint32_t UCAdiff = 0; 929 uint32_t Windiff = 1; 930 UParseError parseError; 931 932 src.opts = &opts; 933 934 rules = ucol_getRules(coll, &ruleLen); 935 936 /*printOutRules(rules);*/ 937 938 if(U_SUCCESS(*status) && ruleLen > 0) { 939 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 940 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 941 src.current = src.source = rulesCopy; 942 src.end = rulesCopy+ruleLen; 943 src.extraCurrent = src.end; 944 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 945 *first = *second = 0; 946 947 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 948 strength = src.parsedToken.strength; 949 chOffset = src.parsedToken.charsOffset; 950 chLen = src.parsedToken.charsLen; 951 exOffset = src.parsedToken.extensionOffset; 952 exLen = src.parsedToken.extensionLen; 953 prefixOffset = src.parsedToken.prefixOffset; 954 prefixLen = src.parsedToken.prefixLen; 955 specs = src.parsedToken.flags; 956 957 startOfRules = FALSE; 958 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 959 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 960 961 u_strncpy(second,rulesCopy+chOffset, chLen); 962 second[chLen] = 0; 963 secondLen = chLen; 964 965 if(exLen > 0) { 966 u_strncat(first, rulesCopy+exOffset, exLen); 967 first[firstLen+exLen] = 0; 968 firstLen += exLen; 969 } 970 971 if(strength != UCOL_TOK_RESET) { 972 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) { 973 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error); 974 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/ 975 } 976 } 977 978 979 firstLen = chLen; 980 u_strcpy(first, second); 981 982 } 983 if(UCAdiff != 0 && Windiff != 0) { 984 log_verbose("\n"); 985 } 986 if(UCAdiff == 0) { 987 log_verbose("No immediate difference with %s!\n", refName); 988 } 989 if(Windiff == 0) { 990 log_verbose("No immediate difference with Win32!\n"); 991 } 992 free(rulesCopy); 993 } 994 } 995 996 /* 997 * Takes two CEs (lead and continuation) and 998 * compares them as CEs should be compared: 999 * primary vs. primary, secondary vs. secondary 1000 * tertiary vs. tertiary 1001 */ 1002 static int32_t compareCEs(uint32_t s1, uint32_t s2, 1003 uint32_t t1, uint32_t t2) { 1004 uint32_t s = 0, t = 0; 1005 if(s1 == t1 && s2 == t2) { 1006 return 0; 1007 } 1008 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 1009 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); 1010 if(s < t) { 1011 return -1; 1012 } else if(s > t) { 1013 return 1; 1014 } else { 1015 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; 1016 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; 1017 if(s < t) { 1018 return -1; 1019 } else if(s > t) { 1020 return 1; 1021 } else { 1022 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); 1023 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); 1024 if(s < t) { 1025 return -1; 1026 } else { 1027 return 1; 1028 } 1029 } 1030 } 1031 } 1032 1033 typedef struct { 1034 uint32_t startCE; 1035 uint32_t startContCE; 1036 uint32_t limitCE; 1037 uint32_t limitContCE; 1038 } indirectBoundaries; 1039 1040 /* these values are used for finding CE values for indirect positioning. */ 1041 /* Indirect positioning is a mechanism for allowing resets on symbolic */ 1042 /* values. It only works for resets and you cannot tailor indirect names */ 1043 /* An indirect name can define either an anchor point or a range. An */ 1044 /* anchor point behaves in exactly the same way as a code point in reset */ 1045 /* would, except that it cannot be tailored. A range (we currently only */ 1046 /* know for the [top] range will explicitly set the upper bound for */ 1047 /* generated CEs, thus allowing for better control over how many CEs can */ 1048 /* be squeezed between in the range without performance penalty. */ 1049 /* In that respect, we use [top] for tailoring of locales that use CJK */ 1050 /* characters. Other indirect values are currently a pure convenience, */ 1051 /* they can be used to assure that the CEs will be always positioned in */ 1052 /* the same place relative to a point with known properties (e.g. first */ 1053 /* primary ignorable). */ 1054 static indirectBoundaries ucolIndirectBoundaries[15]; 1055 static UBool indirectBoundariesSet = FALSE; 1056 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) { 1057 /* Set values for the top - TODO: once we have values for all the indirects, we are going */ 1058 /* to initalize here. */ 1059 ucolIndirectBoundaries[indexR].startCE = start[0]; 1060 ucolIndirectBoundaries[indexR].startContCE = start[1]; 1061 if(end) { 1062 ucolIndirectBoundaries[indexR].limitCE = end[0]; 1063 ucolIndirectBoundaries[indexR].limitContCE = end[1]; 1064 } else { 1065 ucolIndirectBoundaries[indexR].limitCE = 0; 1066 ucolIndirectBoundaries[indexR].limitContCE = 0; 1067 } 1068 } 1069 1070 static void testCEs(UCollator *coll, UErrorCode *status) { 1071 const UChar *rules = NULL, *current = NULL; 1072 int32_t ruleLen = 0; 1073 1074 uint32_t strength = 0; 1075 uint32_t maxStrength = UCOL_IDENTICAL; 1076 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE; 1077 uint32_t lastCE; 1078 uint32_t lastContCE; 1079 1080 int32_t result = 0; 1081 uint32_t chOffset = 0; uint32_t chLen = 0; 1082 uint32_t exOffset = 0; uint32_t exLen = 0; 1083 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 1084 uint32_t oldOffset = 0; 1085 1086 /* uint32_t rExpsLen = 0; */ 1087 /* uint32_t firstLen = 0; */ 1088 uint16_t specs = 0; 1089 UBool varT = FALSE; UBool top_ = TRUE; 1090 UBool startOfRules = TRUE; 1091 UBool before = FALSE; 1092 UColTokenParser src; 1093 UColOptionSet opts; 1094 UParseError parseError; 1095 UChar *rulesCopy = NULL; 1096 collIterate c; 1097 UCAConstants *consts = NULL; 1098 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ 1099 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; 1100 const char *colLoc; 1101 UCollator *UCA = ucol_open("root", status); 1102 1103 if (U_FAILURE(*status)) { 1104 log_err("Could not open root collator %s\n", u_errorName(*status)); 1105 return; 1106 } 1107 1108 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); 1109 if (U_FAILURE(*status)) { 1110 log_err("Could not get collator name: %s\n", u_errorName(*status)); 1111 return; 1112 } 1113 1114 consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts); 1115 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0]; 1116 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */ 1117 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0]; 1118 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1]; 1119 1120 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND; 1121 1122 src.opts = &opts; 1123 1124 rules = ucol_getRules(coll, &ruleLen); 1125 1126 src.invUCA = ucol_initInverseUCA(status); 1127 1128 if(indirectBoundariesSet == FALSE) { 1129 /* UCOL_RESET_TOP_VALUE */ 1130 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1131 /* UCOL_FIRST_PRIMARY_IGNORABLE */ 1132 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0); 1133 /* UCOL_LAST_PRIMARY_IGNORABLE */ 1134 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0); 1135 /* UCOL_FIRST_SECONDARY_IGNORABLE */ 1136 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0); 1137 /* UCOL_LAST_SECONDARY_IGNORABLE */ 1138 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0); 1139 /* UCOL_FIRST_TERTIARY_IGNORABLE */ 1140 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0); 1141 /* UCOL_LAST_TERTIARY_IGNORABLE */ 1142 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0); 1143 /* UCOL_FIRST_VARIABLE */ 1144 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0); 1145 /* UCOL_LAST_VARIABLE */ 1146 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0); 1147 /* UCOL_FIRST_NON_VARIABLE */ 1148 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0); 1149 /* UCOL_LAST_NON_VARIABLE */ 1150 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1151 /* UCOL_FIRST_IMPLICIT */ 1152 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0); 1153 /* UCOL_LAST_IMPLICIT */ 1154 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING); 1155 /* UCOL_FIRST_TRAILING */ 1156 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0); 1157 /* UCOL_LAST_TRAILING */ 1158 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0); 1159 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24); 1160 indirectBoundariesSet = TRUE; 1161 } 1162 1163 1164 if(U_SUCCESS(*status) && ruleLen > 0) { 1165 rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 1166 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 1167 src.current = src.source = rulesCopy; 1168 src.end = rulesCopy+ruleLen; 1169 src.extraCurrent = src.end; 1170 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1171 1172 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 1173 strength = src.parsedToken.strength; 1174 chOffset = src.parsedToken.charsOffset; 1175 chLen = src.parsedToken.charsLen; 1176 exOffset = src.parsedToken.extensionOffset; 1177 exLen = src.parsedToken.extensionLen; 1178 prefixOffset = src.parsedToken.prefixOffset; 1179 prefixLen = src.parsedToken.prefixLen; 1180 specs = src.parsedToken.flags; 1181 1182 startOfRules = FALSE; 1183 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 1184 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 1185 1186 uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c); 1187 1188 currCE = ucol_getNextCE(coll, &c, status); 1189 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) { 1190 log_verbose("Thai prevowel detected. Will pick next CE\n"); 1191 currCE = ucol_getNextCE(coll, &c, status); 1192 } 1193 1194 currContCE = ucol_getNextCE(coll, &c, status); 1195 if(!isContinuation(currContCE)) { 1196 currContCE = 0; 1197 } 1198 1199 /* we need to repack CEs here */ 1200 1201 if(strength == UCOL_TOK_RESET) { 1202 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 1203 if(top_ == TRUE) { 1204 int32_t index = src.parsedToken.indirectIndex; 1205 1206 nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE; 1207 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE; 1208 } else { 1209 nextCE = baseCE = currCE; 1210 nextContCE = baseContCE = currContCE; 1211 } 1212 maxStrength = UCOL_IDENTICAL; 1213 } else { 1214 if(strength < maxStrength) { 1215 maxStrength = strength; 1216 if(baseCE == UCOL_RESET_TOP_VALUE) { 1217 log_verbose("Resetting to [top]\n"); 1218 nextCE = UCOL_NEXT_TOP_VALUE; 1219 nextContCE = UCOL_NEXT_TOP_CONT; 1220 } else { 1221 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength); 1222 } 1223 if(result < 0) { 1224 if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) { 1225 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset)); 1226 return; 1227 } else { 1228 log_err("%s: couldn't find the CE\n", colLoc); 1229 return; 1230 } 1231 } 1232 } 1233 1234 currCE &= 0xFFFFFF3F; 1235 currContCE &= 0xFFFFFFBF; 1236 1237 if(maxStrength == UCOL_IDENTICAL) { 1238 if(baseCE != currCE || baseContCE != currContCE) { 1239 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1240 } 1241 } else { 1242 if(strength == UCOL_IDENTICAL) { 1243 if(lastCE != currCE || lastContCE != currContCE) { 1244 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1245 } 1246 } else { 1247 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) { 1248 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/ 1249 log_err("%s: current CE is not less than base CE\n", colLoc); 1250 } 1251 if(!before) { 1252 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) { 1253 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1254 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1255 } 1256 } else { 1257 before = FALSE; 1258 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) { 1259 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1260 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1261 } 1262 } 1263 } 1264 } 1265 1266 } 1267 1268 oldOffset = chOffset; 1269 lastCE = currCE & 0xFFFFFF3F; 1270 lastContCE = currContCE & 0xFFFFFFBF; 1271 } 1272 free(rulesCopy); 1273 } 1274 ucol_close(UCA); 1275 } 1276 1277 #if 0 1278 /* these locales are now picked from index RB */ 1279 static const char* localesToTest[] = { 1280 "ar", "bg", "ca", "cs", "da", 1281 "el", "en_BE", "en_US_POSIX", 1282 "es", "et", "fi", "fr", "hi", 1283 "hr", "hu", "is", "iw", "ja", 1284 "ko", "lt", "lv", "mk", "mt", 1285 "nb", "nn", "nn_NO", "pl", "ro", 1286 "ru", "sh", "sk", "sl", "sq", 1287 "sr", "sv", "th", "tr", "uk", 1288 "vi", "zh", "zh_TW" 1289 }; 1290 #endif 1291 1292 static const char* rulesToTest[] = { 1293 /* Funky fa rule */ 1294 "&\\u0622 < \\u0627 << \\u0671 < \\u0621", 1295 /*"& Z < p, P",*/ 1296 /* Cui Mins rules */ 1297 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/ 1298 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1299 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/ 1300 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1301 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/ 1302 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/ 1303 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/ 1304 }; 1305 1306 1307 static void TestCollations(void) { 1308 int32_t noOfLoc = uloc_countAvailable(); 1309 int32_t i = 0, j = 0; 1310 1311 UErrorCode status = U_ZERO_ERROR; 1312 char cName[256]; 1313 UChar name[256]; 1314 int32_t nameSize; 1315 1316 1317 const char *locName = NULL; 1318 UCollator *coll = NULL; 1319 UCollator *UCA = ucol_open("", &status); 1320 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status); 1321 if (U_FAILURE(status)) { 1322 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status)); 1323 return; 1324 } 1325 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 1326 1327 for(i = 0; i<noOfLoc; i++) { 1328 status = U_ZERO_ERROR; 1329 locName = uloc_getAvailable(i); 1330 if(uprv_strcmp("ja", locName) == 0) { 1331 log_verbose("Don't know how to test prefixes\n"); 1332 continue; 1333 } 1334 if(hasCollationElements(locName)) { 1335 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status); 1336 for(j = 0; j<nameSize; j++) { 1337 cName[j] = (char)name[j]; 1338 } 1339 cName[nameSize] = 0; 1340 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1341 coll = ucol_open(locName, &status); 1342 if(U_SUCCESS(status)) { 1343 testAgainstUCA(coll, UCA, "UCA", FALSE, &status); 1344 ucol_close(coll); 1345 } else { 1346 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status)); 1347 status = U_ZERO_ERROR; 1348 } 1349 } 1350 } 1351 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status); 1352 ucol_close(UCA); 1353 } 1354 1355 static void RamsRulesTest(void) { 1356 UErrorCode status = U_ZERO_ERROR; 1357 int32_t i = 0; 1358 UCollator *coll = NULL; 1359 UChar rule[2048]; 1360 uint32_t ruleLen; 1361 int32_t noOfLoc = uloc_countAvailable(); 1362 const char *locName = NULL; 1363 1364 log_verbose("RamsRulesTest\n"); 1365 1366 for(i = 0; i<noOfLoc; i++) { 1367 status = U_ZERO_ERROR; 1368 locName = uloc_getAvailable(i); 1369 if(hasCollationElements(locName)) { 1370 if (uprv_strcmp("ja", locName)==0) { 1371 log_verbose("Don't know how to test Japanese because of prefixes\n"); 1372 continue; 1373 } 1374 if (uprv_strcmp("de__PHONEBOOK", locName)==0) { 1375 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n"); 1376 continue; 1377 } 1378 if (uprv_strcmp("km", locName)==0 || 1379 uprv_strcmp("km_KH", locName)==0 || 1380 uprv_strcmp("si", locName)==0 || 1381 uprv_strcmp("si_LK", locName)==0 || 1382 uprv_strcmp("zh", locName)==0 || 1383 uprv_strcmp("zh_Hant", locName)==0 ) { 1384 continue; /* TODO: enable these locale tests after trac#6040 is fixed. */ 1385 } 1386 log_verbose("Testing locale %s\n", locName); 1387 coll = ucol_open(locName, &status); 1388 if(U_SUCCESS(status)) { 1389 if(coll->image->jamoSpecial == TRUE) { 1390 log_err("%s has special JAMOs\n", locName); 1391 } 1392 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); 1393 testCollator(coll, &status); 1394 testCEs(coll, &status); 1395 ucol_close(coll); 1396 } 1397 } 1398 } 1399 1400 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) { 1401 log_verbose("Testing rule: %s\n", rulesToTest[i]); 1402 ruleLen = u_unescape(rulesToTest[i], rule, 2048); 1403 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1404 if(U_SUCCESS(status)) { 1405 testCollator(coll, &status); 1406 testCEs(coll, &status); 1407 ucol_close(coll); 1408 } 1409 } 1410 1411 } 1412 1413 static void IsTailoredTest(void) { 1414 UErrorCode status = U_ZERO_ERROR; 1415 uint32_t i = 0; 1416 UCollator *coll = NULL; 1417 UChar rule[2048]; 1418 UChar tailored[2048]; 1419 UChar notTailored[2048]; 1420 uint32_t ruleLen, tailoredLen, notTailoredLen; 1421 1422 log_verbose("IsTailoredTest\n"); 1423 1424 u_uastrcpy(rule, "&Z < A, B, C;c < d"); 1425 ruleLen = u_strlen(rule); 1426 1427 u_uastrcpy(tailored, "ABCcd"); 1428 tailoredLen = u_strlen(tailored); 1429 1430 u_uastrcpy(notTailored, "ZabD"); 1431 notTailoredLen = u_strlen(notTailored); 1432 1433 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1434 if(U_SUCCESS(status)) { 1435 for(i = 0; i<tailoredLen; i++) { 1436 if(!ucol_isTailored(coll, tailored[i], &status)) { 1437 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]); 1438 } 1439 } 1440 for(i = 0; i<notTailoredLen; i++) { 1441 if(ucol_isTailored(coll, notTailored[i], &status)) { 1442 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]); 1443 } 1444 } 1445 ucol_close(coll); 1446 } 1447 else { 1448 log_err_status(status, "Can't tailor rules\n"); 1449 } 1450 /* Code coverage */ 1451 status = U_ZERO_ERROR; 1452 coll = ucol_open("ja", &status); 1453 if(!ucol_isTailored(coll, 0x4E9C, &status)) { 1454 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n"); 1455 } 1456 ucol_close(coll); 1457 } 1458 1459 1460 const static char chTest[][20] = { 1461 "c", 1462 "C", 1463 "ca", "cb", "cx", "cy", "CZ", 1464 "c\\u030C", "C\\u030C", 1465 "h", 1466 "H", 1467 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", 1468 "ch", "cH", "Ch", "CH", 1469 "cha", "charly", "che", "chh", "chch", "chr", 1470 "i", "I", "iarly", 1471 "r", "R", 1472 "r\\u030C", "R\\u030C", 1473 "s", 1474 "S", 1475 "s\\u030C", "S\\u030C", 1476 "z", "Z", 1477 "z\\u030C", "Z\\u030C" 1478 }; 1479 1480 static void TestChMove(void) { 1481 UChar t1[256] = {0}; 1482 UChar t2[256] = {0}; 1483 1484 uint32_t i = 0, j = 0; 1485 uint32_t size = 0; 1486 UErrorCode status = U_ZERO_ERROR; 1487 1488 UCollator *coll = ucol_open("cs", &status); 1489 1490 if(U_SUCCESS(status)) { 1491 size = sizeof(chTest)/sizeof(chTest[0]); 1492 for(i = 0; i < size-1; i++) { 1493 for(j = i+1; j < size; j++) { 1494 u_unescape(chTest[i], t1, 256); 1495 u_unescape(chTest[j], t2, 256); 1496 doTest(coll, t1, t2, UCOL_LESS); 1497 } 1498 } 1499 } 1500 else { 1501 log_err("Can't open collator"); 1502 } 1503 ucol_close(coll); 1504 } 1505 1506 1507 1508 1509 const static char impTest[][20] = { 1510 "\\u4e00", 1511 "a", 1512 "A", 1513 "b", 1514 "B", 1515 "\\u4e01" 1516 }; 1517 1518 1519 static void TestImplicitTailoring(void) { 1520 static const struct { 1521 const char *rules; 1522 const char *data[10]; 1523 const uint32_t len; 1524 } tests[] = { 1525 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 }, 1526 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 }, 1527 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3}, 1528 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3} 1529 }; 1530 1531 int32_t i = 0; 1532 1533 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 1534 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 1535 } 1536 1537 /* 1538 UChar t1[256] = {0}; 1539 UChar t2[256] = {0}; 1540 1541 const char *rule = "&\\u4e00 < a <<< A < b <<< B"; 1542 1543 uint32_t i = 0, j = 0; 1544 uint32_t size = 0; 1545 uint32_t ruleLen = 0; 1546 UErrorCode status = U_ZERO_ERROR; 1547 UCollator *coll = NULL; 1548 ruleLen = u_unescape(rule, t1, 256); 1549 1550 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1551 1552 if(U_SUCCESS(status)) { 1553 size = sizeof(impTest)/sizeof(impTest[0]); 1554 for(i = 0; i < size-1; i++) { 1555 for(j = i+1; j < size; j++) { 1556 u_unescape(impTest[i], t1, 256); 1557 u_unescape(impTest[j], t2, 256); 1558 doTest(coll, t1, t2, UCOL_LESS); 1559 } 1560 } 1561 } 1562 else { 1563 log_err("Can't open collator"); 1564 } 1565 ucol_close(coll); 1566 */ 1567 } 1568 1569 static void TestFCDProblem(void) { 1570 UChar t1[256] = {0}; 1571 UChar t2[256] = {0}; 1572 1573 const char *s1 = "\\u0430\\u0306\\u0325"; 1574 const char *s2 = "\\u04D1\\u0325"; 1575 1576 UErrorCode status = U_ZERO_ERROR; 1577 UCollator *coll = ucol_open("", &status); 1578 u_unescape(s1, t1, 256); 1579 u_unescape(s2, t2, 256); 1580 1581 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 1582 doTest(coll, t1, t2, UCOL_EQUAL); 1583 1584 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 1585 doTest(coll, t1, t2, UCOL_EQUAL); 1586 1587 ucol_close(coll); 1588 } 1589 1590 /* 1591 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC 1592 We're only using NFC/NFD in this test. 1593 */ 1594 #define NORM_BUFFER_TEST_LEN 18 1595 typedef struct { 1596 UChar32 u; 1597 UChar NFC[NORM_BUFFER_TEST_LEN]; 1598 UChar NFD[NORM_BUFFER_TEST_LEN]; 1599 } tester; 1600 1601 static void TestComposeDecompose(void) { 1602 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */ 1603 static const UChar UNICODESET_STR[] = { 1604 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61, 1605 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72, 1606 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0 1607 }; 1608 int32_t noOfLoc; 1609 int32_t i = 0, j = 0; 1610 1611 UErrorCode status = U_ZERO_ERROR; 1612 const char *locName = NULL; 1613 uint32_t nfcSize; 1614 uint32_t nfdSize; 1615 tester **t; 1616 uint32_t noCases = 0; 1617 UCollator *coll = NULL; 1618 UChar32 u = 0; 1619 UChar comp[NORM_BUFFER_TEST_LEN]; 1620 uint32_t len = 0; 1621 UCollationElements *iter; 1622 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status); 1623 int32_t charsToTestSize; 1624 1625 noOfLoc = uloc_countAvailable(); 1626 1627 coll = ucol_open("", &status); 1628 if (U_FAILURE(status)) { 1629 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status)); 1630 return; 1631 } 1632 charsToTestSize = uset_size(charsToTest); 1633 if (charsToTestSize <= 0) { 1634 log_err("Set was zero. Missing data?\n"); 1635 return; 1636 } 1637 t = malloc(charsToTestSize * sizeof(tester *)); 1638 t[0] = (tester *)malloc(sizeof(tester)); 1639 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize); 1640 1641 for(u = 0; u < charsToTestSize; u++) { 1642 UChar32 ch = uset_charAt(charsToTest, u); 1643 len = 0; 1644 UTF_APPEND_CHAR_UNSAFE(comp, len, ch); 1645 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1646 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 1647 1648 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0) 1649 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) { 1650 t[noCases]->u = ch; 1651 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) { 1652 u_strncpy(t[noCases]->NFC, comp, len); 1653 t[noCases]->NFC[len] = 0; 1654 } 1655 noCases++; 1656 t[noCases] = (tester *)malloc(sizeof(tester)); 1657 uprv_memset(t[noCases], 0, sizeof(tester)); 1658 } 1659 } 1660 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize); 1661 uset_close(charsToTest); 1662 charsToTest = NULL; 1663 1664 for(u=0; u<(UChar32)noCases; u++) { 1665 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1666 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u); 1667 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1668 } 1669 } 1670 /* 1671 for(u = 0; u < charsToTestSize; u++) { 1672 if(!(u&0xFFFF)) { 1673 log_verbose("%08X ", u); 1674 } 1675 uprv_memset(t[noCases], 0, sizeof(tester)); 1676 t[noCases]->u = u; 1677 len = 0; 1678 UTF_APPEND_CHAR_UNSAFE(comp, len, u); 1679 comp[len] = 0; 1680 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1681 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 1682 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL); 1683 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL); 1684 } 1685 */ 1686 1687 ucol_close(coll); 1688 1689 log_verbose("Testing locales, number of cases = %i\n", noCases); 1690 for(i = 0; i<noOfLoc; i++) { 1691 status = U_ZERO_ERROR; 1692 locName = uloc_getAvailable(i); 1693 if(hasCollationElements(locName)) { 1694 char cName[256]; 1695 UChar name[256]; 1696 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status); 1697 1698 for(j = 0; j<nameSize; j++) { 1699 cName[j] = (char)name[j]; 1700 } 1701 cName[nameSize] = 0; 1702 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1703 1704 coll = ucol_open(locName, &status); 1705 ucol_setStrength(coll, UCOL_IDENTICAL); 1706 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1707 1708 for(u=0; u<(UChar32)noCases; u++) { 1709 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1710 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName); 1711 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1712 log_verbose("Testing NFC\n"); 1713 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status); 1714 backAndForth(iter); 1715 log_verbose("Testing NFD\n"); 1716 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1717 backAndForth(iter); 1718 } 1719 } 1720 ucol_closeElements(iter); 1721 ucol_close(coll); 1722 } 1723 } 1724 for(u = 0; u <= (UChar32)noCases; u++) { 1725 free(t[u]); 1726 } 1727 free(t); 1728 } 1729 1730 static void TestEmptyRule(void) { 1731 UErrorCode status = U_ZERO_ERROR; 1732 UChar rulez[] = { 0 }; 1733 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1734 1735 ucol_close(coll); 1736 } 1737 1738 static void TestUCARules(void) { 1739 UErrorCode status = U_ZERO_ERROR; 1740 UChar b[256]; 1741 UChar *rules = b; 1742 uint32_t ruleLen = 0; 1743 UCollator *UCAfromRules = NULL; 1744 UCollator *coll = ucol_open("", &status); 1745 if(status == U_FILE_ACCESS_ERROR) { 1746 log_data_err("Is your data around?\n"); 1747 return; 1748 } else if(U_FAILURE(status)) { 1749 log_err("Error opening collator\n"); 1750 return; 1751 } 1752 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256); 1753 1754 log_verbose("TestUCARules\n"); 1755 if(ruleLen > 256) { 1756 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar)); 1757 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen); 1758 } 1759 log_verbose("Rules length is %d\n", ruleLen); 1760 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1761 if(U_SUCCESS(status)) { 1762 ucol_close(UCAfromRules); 1763 } else { 1764 log_verbose("Unable to create a collator from UCARules!\n"); 1765 } 1766 /* 1767 u_unescape(blah, b, 256); 1768 ucol_getSortKey(coll, b, 1, res, 256); 1769 */ 1770 ucol_close(coll); 1771 if(rules != b) { 1772 free(rules); 1773 } 1774 } 1775 1776 1777 /* Pinyin tonal order */ 1778 /* 1779 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0) 1780 (w/macron)< (w/acute)< (w/caron)< (w/grave) 1781 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8) 1782 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec) 1783 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2) 1784 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9) 1785 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) < 1786 .. (\u00fc) 1787 1788 However, in testing we got the following order: 1789 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101) 1790 (w/acute)< (w/grave)< (w/caron)< (w/macron) 1791 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) < 1792 .. (\u0113) 1793 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b) 1794 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d) 1795 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) < 1796 .. (\u01d8) 1797 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b) 1798 */ 1799 1800 static void TestBefore(void) { 1801 const static char *data[] = { 1802 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A", 1803 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E", 1804 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I", 1805 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O", 1806 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U", 1807 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc" 1808 }; 1809 genericRulesStarter( 1810 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0" 1811 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8" 1812 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec" 1813 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2" 1814 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9" 1815 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc", 1816 data, sizeof(data)/sizeof(data[0])); 1817 } 1818 1819 #if 0 1820 /* superceded by TestBeforePinyin */ 1821 static void TestJ784(void) { 1822 const static char *data[] = { 1823 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", 1824 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", 1825 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", 1826 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", 1827 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", 1828 "\\u00fc", 1829 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc" 1830 }; 1831 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0])); 1832 } 1833 #endif 1834 1835 #if 0 1836 /* superceded by the changes to the lv locale */ 1837 static void TestJ831(void) { 1838 const static char *data[] = { 1839 "I", 1840 "i", 1841 "Y", 1842 "y" 1843 }; 1844 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0])); 1845 } 1846 #endif 1847 1848 static void TestJ815(void) { 1849 const static char *data[] = { 1850 "aa", 1851 "Aa", 1852 "ab", 1853 "Ab", 1854 "ad", 1855 "Ad", 1856 "ae", 1857 "Ae", 1858 "\\u00e6", 1859 "\\u00c6", 1860 "af", 1861 "Af", 1862 "b", 1863 "B" 1864 }; 1865 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); 1866 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0])); 1867 } 1868 1869 1870 /* 1871 "& a < b < c < d& r < c", "& a < b < d& r < c", 1872 "& a < b < c < d& c < m", "& a < b < c < m < d", 1873 "& a < b < c < d& a < m", "& a < m < b < c < d", 1874 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d", 1875 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d", 1876 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e", 1877 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e", 1878 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e", 1879 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g", 1880 */ 1881 static void TestRedundantRules(void) { 1882 int32_t i; 1883 1884 static const struct { 1885 const char *rules; 1886 const char *expectedRules; 1887 const char *testdata[8]; 1888 uint32_t testdatalen; 1889 } tests[] = { 1890 /* this test conflicts with positioning of CODAN placeholder */ 1891 /*{ 1892 "& a <<< b <<< c << d <<< e& [before 1] e <<< x", 1893 "&\\u2089<<<x", 1894 {"\\u2089", "x"}, 2 1895 }, */ 1896 /* this test conflicts with the [before x] syntax tightening */ 1897 /*{ 1898 "& b <<< c <<< d << e <<< f& [before 1] f <<< x", 1899 "&\\u0252<<<x", 1900 {"\\u0252", "x"}, 2 1901 }, */ 1902 /* this test conflicts with the [before x] syntax tightening */ 1903 /*{ 1904 "& a < b <<< c << d <<< e& [before 1] e <<< x", 1905 "& a <<< x < b <<< c << d <<< e", 1906 {"a", "x", "b", "c", "d", "e"}, 6 1907 }, */ 1908 { 1909 "& a < b < c < d& [before 1] c < m", 1910 "& a < b < m < c < d", 1911 {"a", "b", "m", "c", "d"}, 5 1912 }, 1913 { 1914 "& a < b <<< c << d <<< e& [before 3] e <<< x", 1915 "& a < b <<< c << d <<< x <<< e", 1916 {"a", "b", "c", "d", "x", "e"}, 6 1917 }, 1918 /* this test conflicts with the [before x] syntax tightening */ 1919 /* { 1920 "& a < b <<< c << d <<< e& [before 2] e <<< x", 1921 "& a < b <<< c <<< x << d <<< e", 1922 {"a", "b", "c", "x", "d", "e"},, 6 1923 }, */ 1924 { 1925 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", 1926 "& a < b <<< c << d <<< e <<< f < x < g", 1927 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8 1928 }, 1929 { 1930 "& a <<< b << c < d& a < m", 1931 "& a <<< b << c < m < d", 1932 {"a", "b", "c", "m", "d"}, 5 1933 }, 1934 { 1935 "&a<b<<b\\u0301 &z<b", 1936 "&a<b\\u0301 &z<b", 1937 {"a", "b\\u0301", "z", "b"}, 4 1938 }, 1939 { 1940 "&z<m<<<q<<<m", 1941 "&z<q<<<m", 1942 {"z", "q", "m"},3 1943 }, 1944 { 1945 "&z<<<m<q<<<m", 1946 "&z<q<<<m", 1947 {"z", "q", "m"}, 3 1948 }, 1949 { 1950 "& a < b < c < d& r < c", 1951 "& a < b < d& r < c", 1952 {"a", "b", "d"}, 3 1953 }, 1954 { 1955 "& a < b < c < d& r < c", 1956 "& a < b < d& r < c", 1957 {"r", "c"}, 2 1958 }, 1959 { 1960 "& a < b < c < d& c < m", 1961 "& a < b < c < m < d", 1962 {"a", "b", "c", "m", "d"}, 5 1963 }, 1964 { 1965 "& a < b < c < d& a < m", 1966 "& a < m < b < c < d", 1967 {"a", "m", "b", "c", "d"}, 5 1968 } 1969 }; 1970 1971 1972 UCollator *credundant = NULL; 1973 UCollator *cresulting = NULL; 1974 UErrorCode status = U_ZERO_ERROR; 1975 UChar rlz[2048] = { 0 }; 1976 uint32_t rlen = 0; 1977 1978 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) { 1979 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules); 1980 rlen = u_unescape(tests[i].rules, rlz, 2048); 1981 1982 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 1983 if(status == U_FILE_ACCESS_ERROR) { 1984 log_data_err("Is your data around?\n"); 1985 return; 1986 } else if(U_FAILURE(status)) { 1987 log_err("Error opening collator\n"); 1988 return; 1989 } 1990 1991 rlen = u_unescape(tests[i].expectedRules, rlz, 2048); 1992 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 1993 1994 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status); 1995 1996 ucol_close(credundant); 1997 ucol_close(cresulting); 1998 1999 log_verbose("testing using data\n"); 2000 2001 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen); 2002 } 2003 2004 } 2005 2006 static void TestExpansionSyntax(void) { 2007 int32_t i; 2008 2009 const static char *rules[] = { 2010 "&AE <<< a << b <<< c &d <<< f", 2011 "&AE <<< a <<< b << c << d < e < f <<< g", 2012 "&AE <<< B <<< C / D <<< F" 2013 }; 2014 2015 const static char *expectedRules[] = { 2016 "&A <<< a / E << b / E <<< c /E &d <<< f", 2017 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g", 2018 "&A <<< B / E <<< C / ED <<< F / E" 2019 }; 2020 2021 const static char *testdata[][8] = { 2022 {"AE", "a", "b", "c"}, 2023 {"AE", "a", "b", "c", "d", "e", "f", "g"}, 2024 {"AE", "B", "C"} /* / ED <<< F / E"},*/ 2025 }; 2026 2027 const static uint32_t testdatalen[] = { 2028 4, 2029 8, 2030 3 2031 }; 2032 2033 2034 2035 UCollator *credundant = NULL; 2036 UCollator *cresulting = NULL; 2037 UErrorCode status = U_ZERO_ERROR; 2038 UChar rlz[2048] = { 0 }; 2039 uint32_t rlen = 0; 2040 2041 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) { 2042 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]); 2043 rlen = u_unescape(rules[i], rlz, 2048); 2044 2045 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2046 if(status == U_FILE_ACCESS_ERROR) { 2047 log_data_err("Is your data around?\n"); 2048 return; 2049 } else if(U_FAILURE(status)) { 2050 log_err("Error opening collator\n"); 2051 return; 2052 } 2053 rlen = u_unescape(expectedRules[i], rlz, 2048); 2054 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2055 2056 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */ 2057 /* as a hard error test, but only in information mode */ 2058 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status); 2059 2060 ucol_close(credundant); 2061 ucol_close(cresulting); 2062 2063 log_verbose("testing using data\n"); 2064 2065 genericRulesStarter(rules[i], testdata[i], testdatalen[i]); 2066 } 2067 } 2068 2069 static void TestCase(void) 2070 { 2071 const static UChar gRules[MAX_TOKEN_LEN] = 2072 /*" & 0 < 1,\u2461<a,A"*/ 2073 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 }; 2074 2075 const static UChar testCase[][MAX_TOKEN_LEN] = 2076 { 2077 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, 2078 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, 2079 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000}, 2080 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000} 2081 }; 2082 2083 const static UCollationResult caseTestResults[][9] = 2084 { 2085 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2086 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }, 2087 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2088 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER } 2089 }; 2090 2091 const static UColAttributeValue caseTestAttributes[][2] = 2092 { 2093 { UCOL_LOWER_FIRST, UCOL_OFF}, 2094 { UCOL_UPPER_FIRST, UCOL_OFF}, 2095 { UCOL_LOWER_FIRST, UCOL_ON}, 2096 { UCOL_UPPER_FIRST, UCOL_ON} 2097 }; 2098 int32_t i,j,k; 2099 UErrorCode status = U_ZERO_ERROR; 2100 UCollationElements *iter; 2101 UCollator *myCollation; 2102 myCollation = ucol_open("en_US", &status); 2103 2104 if(U_FAILURE(status)){ 2105 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2106 return; 2107 } 2108 log_verbose("Testing different case settings\n"); 2109 ucol_setStrength(myCollation, UCOL_TERTIARY); 2110 2111 for(k = 0; k<4; k++) { 2112 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2113 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2114 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]); 2115 for (i = 0; i < 3 ; i++) { 2116 for(j = i+1; j<4; j++) { 2117 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2118 } 2119 } 2120 } 2121 ucol_close(myCollation); 2122 2123 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status); 2124 if(U_FAILURE(status)){ 2125 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2126 return; 2127 } 2128 log_verbose("Testing different case settings with custom rules\n"); 2129 ucol_setStrength(myCollation, UCOL_TERTIARY); 2130 2131 for(k = 0; k<4; k++) { 2132 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2133 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2134 for (i = 0; i < 3 ; i++) { 2135 for(j = i+1; j<4; j++) { 2136 log_verbose("k:%d, i:%d, j:%d\n", k, i, j); 2137 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2138 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status); 2139 backAndForth(iter); 2140 ucol_closeElements(iter); 2141 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status); 2142 backAndForth(iter); 2143 ucol_closeElements(iter); 2144 } 2145 } 2146 } 2147 ucol_close(myCollation); 2148 { 2149 const static char *lowerFirst[] = { 2150 "h", 2151 "H", 2152 "ch", 2153 "Ch", 2154 "CH", 2155 "cha", 2156 "chA", 2157 "Cha", 2158 "ChA", 2159 "CHa", 2160 "CHA", 2161 "i", 2162 "I" 2163 }; 2164 2165 const static char *upperFirst[] = { 2166 "H", 2167 "h", 2168 "CH", 2169 "Ch", 2170 "ch", 2171 "CHA", 2172 "CHa", 2173 "ChA", 2174 "Cha", 2175 "chA", 2176 "cha", 2177 "I", 2178 "i" 2179 }; 2180 log_verbose("mixed case test\n"); 2181 log_verbose("lower first, case level off\n"); 2182 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2183 log_verbose("upper first, case level off\n"); 2184 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2185 log_verbose("lower first, case level on\n"); 2186 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2187 log_verbose("upper first, case level on\n"); 2188 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2189 } 2190 2191 } 2192 2193 static void TestIncrementalNormalize(void) { 2194 2195 /*UChar baseA =0x61;*/ 2196 UChar baseA =0x41; 2197 /* UChar baseB = 0x42;*/ 2198 static const UChar ccMix[] = {0x316, 0x321, 0x300}; 2199 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/ 2200 /* 2201 0x316 is combining grave accent below, cc=220 2202 0x321 is combining palatalized hook below, cc=202 2203 0x300 is combining grave accent, cc=230 2204 */ 2205 2206 #define MAXSLEN 2000 2207 /*int maxSLen = 64000;*/ 2208 int sLen; 2209 int i; 2210 2211 UCollator *coll; 2212 UErrorCode status = U_ZERO_ERROR; 2213 UCollationResult result; 2214 2215 int32_t myQ = QUICK; 2216 2217 if(QUICK < 0) { 2218 QUICK = 1; 2219 } 2220 2221 { 2222 /* Test 1. Run very long unnormalized strings, to force overflow of*/ 2223 /* most buffers along the way.*/ 2224 UChar strA[MAXSLEN+1]; 2225 UChar strB[MAXSLEN+1]; 2226 2227 coll = ucol_open("en_US", &status); 2228 if(status == U_FILE_ACCESS_ERROR) { 2229 log_data_err("Is your data around?\n"); 2230 return; 2231 } else if(U_FAILURE(status)) { 2232 log_err("Error opening collator\n"); 2233 return; 2234 } 2235 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 2236 2237 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/ 2238 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/ 2239 /*for (sLen = 1000; sLen<1001; sLen++) {*/ 2240 for (sLen = 500; sLen<501; sLen++) { 2241 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/ 2242 strA[0] = baseA; 2243 strB[0] = baseA; 2244 for (i=1; i<=sLen-1; i++) { 2245 strA[i] = ccMix[i % 3]; 2246 strB[sLen-i] = ccMix[i % 3]; 2247 } 2248 strA[sLen] = 0; 2249 strB[sLen] = 0; 2250 2251 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/ 2252 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/ 2253 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/ 2254 doTest(coll, strA, strB, UCOL_EQUAL); 2255 } 2256 } 2257 2258 QUICK = myQ; 2259 2260 2261 /* Test 2: Non-normal sequence in a string that extends to the last character*/ 2262 /* of the string. Checks a couple of edge cases.*/ 2263 2264 { 2265 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0}; 2266 static const UChar strB[] = {0x41, 0xc0, 0x316, 0}; 2267 ucol_setStrength(coll, UCOL_TERTIARY); 2268 doTest(coll, strA, strB, UCOL_EQUAL); 2269 } 2270 2271 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/ 2272 2273 { 2274 /* New UCA 3.1.1. 2275 * test below used a code point from Desseret, which sorts differently 2276 * than d800 dc00 2277 */ 2278 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/ 2279 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0}; 2280 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0}; 2281 ucol_setStrength(coll, UCOL_TERTIARY); 2282 doTest(coll, strA, strB, UCOL_GREATER); 2283 } 2284 2285 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/ 2286 2287 { 2288 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00}; 2289 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00}; 2290 char sortKeyA[50]; 2291 char sortKeyAz[50]; 2292 char sortKeyB[50]; 2293 char sortKeyBz[50]; 2294 int r; 2295 2296 /* there used to be -3 here. Hmmmm.... */ 2297 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/ 2298 result = ucol_strcoll(coll, strA, 3, strB, 3); 2299 if (result != UCOL_GREATER) { 2300 log_err("ERROR 1 in test 4\n"); 2301 } 2302 result = ucol_strcoll(coll, strA, -1, strB, -1); 2303 if (result != UCOL_EQUAL) { 2304 log_err("ERROR 2 in test 4\n"); 2305 } 2306 2307 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2308 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2309 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2310 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2311 2312 r = strcmp(sortKeyA, sortKeyAz); 2313 if (r <= 0) { 2314 log_err("Error 3 in test 4\n"); 2315 } 2316 r = strcmp(sortKeyA, sortKeyB); 2317 if (r <= 0) { 2318 log_err("Error 4 in test 4\n"); 2319 } 2320 r = strcmp(sortKeyAz, sortKeyBz); 2321 if (r != 0) { 2322 log_err("Error 5 in test 4\n"); 2323 } 2324 2325 ucol_setStrength(coll, UCOL_IDENTICAL); 2326 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2327 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2328 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2329 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2330 2331 r = strcmp(sortKeyA, sortKeyAz); 2332 if (r <= 0) { 2333 log_err("Error 6 in test 4\n"); 2334 } 2335 r = strcmp(sortKeyA, sortKeyB); 2336 if (r <= 0) { 2337 log_err("Error 7 in test 4\n"); 2338 } 2339 r = strcmp(sortKeyAz, sortKeyBz); 2340 if (r != 0) { 2341 log_err("Error 8 in test 4\n"); 2342 } 2343 ucol_setStrength(coll, UCOL_TERTIARY); 2344 } 2345 2346 2347 /* Test 5: Null characters in non-normal source strings.*/ 2348 2349 { 2350 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00}; 2351 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00}; 2352 char sortKeyA[50]; 2353 char sortKeyAz[50]; 2354 char sortKeyB[50]; 2355 char sortKeyBz[50]; 2356 int r; 2357 2358 result = ucol_strcoll(coll, strA, 6, strB, 6); 2359 if (result != UCOL_GREATER) { 2360 log_err("ERROR 1 in test 5\n"); 2361 } 2362 result = ucol_strcoll(coll, strA, -1, strB, -1); 2363 if (result != UCOL_EQUAL) { 2364 log_err("ERROR 2 in test 5\n"); 2365 } 2366 2367 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2368 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2369 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2370 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2371 2372 r = strcmp(sortKeyA, sortKeyAz); 2373 if (r <= 0) { 2374 log_err("Error 3 in test 5\n"); 2375 } 2376 r = strcmp(sortKeyA, sortKeyB); 2377 if (r <= 0) { 2378 log_err("Error 4 in test 5\n"); 2379 } 2380 r = strcmp(sortKeyAz, sortKeyBz); 2381 if (r != 0) { 2382 log_err("Error 5 in test 5\n"); 2383 } 2384 2385 ucol_setStrength(coll, UCOL_IDENTICAL); 2386 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2387 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2388 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2389 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2390 2391 r = strcmp(sortKeyA, sortKeyAz); 2392 if (r <= 0) { 2393 log_err("Error 6 in test 5\n"); 2394 } 2395 r = strcmp(sortKeyA, sortKeyB); 2396 if (r <= 0) { 2397 log_err("Error 7 in test 5\n"); 2398 } 2399 r = strcmp(sortKeyAz, sortKeyBz); 2400 if (r != 0) { 2401 log_err("Error 8 in test 5\n"); 2402 } 2403 ucol_setStrength(coll, UCOL_TERTIARY); 2404 } 2405 2406 2407 /* Test 6: Null character as base of a non-normal combining sequence.*/ 2408 2409 { 2410 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00}; 2411 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00}; 2412 2413 result = ucol_strcoll(coll, strA, 5, strB, 5); 2414 if (result != UCOL_LESS) { 2415 log_err("Error 1 in test 6\n"); 2416 } 2417 result = ucol_strcoll(coll, strA, -1, strB, -1); 2418 if (result != UCOL_EQUAL) { 2419 log_err("Error 2 in test 6\n"); 2420 } 2421 } 2422 2423 ucol_close(coll); 2424 } 2425 2426 2427 2428 #if 0 2429 static void TestGetCaseBit(void) { 2430 static const char *caseBitData[] = { 2431 "a", "A", "ch", "Ch", "CH", 2432 "\\uFF9E", "\\u0009" 2433 }; 2434 2435 static const uint8_t results[] = { 2436 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE, 2437 UCOL_UPPER_CASE, UCOL_LOWER_CASE 2438 }; 2439 2440 uint32_t i, blen = 0; 2441 UChar b[256] = {0}; 2442 UErrorCode status = U_ZERO_ERROR; 2443 UCollator *UCA = ucol_open("", &status); 2444 uint8_t res = 0; 2445 2446 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) { 2447 blen = u_unescape(caseBitData[i], b, 256); 2448 res = ucol_uprv_getCaseBits(UCA, b, blen, &status); 2449 if(results[i] != res) { 2450 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]); 2451 } 2452 } 2453 } 2454 #endif 2455 2456 static void TestHangulTailoring(void) { 2457 static const char *koreanData[] = { 2458 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475", 2459 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef", 2460 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888", 2461 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5", 2462 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E", 2463 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C" 2464 }; 2465 2466 const char *rules = 2467 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 " 2468 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef " 2469 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 " 2470 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 " 2471 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E " 2472 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C"; 2473 2474 2475 UErrorCode status = U_ZERO_ERROR; 2476 UChar rlz[2048] = { 0 }; 2477 uint32_t rlen = u_unescape(rules, rlz, 2048); 2478 2479 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 2480 if(status == U_FILE_ACCESS_ERROR) { 2481 log_data_err("Is your data around?\n"); 2482 return; 2483 } else if(U_FAILURE(status)) { 2484 log_err("Error opening collator\n"); 2485 return; 2486 } 2487 2488 log_verbose("Using start of korean rules\n"); 2489 2490 if(U_SUCCESS(status)) { 2491 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2492 } else { 2493 log_err("Unable to open collator with rules %s\n", rules); 2494 } 2495 2496 log_verbose("Setting jamoSpecial to TRUE and testing once more\n"); 2497 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */ 2498 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2499 2500 ucol_close(coll); 2501 2502 log_verbose("Using ko__LOTUS locale\n"); 2503 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2504 } 2505 2506 static void TestCompressOverlap(void) { 2507 UChar secstr[150]; 2508 UChar tertstr[150]; 2509 UErrorCode status = U_ZERO_ERROR; 2510 UCollator *coll; 2511 char result[200]; 2512 uint32_t resultlen; 2513 int count = 0; 2514 char *tempptr; 2515 2516 coll = ucol_open("", &status); 2517 2518 if (U_FAILURE(status)) { 2519 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status)); 2520 return; 2521 } 2522 while (count < 149) { 2523 secstr[count] = 0x0020; /* [06, 05, 05] */ 2524 tertstr[count] = 0x0020; 2525 count ++; 2526 } 2527 2528 /* top down compression ----------------------------------- */ 2529 secstr[count] = 0x0332; /* [, 87, 05] */ 2530 tertstr[count] = 0x3000; /* [06, 05, 07] */ 2531 2532 /* no compression secstr should have 150 secondary bytes, tertstr should 2533 have 150 tertiary bytes. 2534 with correct overlapping compression, secstr should have 4 secondary 2535 bytes, tertstr should have > 2 tertiary bytes */ 2536 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2537 tempptr = uprv_strchr(result, 1) + 1; 2538 while (*(tempptr + 1) != 1) { 2539 /* the last secondary collation element is not checked since it is not 2540 part of the compression */ 2541 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { 2542 log_err("Secondary compression overlapped\n"); 2543 } 2544 tempptr ++; 2545 } 2546 2547 /* tertiary top/bottom/common for en_US is similar to the secondary 2548 top/bottom/common */ 2549 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2550 tempptr = uprv_strrchr(result, 1) + 1; 2551 while (*(tempptr + 1) != 0) { 2552 /* the last secondary collation element is not checked since it is not 2553 part of the compression */ 2554 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { 2555 log_err("Tertiary compression overlapped\n"); 2556 } 2557 tempptr ++; 2558 } 2559 2560 /* bottom up compression ------------------------------------- */ 2561 secstr[count] = 0; 2562 tertstr[count] = 0; 2563 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2564 tempptr = uprv_strchr(result, 1) + 1; 2565 while (*(tempptr + 1) != 1) { 2566 /* the last secondary collation element is not checked since it is not 2567 part of the compression */ 2568 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { 2569 log_err("Secondary compression overlapped\n"); 2570 } 2571 tempptr ++; 2572 } 2573 2574 /* tertiary top/bottom/common for en_US is similar to the secondary 2575 top/bottom/common */ 2576 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2577 tempptr = uprv_strrchr(result, 1) + 1; 2578 while (*(tempptr + 1) != 0) { 2579 /* the last secondary collation element is not checked since it is not 2580 part of the compression */ 2581 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) { 2582 log_err("Tertiary compression overlapped\n"); 2583 } 2584 tempptr ++; 2585 } 2586 2587 ucol_close(coll); 2588 } 2589 2590 static void TestCyrillicTailoring(void) { 2591 static const char *test[] = { 2592 "\\u0410b", 2593 "\\u0410\\u0306a", 2594 "\\u04d0A" 2595 }; 2596 2597 /* Russian overrides contractions, so this test is not valid anymore */ 2598 /*genericLocaleStarter("ru", test, 3);*/ 2599 2600 genericLocaleStarter("root", test, 3); 2601 genericRulesStarter("&\\u0410 = \\u0410", test, 3); 2602 genericRulesStarter("&Z < \\u0410", test, 3); 2603 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); 2604 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); 2605 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); 2606 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); 2607 } 2608 2609 static void TestSuppressContractions(void) { 2610 2611 static const char *testNoCont2[] = { 2612 "\\u0410\\u0302a", 2613 "\\u0410\\u0306b", 2614 "\\u0410c" 2615 }; 2616 static const char *testNoCont[] = { 2617 "a\\u0410", 2618 "A\\u0410\\u0306", 2619 "\\uFF21\\u0410\\u0302" 2620 }; 2621 2622 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3); 2623 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3); 2624 } 2625 2626 static void TestContraction(void) { 2627 const static char *testrules[] = { 2628 "&A = AB / B", 2629 "&A = A\\u0306/\\u0306", 2630 "&c = ch / h" 2631 }; 2632 const static UChar testdata[][2] = { 2633 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, 2634 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, 2635 {0x0063 /* 'c' */, 0x0068 /* 'h' */} 2636 }; 2637 const static UChar testdata2[][2] = { 2638 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, 2639 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, 2640 {0x0063 /* 'c' */, 0x006C /* 'l' */} 2641 }; 2642 const static char *testrules3[] = { 2643 "&z < xyz &xyzw << B", 2644 "&z < xyz &xyz << B / w", 2645 "&z < ch &achm << B", 2646 "&z < ch &a << B / chm", 2647 "&\\ud800\\udc00w << B", 2648 "&\\ud800\\udc00 << B / w", 2649 "&a\\ud800\\udc00m << B", 2650 "&a << B / \\ud800\\udc00m", 2651 }; 2652 2653 UErrorCode status = U_ZERO_ERROR; 2654 UCollator *coll; 2655 UChar rule[256] = {0}; 2656 uint32_t rlen = 0; 2657 int i; 2658 2659 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2660 UCollationElements *iter1; 2661 int j = 0; 2662 log_verbose("Rule %s for testing\n", testrules[i]); 2663 rlen = u_unescape(testrules[i], rule, 32); 2664 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2665 if (U_FAILURE(status)) { 2666 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2667 return; 2668 } 2669 iter1 = ucol_openElements(coll, testdata[i], 2, &status); 2670 if (U_FAILURE(status)) { 2671 log_err("Collation iterator creation failed\n"); 2672 return; 2673 } 2674 while (j < 2) { 2675 UCollationElements *iter2 = ucol_openElements(coll, 2676 &(testdata[i][j]), 2677 1, &status); 2678 uint32_t ce; 2679 if (U_FAILURE(status)) { 2680 log_err("Collation iterator creation failed\n"); 2681 return; 2682 } 2683 ce = ucol_next(iter2, &status); 2684 while (ce != UCOL_NULLORDER) { 2685 if ((uint32_t)ucol_next(iter1, &status) != ce) { 2686 log_err("Collation elements in contraction split does not match\n"); 2687 return; 2688 } 2689 ce = ucol_next(iter2, &status); 2690 } 2691 j ++; 2692 ucol_closeElements(iter2); 2693 } 2694 if (ucol_next(iter1, &status) != UCOL_NULLORDER) { 2695 log_err("Collation elements not exhausted\n"); 2696 return; 2697 } 2698 ucol_closeElements(iter1); 2699 ucol_close(coll); 2700 } 2701 2702 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256); 2703 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2704 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) { 2705 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2706 testdata2[0][0], testdata2[0][1], testdata2[1][0], 2707 testdata2[1][1]); 2708 return; 2709 } 2710 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { 2711 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2712 testdata2[1][0], testdata2[1][1], testdata2[2][0], 2713 testdata2[2][1]); 2714 return; 2715 } 2716 ucol_close(coll); 2717 2718 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { 2719 UCollator *coll1, 2720 *coll2; 2721 UCollationElements *iter1, 2722 *iter2; 2723 UChar ch = 0x0042 /* 'B' */; 2724 uint32_t ce; 2725 rlen = u_unescape(testrules3[i], rule, 32); 2726 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2727 rlen = u_unescape(testrules3[i + 1], rule, 32); 2728 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2729 if (U_FAILURE(status)) { 2730 log_err("Collator creation failed %s\n", testrules[i]); 2731 return; 2732 } 2733 iter1 = ucol_openElements(coll1, &ch, 1, &status); 2734 iter2 = ucol_openElements(coll2, &ch, 1, &status); 2735 if (U_FAILURE(status)) { 2736 log_err("Collation iterator creation failed\n"); 2737 return; 2738 } 2739 ce = ucol_next(iter1, &status); 2740 if (U_FAILURE(status)) { 2741 log_err("Retrieving ces failed\n"); 2742 return; 2743 } 2744 while (ce != UCOL_NULLORDER) { 2745 if (ce != (uint32_t)ucol_next(iter2, &status)) { 2746 log_err("CEs does not match\n"); 2747 return; 2748 } 2749 ce = ucol_next(iter1, &status); 2750 if (U_FAILURE(status)) { 2751 log_err("Retrieving ces failed\n"); 2752 return; 2753 } 2754 } 2755 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 2756 log_err("CEs not exhausted\n"); 2757 return; 2758 } 2759 ucol_closeElements(iter1); 2760 ucol_closeElements(iter2); 2761 ucol_close(coll1); 2762 ucol_close(coll2); 2763 } 2764 } 2765 2766 static void TestExpansion(void) { 2767 const static char *testrules[] = { 2768 "&J << K / B & K << M", 2769 "&J << K / B << M" 2770 }; 2771 const static UChar testdata[][3] = { 2772 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, 2773 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, 2774 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, 2775 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, 2776 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, 2777 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} 2778 }; 2779 2780 UErrorCode status = U_ZERO_ERROR; 2781 UCollator *coll; 2782 UChar rule[256] = {0}; 2783 uint32_t rlen = 0; 2784 int i; 2785 2786 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2787 int j = 0; 2788 log_verbose("Rule %s for testing\n", testrules[i]); 2789 rlen = u_unescape(testrules[i], rule, 32); 2790 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2791 if (U_FAILURE(status)) { 2792 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2793 return; 2794 } 2795 2796 for (j = 0; j < 5; j ++) { 2797 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS); 2798 } 2799 ucol_close(coll); 2800 } 2801 } 2802 2803 #if 0 2804 /* this test tests the current limitations of the engine */ 2805 /* it always fail, so it is disabled by default */ 2806 static void TestLimitations(void) { 2807 /* recursive expansions */ 2808 { 2809 static const char *rule = "&a=b/c&d=c/e"; 2810 static const char *tlimit01[] = {"add","b","adf"}; 2811 static const char *tlimit02[] = {"aa","b","af"}; 2812 log_verbose("recursive expansions\n"); 2813 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 2814 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 2815 } 2816 /* contractions spanning expansions */ 2817 { 2818 static const char *rule = "&a<<<c/e&g<<<eh"; 2819 static const char *tlimit01[] = {"ad","c","af","f","ch","h"}; 2820 static const char *tlimit02[] = {"ad","c","ch","af","f","h"}; 2821 log_verbose("contractions spanning expansions\n"); 2822 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 2823 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 2824 } 2825 /* normalization: nulls in contractions */ 2826 { 2827 static const char *rule = "&a<<<\\u0000\\u0302"; 2828 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 2829 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 2830 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 2831 static const UColAttributeValue valOn[] = { UCOL_ON }; 2832 static const UColAttributeValue valOff[] = { UCOL_OFF }; 2833 2834 log_verbose("NULL in contractions\n"); 2835 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 2836 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 2837 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 2838 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 2839 2840 } 2841 /* normalization: contractions spanning normalization */ 2842 { 2843 static const char *rule = "&a<<<\\u0000\\u0302"; 2844 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 2845 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 2846 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 2847 static const UColAttributeValue valOn[] = { UCOL_ON }; 2848 static const UColAttributeValue valOff[] = { UCOL_OFF }; 2849 2850 log_verbose("contractions spanning normalization\n"); 2851 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 2852 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 2853 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 2854 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 2855 2856 } 2857 /* variable top: */ 2858 { 2859 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/ 2860 static const char *rule = "&\\u2010<x<[variable top]=z"; 2861 /*static const char *rule3 = "&' '<x<[variable top]=z";*/ 2862 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" }; 2863 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"}; 2864 static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" }; 2865 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }; 2866 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY }; 2867 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY }; 2868 2869 log_verbose("variable top\n"); 2870 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2871 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2872 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2873 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0])); 2874 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0])); 2875 2876 } 2877 /* case level */ 2878 { 2879 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH"; 2880 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"}; 2881 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"}; 2882 static const UColAttribute att[] = { UCOL_CASE_FIRST}; 2883 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST}; 2884 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/ 2885 log_verbose("case level\n"); 2886 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2887 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2888 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 2889 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 2890 } 2891 2892 } 2893 #endif 2894 2895 static void TestBocsuCoverage(void) { 2896 UErrorCode status = U_ZERO_ERROR; 2897 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041"; 2898 UChar test[256] = {0}; 2899 uint32_t tlen = u_unescape(testString, test, 32); 2900 uint8_t key[256] = {0}; 2901 uint32_t klen = 0; 2902 2903 UCollator *coll = ucol_open("", &status); 2904 if(U_SUCCESS(status)) { 2905 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 2906 2907 klen = ucol_getSortKey(coll, test, tlen, key, 256); 2908 2909 ucol_close(coll); 2910 } else { 2911 log_data_err("Couldn't open UCA\n"); 2912 } 2913 } 2914 2915 static void TestVariableTopSetting(void) { 2916 UErrorCode status = U_ZERO_ERROR; 2917 const UChar *current = NULL; 2918 uint32_t varTopOriginal = 0, varTop1, varTop2; 2919 UCollator *coll = ucol_open("", &status); 2920 if(U_SUCCESS(status)) { 2921 2922 uint32_t strength = 0; 2923 uint16_t specs = 0; 2924 uint32_t chOffset = 0; 2925 uint32_t chLen = 0; 2926 uint32_t exOffset = 0; 2927 uint32_t exLen = 0; 2928 uint32_t oldChOffset = 0; 2929 uint32_t oldChLen = 0; 2930 uint32_t oldExOffset = 0; 2931 uint32_t oldExLen = 0; 2932 uint32_t prefixOffset = 0; 2933 uint32_t prefixLen = 0; 2934 2935 UBool startOfRules = TRUE; 2936 UColTokenParser src; 2937 UColOptionSet opts; 2938 2939 UChar *rulesCopy = NULL; 2940 uint32_t rulesLen; 2941 2942 UCollationResult result; 2943 2944 UChar first[256] = { 0 }; 2945 UChar second[256] = { 0 }; 2946 UParseError parseError; 2947 int32_t myQ = QUICK; 2948 2949 src.opts = &opts; 2950 2951 if(QUICK <= 0) { 2952 QUICK = 1; 2953 } 2954 2955 /* this test will fail when normalization is turned on */ 2956 /* therefore we always turn off exhaustive mode for it */ 2957 { /* QUICK > 0*/ 2958 log_verbose("Slide variable top over UCARules\n"); 2959 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); 2960 rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 2961 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE); 2962 2963 if(U_SUCCESS(status) && rulesLen > 0) { 2964 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 2965 src.current = src.source = rulesCopy; 2966 src.end = rulesCopy+rulesLen; 2967 src.extraCurrent = src.end; 2968 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 2969 2970 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { 2971 strength = src.parsedToken.strength; 2972 chOffset = src.parsedToken.charsOffset; 2973 chLen = src.parsedToken.charsLen; 2974 exOffset = src.parsedToken.extensionOffset; 2975 exLen = src.parsedToken.extensionLen; 2976 prefixOffset = src.parsedToken.prefixOffset; 2977 prefixLen = src.parsedToken.prefixLen; 2978 specs = src.parsedToken.flags; 2979 2980 startOfRules = FALSE; 2981 { 2982 log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen); 2983 } 2984 if(strength == UCOL_PRIMARY) { 2985 status = U_ZERO_ERROR; 2986 varTopOriginal = ucol_getVariableTop(coll, &status); 2987 varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status); 2988 if(U_FAILURE(status)) { 2989 char buffer[256]; 2990 char *buf = buffer; 2991 uint32_t i = 0, j; 2992 uint32_t CE = UCOL_NO_MORE_CES; 2993 2994 /* before we start screaming, let's see if there is a problem with the rules */ 2995 collIterate s; 2996 uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s); 2997 2998 CE = ucol_getNextCE(coll, &s, &status); 2999 3000 for(i = 0; i < oldChLen; i++) { 3001 j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i)); 3002 buf += j; 3003 } 3004 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3005 log_verbose("= Expected failure for %s =", buffer); 3006 } else { 3007 if(s.pos == s.endp) { 3008 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n", 3009 oldChOffset, u_errorName(status), buffer); 3010 } else { 3011 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n", 3012 buffer); 3013 } 3014 } 3015 } 3016 varTop2 = ucol_getVariableTop(coll, &status); 3017 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { 3018 log_err("cannot retrieve set varTop value!\n"); 3019 continue; 3020 } 3021 3022 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) { 3023 3024 u_strncpy(first, rulesCopy+oldChOffset, oldChLen); 3025 u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen); 3026 u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen); 3027 first[2*oldChLen+chLen] = 0; 3028 3029 if(oldExLen == 0) { 3030 u_strncpy(second, rulesCopy+chOffset, chLen); 3031 second[chLen] = 0; 3032 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */ 3033 u_strncpy(second, rulesCopy+oldExOffset, oldExLen); 3034 u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen); 3035 u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen); 3036 second[2*oldExLen+chLen] = 0; 3037 } 3038 result = ucol_strcoll(coll, first, -1, second, -1); 3039 if(result == UCOL_EQUAL) { 3040 doTest(coll, first, second, UCOL_EQUAL); 3041 } else { 3042 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset)); 3043 } 3044 } 3045 } 3046 if(strength != UCOL_TOK_RESET) { 3047 oldChOffset = chOffset; 3048 oldChLen = chLen; 3049 oldExOffset = exOffset; 3050 oldExLen = exLen; 3051 } 3052 } 3053 status = U_ZERO_ERROR; 3054 } 3055 else { 3056 log_err("Unexpected failure getting rules %s\n", u_errorName(status)); 3057 return; 3058 } 3059 if (U_FAILURE(status)) { 3060 log_err("Error parsing rules %s\n", u_errorName(status)); 3061 return; 3062 } 3063 status = U_ZERO_ERROR; 3064 } 3065 3066 QUICK = myQ; 3067 3068 log_verbose("Testing setting variable top to contractions\n"); 3069 { 3070 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */ 3071 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/ 3072 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos); 3073 while(*conts != 0) { 3074 if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */ 3075 varTop1 = ucol_setVariableTop(coll, conts, -1, &status); 3076 } else { 3077 varTop1 = ucol_setVariableTop(coll, conts, 3, &status); 3078 } 3079 if(U_FAILURE(status)) { 3080 log_err("Couldn't set variable top to a contraction %04X %04X %04X\n", 3081 *conts, *(conts+1), *(conts+2)); 3082 status = U_ZERO_ERROR; 3083 } 3084 conts+=3; 3085 } 3086 3087 status = U_ZERO_ERROR; 3088 3089 first[0] = 0x0040; 3090 first[1] = 0x0050; 3091 first[2] = 0x0000; 3092 3093 ucol_setVariableTop(coll, first, -1, &status); 3094 3095 if(U_SUCCESS(status)) { 3096 log_err("Invalid contraction succeded in setting variable top!\n"); 3097 } 3098 3099 } 3100 3101 log_verbose("Test restoring variable top\n"); 3102 3103 status = U_ZERO_ERROR; 3104 ucol_restoreVariableTop(coll, varTopOriginal, &status); 3105 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { 3106 log_err("Couldn't restore old variable top\n"); 3107 } 3108 3109 log_verbose("Testing calling with error set\n"); 3110 3111 status = U_INTERNAL_PROGRAM_ERROR; 3112 varTop1 = ucol_setVariableTop(coll, first, 1, &status); 3113 varTop2 = ucol_getVariableTop(coll, &status); 3114 ucol_restoreVariableTop(coll, varTop2, &status); 3115 varTop1 = ucol_setVariableTop(NULL, first, 1, &status); 3116 varTop2 = ucol_getVariableTop(NULL, &status); 3117 ucol_restoreVariableTop(NULL, varTop2, &status); 3118 if(status != U_INTERNAL_PROGRAM_ERROR) { 3119 log_err("Bad reaction to passed error!\n"); 3120 } 3121 free(rulesCopy); 3122 ucol_close(coll); 3123 } else { 3124 log_data_err("Couldn't open UCA collator\n"); 3125 } 3126 3127 } 3128 3129 static void TestNonChars(void) { 3130 static const char *test[] = { 3131 "\\u0000", 3132 "\\uFFFE", "\\uFFFF", 3133 "\\U0001FFFE", "\\U0001FFFF", 3134 "\\U0002FFFE", "\\U0002FFFF", 3135 "\\U0003FFFE", "\\U0003FFFF", 3136 "\\U0004FFFE", "\\U0004FFFF", 3137 "\\U0005FFFE", "\\U0005FFFF", 3138 "\\U0006FFFE", "\\U0006FFFF", 3139 "\\U0007FFFE", "\\U0007FFFF", 3140 "\\U0008FFFE", "\\U0008FFFF", 3141 "\\U0009FFFE", "\\U0009FFFF", 3142 "\\U000AFFFE", "\\U000AFFFF", 3143 "\\U000BFFFE", "\\U000BFFFF", 3144 "\\U000CFFFE", "\\U000CFFFF", 3145 "\\U000DFFFE", "\\U000DFFFF", 3146 "\\U000EFFFE", "\\U000EFFFF", 3147 "\\U000FFFFE", "\\U000FFFFF", 3148 "\\U0010FFFE", "\\U0010FFFF" 3149 }; 3150 UErrorCode status = U_ZERO_ERROR; 3151 UCollator *coll = ucol_open("en_US", &status); 3152 3153 log_verbose("Test non characters\n"); 3154 3155 if(U_SUCCESS(status)) { 3156 genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL); 3157 } else { 3158 log_err_status(status, "Unable to open collator\n"); 3159 } 3160 3161 ucol_close(coll); 3162 } 3163 3164 static void TestExtremeCompression(void) { 3165 static char *test[4]; 3166 int32_t j = 0, i = 0; 3167 3168 for(i = 0; i<4; i++) { 3169 test[i] = (char *)malloc(2048*sizeof(char)); 3170 } 3171 3172 for(j = 20; j < 500; j++) { 3173 for(i = 0; i<4; i++) { 3174 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3175 test[i][j-1] = (char)('a'+i); 3176 test[i][j] = 0; 3177 } 3178 genericLocaleStarter("en_US", (const char **)test, 4); 3179 } 3180 3181 3182 for(i = 0; i<4; i++) { 3183 free(test[i]); 3184 } 3185 } 3186 3187 #if 0 3188 static void TestExtremeCompression(void) { 3189 static char *test[4]; 3190 int32_t j = 0, i = 0; 3191 UErrorCode status = U_ZERO_ERROR; 3192 UCollator *coll = ucol_open("en_US", status); 3193 for(i = 0; i<4; i++) { 3194 test[i] = (char *)malloc(2048*sizeof(char)); 3195 } 3196 for(j = 10; j < 2048; j++) { 3197 for(i = 0; i<4; i++) { 3198 uprv_memset(test[i], 'a', (j-2)*sizeof(char)); 3199 test[i][j-1] = (char)('a'+i); 3200 test[i][j] = 0; 3201 } 3202 } 3203 genericLocaleStarter("en_US", (const char **)test, 4); 3204 3205 for(j = 10; j < 2048; j++) { 3206 for(i = 0; i<1; i++) { 3207 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3208 test[i][j] = 0; 3209 } 3210 } 3211 for(i = 0; i<4; i++) { 3212 free(test[i]); 3213 } 3214 } 3215 #endif 3216 3217 static void TestSurrogates(void) { 3218 static const char *test[] = { 3219 "z","\\ud900\\udc25", "\\ud805\\udc50", 3220 "\\ud800\\udc00y", "\\ud800\\udc00r", 3221 "\\ud800\\udc00f", "\\ud800\\udc00", 3222 "\\ud800\\udc00c", "\\ud800\\udc00b", 3223 "\\ud800\\udc00fa", "\\ud800\\udc00fb", 3224 "\\ud800\\udc00a", 3225 "c", "b" 3226 }; 3227 3228 static const char *rule = 3229 "&z < \\ud900\\udc25 < \\ud805\\udc50" 3230 "< \\ud800\\udc00y < \\ud800\\udc00r" 3231 "< \\ud800\\udc00f << \\ud800\\udc00" 3232 "< \\ud800\\udc00fa << \\ud800\\udc00fb" 3233 "< \\ud800\\udc00a < c < b" ; 3234 3235 genericRulesStarter(rule, test, 14); 3236 } 3237 3238 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */ 3239 static void TestPrefix(void) { 3240 uint32_t i; 3241 3242 static const struct { 3243 const char *rules; 3244 const char *data[50]; 3245 const uint32_t len; 3246 } tests[] = { 3247 { "&z <<< z|a", 3248 {"zz", "za"}, 2 }, 3249 3250 { "&z <<< z| a", 3251 {"zz", "za"}, 2 }, 3252 { "[strength I]" 3253 "&a=\\ud900\\udc25" 3254 "&z<<<\\ud900\\udc25|a", 3255 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 }, 3256 }; 3257 3258 3259 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3260 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3261 } 3262 } 3263 3264 /* This test uses data suplied by Masashiko Maedera to test the implementation */ 3265 /* JIS X 4061 collation order implementation */ 3266 static void TestNewJapanese(void) { 3267 3268 static const char * const test1[] = { 3269 "\\u30b7\\u30e3\\u30fc\\u30ec", 3270 "\\u30b7\\u30e3\\u30a4", 3271 "\\u30b7\\u30e4\\u30a3", 3272 "\\u30b7\\u30e3\\u30ec", 3273 "\\u3061\\u3087\\u3053", 3274 "\\u3061\\u3088\\u3053", 3275 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8", 3276 "\\u3066\\u30fc\\u305f", 3277 "\\u30c6\\u30fc\\u30bf", 3278 "\\u30c6\\u30a7\\u30bf", 3279 "\\u3066\\u3048\\u305f", 3280 "\\u3067\\u30fc\\u305f", 3281 "\\u30c7\\u30fc\\u30bf", 3282 "\\u30c7\\u30a7\\u30bf", 3283 "\\u3067\\u3048\\u305f", 3284 "\\u3066\\u30fc\\u305f\\u30fc", 3285 "\\u30c6\\u30fc\\u30bf\\u30a1", 3286 "\\u30c6\\u30a7\\u30bf\\u30fc", 3287 "\\u3066\\u3047\\u305f\\u3041", 3288 "\\u3066\\u3048\\u305f\\u30fc", 3289 "\\u3067\\u30fc\\u305f\\u30fc", 3290 "\\u30c7\\u30fc\\u30bf\\u30a1", 3291 "\\u3067\\u30a7\\u305f\\u30a1", 3292 "\\u30c7\\u3047\\u30bf\\u3041", 3293 "\\u30c7\\u30a8\\u30bf\\u30a2", 3294 "\\u3072\\u3086", 3295 "\\u3073\\u3085\\u3042", 3296 "\\u3074\\u3085\\u3042", 3297 "\\u3073\\u3085\\u3042\\u30fc", 3298 "\\u30d3\\u30e5\\u30a2\\u30fc", 3299 "\\u3074\\u3085\\u3042\\u30fc", 3300 "\\u30d4\\u30e5\\u30a2\\u30fc", 3301 "\\u30d2\\u30e5\\u30a6", 3302 "\\u30d2\\u30e6\\u30a6", 3303 "\\u30d4\\u30e5\\u30a6\\u30a2", 3304 "\\u3073\\u3085\\u30fc\\u3042\\u30fc", 3305 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc", 3306 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc", 3307 "\\u3072\\u3085\\u3093", 3308 "\\u3074\\u3085\\u3093", 3309 "\\u3075\\u30fc\\u308a", 3310 "\\u30d5\\u30fc\\u30ea", 3311 "\\u3075\\u3045\\u308a", 3312 "\\u3075\\u30a5\\u308a", 3313 "\\u3075\\u30a5\\u30ea", 3314 "\\u30d5\\u30a6\\u30ea", 3315 "\\u3076\\u30fc\\u308a", 3316 "\\u30d6\\u30fc\\u30ea", 3317 "\\u3076\\u3045\\u308a", 3318 "\\u30d6\\u30a5\\u308a", 3319 "\\u3077\\u3046\\u308a", 3320 "\\u30d7\\u30a6\\u30ea", 3321 "\\u3075\\u30fc\\u308a\\u30fc", 3322 "\\u30d5\\u30a5\\u30ea\\u30fc", 3323 "\\u3075\\u30a5\\u308a\\u30a3", 3324 "\\u30d5\\u3045\\u308a\\u3043", 3325 "\\u30d5\\u30a6\\u30ea\\u30fc", 3326 "\\u3075\\u3046\\u308a\\u3043", 3327 "\\u30d6\\u30a6\\u30ea\\u30a4", 3328 "\\u3077\\u30fc\\u308a\\u30fc", 3329 "\\u3077\\u30a5\\u308a\\u30a4", 3330 "\\u3077\\u3046\\u308a\\u30fc", 3331 "\\u30d7\\u30a6\\u30ea\\u30a4", 3332 "\\u30d5\\u30fd", 3333 "\\u3075\\u309e", 3334 "\\u3076\\u309d", 3335 "\\u3076\\u3075", 3336 "\\u3076\\u30d5", 3337 "\\u30d6\\u3075", 3338 "\\u30d6\\u30d5", 3339 "\\u3076\\u309e", 3340 "\\u3076\\u3077", 3341 "\\u30d6\\u3077", 3342 "\\u3077\\u309d", 3343 "\\u30d7\\u30fd", 3344 "\\u3077\\u3075", 3345 }; 3346 3347 static const char *test2[] = { 3348 "\\u306f\\u309d", /* H\\u309d */ 3349 "\\u30cf\\u30fd", /* K\\u30fd */ 3350 "\\u306f\\u306f", /* HH */ 3351 "\\u306f\\u30cf", /* HK */ 3352 "\\u30cf\\u30cf", /* KK */ 3353 "\\u306f\\u309e", /* H\\u309e */ 3354 "\\u30cf\\u30fe", /* K\\u30fe */ 3355 "\\u306f\\u3070", /* HH\\u309b */ 3356 "\\u30cf\\u30d0", /* KK\\u309b */ 3357 "\\u306f\\u3071", /* HH\\u309c */ 3358 "\\u30cf\\u3071", /* KH\\u309c */ 3359 "\\u30cf\\u30d1", /* KK\\u309c */ 3360 "\\u3070\\u309d", /* H\\u309b\\u309d */ 3361 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */ 3362 "\\u3070\\u306f", /* H\\u309bH */ 3363 "\\u30d0\\u30cf", /* K\\u309bK */ 3364 "\\u3070\\u309e", /* H\\u309b\\u309e */ 3365 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */ 3366 "\\u3070\\u3070", /* H\\u309bH\\u309b */ 3367 "\\u30d0\\u3070", /* K\\u309bH\\u309b */ 3368 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */ 3369 "\\u3070\\u3071", /* H\\u309bH\\u309c */ 3370 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */ 3371 "\\u3071\\u309d", /* H\\u309c\\u309d */ 3372 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */ 3373 "\\u3071\\u306f", /* H\\u309cH */ 3374 "\\u30d1\\u30cf", /* K\\u309cK */ 3375 "\\u3071\\u3070", /* H\\u309cH\\u309b */ 3376 "\\u3071\\u30d0", /* H\\u309cK\\u309b */ 3377 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */ 3378 "\\u3071\\u3071", /* H\\u309cH\\u309c */ 3379 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */ 3380 }; 3381 /* 3382 static const char *test3[] = { 3383 "\\u221er\\u221e", 3384 "\\u221eR#", 3385 "\\u221et\\u221e", 3386 "#r\\u221e", 3387 "#R#", 3388 "#t%", 3389 "#T%", 3390 "8t\\u221e", 3391 "8T\\u221e", 3392 "8t#", 3393 "8T#", 3394 "8t%", 3395 "8T%", 3396 "8t8", 3397 "8T8", 3398 "\\u03c9r\\u221e", 3399 "\\u03a9R%", 3400 "rr\\u221e", 3401 "rR\\u221e", 3402 "Rr\\u221e", 3403 "RR\\u221e", 3404 "RT%", 3405 "rt8", 3406 "tr\\u221e", 3407 "tr8", 3408 "TR8", 3409 "tt8", 3410 "\\u30b7\\u30e3\\u30fc\\u30ec", 3411 }; 3412 */ 3413 static const UColAttribute att[] = { UCOL_STRENGTH }; 3414 static const UColAttributeValue val[] = { UCOL_QUATERNARY }; 3415 3416 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING}; 3417 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED }; 3418 3419 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1); 3420 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1); 3421 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/ 3422 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2); 3423 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2); 3424 } 3425 3426 static void TestStrCollIdenticalPrefix(void) { 3427 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71"; 3428 const char* test[] = { 3429 "ab\\ud9b0\\udc70", 3430 "ab\\ud9b0\\udc71" 3431 }; 3432 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL); 3433 } 3434 /* Contractions should have all their canonically equivalent */ 3435 /* strings included */ 3436 static void TestContractionClosure(void) { 3437 static const struct { 3438 const char *rules; 3439 const char *data[10]; 3440 const uint32_t len; 3441 } tests[] = { 3442 { "&b=\\u00e4\\u00e4", 3443 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5}, 3444 { "&b=\\u00C5", 3445 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4}, 3446 }; 3447 uint32_t i; 3448 3449 3450 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3451 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL); 3452 } 3453 } 3454 3455 /* This tests also fails*/ 3456 static void TestBeforePrefixFailure(void) { 3457 static const struct { 3458 const char *rules; 3459 const char *data[10]; 3460 const uint32_t len; 3461 } tests[] = { 3462 { "&g <<< a" 3463 "&[before 3]\\uff41 <<< x", 3464 {"x", "\\uff41"}, 2 }, 3465 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3466 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3467 "&[before 3]\\u30a7<<<\\u30a9", 3468 {"\\u30a9", "\\u30a7"}, 2 }, 3469 { "&[before 3]\\u30a7<<<\\u30a9" 3470 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3471 "&\\u30A8=\\u30A8=\\u3048=\\uff74", 3472 {"\\u30a9", "\\u30a7"}, 2 }, 3473 }; 3474 uint32_t i; 3475 3476 3477 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3478 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3479 } 3480 3481 #if 0 3482 const char* rule1 = 3483 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3484 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3485 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"; 3486 const char* rule2 = 3487 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc" 3488 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3489 "&\\u30A8=\\u30A8=\\u3048=\\uff74"; 3490 const char* test[] = { 3491 "\\u30c6\\u30fc\\u30bf", 3492 "\\u30c6\\u30a7\\u30bf", 3493 }; 3494 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0])); 3495 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0])); 3496 /* this piece of code should be in some sort of verbose mode */ 3497 /* it gets the collation elements for elements and prints them */ 3498 /* This is useful when trying to see whether the problem is */ 3499 { 3500 UErrorCode status = U_ZERO_ERROR; 3501 uint32_t i = 0; 3502 UCollationElements *it = NULL; 3503 uint32_t CE; 3504 UChar string[256]; 3505 uint32_t uStringLen; 3506 UCollator *coll = NULL; 3507 3508 uStringLen = u_unescape(rule1, string, 256); 3509 3510 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3511 3512 /*coll = ucol_open("ja_JP_JIS", &status);*/ 3513 it = ucol_openElements(coll, string, 0, &status); 3514 3515 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) { 3516 log_verbose("%s\n", test[i]); 3517 uStringLen = u_unescape(test[i], string, 256); 3518 ucol_setText(it, string, uStringLen, &status); 3519 3520 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) { 3521 log_verbose("%08X\n", CE); 3522 } 3523 log_verbose("\n"); 3524 3525 } 3526 3527 ucol_closeElements(it); 3528 ucol_close(coll); 3529 } 3530 #endif 3531 } 3532 3533 static void TestPrefixCompose(void) { 3534 const char* rule1 = 3535 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc"; 3536 /* 3537 const char* test[] = { 3538 "\\u30c6\\u30fc\\u30bf", 3539 "\\u30c6\\u30a7\\u30bf", 3540 }; 3541 */ 3542 { 3543 UErrorCode status = U_ZERO_ERROR; 3544 /*uint32_t i = 0;*/ 3545 /*UCollationElements *it = NULL;*/ 3546 /* uint32_t CE;*/ 3547 UChar string[256]; 3548 uint32_t uStringLen; 3549 UCollator *coll = NULL; 3550 3551 uStringLen = u_unescape(rule1, string, 256); 3552 3553 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3554 ucol_close(coll); 3555 } 3556 3557 3558 } 3559 3560 /* 3561 [last variable] last variable value 3562 [last primary ignorable] largest CE for primary ignorable 3563 [last secondary ignorable] largest CE for secondary ignorable 3564 [last tertiary ignorable] largest CE for tertiary ignorable 3565 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) 3566 */ 3567 3568 static void TestRuleOptions(void) { 3569 /* values here are hardcoded and are correct for the current UCA 3570 * when the UCA changes, one might be forced to change these 3571 * values. (\\u02d0, \\U00010FFFC etc...) 3572 */ 3573 static const struct { 3574 const char *rules; 3575 const char *data[10]; 3576 const uint32_t len; 3577 } tests[] = { 3578 /* - all befores here amount to zero */ 3579 { "&[before 3][first tertiary ignorable]<<<a", 3580 { "\\u0000", "a"}, 2 3581 }, /* you cannot go before first tertiary ignorable */ 3582 3583 { "&[before 3][last tertiary ignorable]<<<a", 3584 { "\\u0000", "a"}, 2 3585 }, /* you cannot go before last tertiary ignorable */ 3586 3587 { "&[before 3][first secondary ignorable]<<<a", 3588 { "\\u0000", "a"}, 2 3589 }, /* you cannot go before first secondary ignorable */ 3590 3591 { "&[before 3][last secondary ignorable]<<<a", 3592 { "\\u0000", "a"}, 2 3593 }, /* you cannot go before first secondary ignorable */ 3594 3595 /* 'normal' befores */ 3596 3597 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a", 3598 { "c", "b", "\\u0332", "a" }, 4 3599 }, 3600 3601 /* we don't have a code point that corresponds to 3602 * the last primary ignorable 3603 */ 3604 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", 3605 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 3606 }, 3607 3608 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", 3609 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 3610 }, 3611 3612 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", 3613 { "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5 3614 }, 3615 3616 { "&[first regular]<a" 3617 "&[before 1][first regular]<b", 3618 { "b", "\\u02d0", "a", "\\u02d1"}, 4 3619 }, 3620 3621 { "&[before 1][last regular]<b" 3622 "&[last regular]<a", 3623 { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4 3624 }, 3625 3626 { "&[before 1][first implicit]<b" 3627 "&[first implicit]<a", 3628 { "b", "\\u4e00", "a", "\\u4e01"}, 4 3629 }, 3630 3631 { "&[before 1][last implicit]<b" 3632 "&[last implicit]<a", 3633 { "b", "\\U0010FFFD", "a" }, 3 3634 }, 3635 3636 { "&[last variable]<z" 3637 "&[last primary ignorable]<x" 3638 "&[last secondary ignorable]<<y" 3639 "&[last tertiary ignorable]<<<w" 3640 "&[top]<u", 3641 {"\\ufffb", "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7 3642 } 3643 3644 }; 3645 uint32_t i; 3646 3647 3648 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3649 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3650 } 3651 } 3652 3653 3654 static void TestOptimize(void) { 3655 /* this is not really a test - just trying out 3656 * whether copying of UCA contents will fail 3657 * Cannot really test, since the functionality 3658 * remains the same. 3659 */ 3660 static const struct { 3661 const char *rules; 3662 const char *data[10]; 3663 const uint32_t len; 3664 } tests[] = { 3665 /* - all befores here amount to zero */ 3666 { "[optimize [\\uAC00-\\uD7FF]]", 3667 { "a", "b"}, 2} 3668 }; 3669 uint32_t i; 3670 3671 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3672 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3673 } 3674 } 3675 3676 /* 3677 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator. 3678 weiv ucol_strcollIter? 3679 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021 3680 weiv these are the input strings? 3681 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2 3682 weiv will check - could be a problem with utf-8 iterator 3683 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2 3684 weiv hmmm 3685 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate 3686 weiv that doesn't sound right 3687 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000 3688 weiv so you have two strings, you convert them to utf-8 and to utf-16BE 3689 cycheng (at) ca.ibm.c... yes 3690 weiv and then do the comparison 3691 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be 3692 weiv utf-16 strings look like a little endian ones in the example you sent me 3693 weiv It could be a bug - let me try to test it out 3694 cycheng (at) ca.ibm.c... ok 3695 cycheng (at) ca.ibm.c... we can wait till the conf. call 3696 cycheng (at) ca.ibm.c... next weke 3697 weiv that would be great 3698 weiv hmmm 3699 weiv I might be wrong 3700 weiv let me play with it some more 3701 cycheng (at) ca.ibm.c... ok 3702 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be 3703 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2 3704 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be 3705 weiv ok 3706 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data 3707 weiv thanks 3708 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples 3709 */ 3710 #if 0 3711 static void Alexis(void) { 3712 UErrorCode status = U_ZERO_ERROR; 3713 UCollator *coll = ucol_open("", &status); 3714 3715 3716 const char utf16be[2][4] = { 3717 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 }, 3718 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 } 3719 }; 3720 3721 const char utf8[2][4] = { 3722 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 }, 3723 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 }, 3724 }; 3725 3726 UCharIterator iterU161, iterU162; 3727 UCharIterator iterU81, iterU82; 3728 3729 UCollationResult resU16, resU8; 3730 3731 uiter_setUTF16BE(&iterU161, utf16be[0], 4); 3732 uiter_setUTF16BE(&iterU162, utf16be[1], 4); 3733 3734 uiter_setUTF8(&iterU81, utf8[0], 4); 3735 uiter_setUTF8(&iterU82, utf8[1], 4); 3736 3737 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3738 3739 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status); 3740 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status); 3741 3742 3743 if(resU16 != resU8) { 3744 log_err("different results\n"); 3745 } 3746 3747 ucol_close(coll); 3748 } 3749 #endif 3750 3751 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256 3752 static void Alexis2(void) { 3753 UErrorCode status = U_ZERO_ERROR; 3754 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3755 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3756 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3757 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0; 3758 3759 UConverter *conv = NULL; 3760 3761 UCharIterator U16BEItS, U16BEItT; 3762 UCharIterator U8ItS, U8ItT; 3763 3764 UCollationResult resU16, resU16BE, resU8; 3765 3766 static const char* const pairs[][2] = { 3767 { "\\ud800\\u0021", "\\uFFFC\\u0062"}, 3768 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" }, 3769 { "\\u0E40\\u0021", "\\u00A1\\u0021"}, 3770 { "\\u0E40\\u0021", "\\uFE57\\u0062"}, 3771 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"}, 3772 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"}, 3773 { "\\u0020", "\\u0020\\u0000"} 3774 /* 3775 5F20 (my result here) 3776 5F204E008E3F 3777 5F20 (your result here) 3778 */ 3779 }; 3780 3781 int32_t i = 0; 3782 3783 UCollator *coll = ucol_open("", &status); 3784 if(status == U_FILE_ACCESS_ERROR) { 3785 log_data_err("Is your data around?\n"); 3786 return; 3787 } else if(U_FAILURE(status)) { 3788 log_err("Error opening collator\n"); 3789 return; 3790 } 3791 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3792 conv = ucnv_open("UTF16BE", &status); 3793 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { 3794 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3795 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3796 3797 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT); 3798 3799 log_verbose("Result of strcoll is %i\n", resU16); 3800 3801 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status); 3802 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status); 3803 3804 /* use the original sizes, as the result from converter is in bytes */ 3805 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS); 3806 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT); 3807 3808 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status); 3809 3810 log_verbose("Result of U16BE is %i\n", resU16BE); 3811 3812 if(resU16 != resU16BE) { 3813 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]); 3814 } 3815 3816 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status); 3817 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status); 3818 3819 uiter_setUTF8(&U8ItS, U8Source, U8LenS); 3820 uiter_setUTF8(&U8ItT, U8Target, U8LenT); 3821 3822 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status); 3823 3824 if(resU16 != resU8) { 3825 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]); 3826 } 3827 3828 } 3829 3830 ucol_close(coll); 3831 ucnv_close(conv); 3832 } 3833 3834 static void TestHebrewUCA(void) { 3835 UErrorCode status = U_ZERO_ERROR; 3836 static const char *first[] = { 3837 "d790d6b8d79cd795d6bcd7a9", 3838 "d790d79cd79ed7a7d799d799d7a1", 3839 "d790d6b4d79ed795d6bcd7a9", 3840 }; 3841 3842 char utf8String[3][256]; 3843 UChar utf16String[3][256]; 3844 3845 int32_t i = 0, j = 0; 3846 int32_t sizeUTF8[3]; 3847 int32_t sizeUTF16[3]; 3848 3849 UCollator *coll = ucol_open("", &status); 3850 if (U_FAILURE(status)) { 3851 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status)); 3852 return; 3853 } 3854 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/ 3855 3856 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) { 3857 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status); 3858 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status); 3859 log_verbose("%i: "); 3860 for(j = 0; j < sizeUTF16[i]; j++) { 3861 /*log_verbose("\\u%04X", utf16String[i][j]);*/ 3862 log_verbose("%04X", utf16String[i][j]); 3863 } 3864 log_verbose("\n"); 3865 } 3866 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) { 3867 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) { 3868 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS); 3869 } 3870 } 3871 3872 ucol_close(coll); 3873 3874 } 3875 3876 static void TestPartialSortKeyTermination(void) { 3877 static const char* cases[] = { 3878 "\\u1234\\u1234\\udc00", 3879 "\\udc00\\ud800\\ud800" 3880 }; 3881 3882 int32_t i = sizeof(UCollator); 3883 3884 UErrorCode status = U_ZERO_ERROR; 3885 3886 UCollator *coll = ucol_open("", &status); 3887 3888 UCharIterator iter; 3889 3890 UChar currCase[256]; 3891 int32_t length = 0; 3892 int32_t pKeyLen = 0; 3893 3894 uint8_t key[256]; 3895 3896 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 3897 uint32_t state[2] = {0, 0}; 3898 length = u_unescape(cases[i], currCase, 256); 3899 uiter_setString(&iter, currCase, length); 3900 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status); 3901 3902 log_verbose("Done\n"); 3903 3904 } 3905 ucol_close(coll); 3906 } 3907 3908 static void TestSettings(void) { 3909 static const char* cases[] = { 3910 "apple", 3911 "Apple" 3912 }; 3913 3914 static const char* locales[] = { 3915 "", 3916 "en" 3917 }; 3918 3919 UErrorCode status = U_ZERO_ERROR; 3920 3921 int32_t i = 0, j = 0; 3922 3923 UChar source[256], target[256]; 3924 int32_t sLen = 0, tLen = 0; 3925 3926 UCollator *collateObject = NULL; 3927 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) { 3928 collateObject = ucol_open(locales[i], &status); 3929 ucol_setStrength(collateObject, UCOL_PRIMARY); 3930 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status); 3931 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) { 3932 sLen = u_unescape(cases[j-1], source, 256); 3933 source[sLen] = 0; 3934 tLen = u_unescape(cases[j], target, 256); 3935 source[tLen] = 0; 3936 doTest(collateObject, source, target, UCOL_EQUAL); 3937 } 3938 ucol_close(collateObject); 3939 } 3940 } 3941 3942 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) { 3943 UErrorCode status = U_ZERO_ERROR; 3944 int32_t errorNo = 0; 3945 /*const UChar *sourceRules = NULL;*/ 3946 /*int32_t sourceRulesLen = 0;*/ 3947 UColAttributeValue french = UCOL_OFF; 3948 int32_t cloneSize = 0; 3949 3950 if(!ucol_equals(source, target)) { 3951 log_err("Same collators, different address not equal\n"); 3952 errorNo++; 3953 } 3954 ucol_close(target); 3955 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { 3956 /* currently, safeClone is implemented through getRules/openRules 3957 * so it is the same as the test below - I will comment that test out. 3958 */ 3959 /* real thing */ 3960 target = ucol_safeClone(source, NULL, &cloneSize, &status); 3961 if(U_FAILURE(status)) { 3962 log_err("Error creating clone\n"); 3963 errorNo++; 3964 return errorNo; 3965 } 3966 if(!ucol_equals(source, target)) { 3967 log_err("Collator different from it's clone\n"); 3968 errorNo++; 3969 } 3970 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status); 3971 if(french == UCOL_ON) { 3972 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 3973 } else { 3974 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 3975 } 3976 if(U_FAILURE(status)) { 3977 log_err("Error setting attributes\n"); 3978 errorNo++; 3979 return errorNo; 3980 } 3981 if(ucol_equals(source, target)) { 3982 log_err("Collators same even when options changed\n"); 3983 errorNo++; 3984 } 3985 ucol_close(target); 3986 /* commented out since safeClone uses exactly the same technique */ 3987 /* 3988 sourceRules = ucol_getRules(source, &sourceRulesLen); 3989 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 3990 if(U_FAILURE(status)) { 3991 log_err("Error instantiating target from rules\n"); 3992 errorNo++; 3993 return errorNo; 3994 } 3995 if(!ucol_equals(source, target)) { 3996 log_err("Collator different from collator that was created from the same rules\n"); 3997 errorNo++; 3998 } 3999 ucol_close(target); 4000 */ 4001 } 4002 return errorNo; 4003 } 4004 4005 4006 static void TestEquals(void) { 4007 /* ucol_equals is not currently a public API. There is a chance that it will become 4008 * something like this, but currently it is only used by RuleBasedCollator::operator== 4009 */ 4010 /* test whether the two collators instantiated from the same locale are equal */ 4011 UErrorCode status = U_ZERO_ERROR; 4012 UParseError parseError; 4013 int32_t noOfLoc = uloc_countAvailable(); 4014 const char *locName = NULL; 4015 UCollator *source = NULL, *target = NULL; 4016 int32_t i = 0; 4017 4018 const char* rules[] = { 4019 "&l < lj <<< Lj <<< LJ", 4020 "&n < nj <<< Nj <<< NJ", 4021 "&ae <<< \\u00e4", 4022 "&AE <<< \\u00c4" 4023 }; 4024 /* 4025 const char* badRules[] = { 4026 "&l <<< Lj", 4027 "&n < nj <<< nJ <<< NJ", 4028 "&a <<< \\u00e4", 4029 "&AE <<< \\u00c4 <<< x" 4030 }; 4031 */ 4032 4033 UChar sourceRules[1024], targetRules[1024]; 4034 int32_t sourceRulesSize = 0, targetRulesSize = 0; 4035 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]); 4036 4037 for(i = 0; i < rulesSize; i++) { 4038 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize); 4039 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize); 4040 } 4041 4042 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4043 if(status == U_FILE_ACCESS_ERROR) { 4044 log_data_err("Is your data around?\n"); 4045 return; 4046 } else if(U_FAILURE(status)) { 4047 log_err("Error opening collator\n"); 4048 return; 4049 } 4050 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4051 if(!ucol_equals(source, target)) { 4052 log_err("Equivalent collators not equal!\n"); 4053 } 4054 ucol_close(source); 4055 ucol_close(target); 4056 4057 source = ucol_open("root", &status); 4058 target = ucol_open("root", &status); 4059 log_verbose("Testing root\n"); 4060 if(!ucol_equals(source, source)) { 4061 log_err("Same collator not equal\n"); 4062 } 4063 if(TestEqualsForCollator(locName, source, target)) { 4064 log_err("Errors for root\n", locName); 4065 } 4066 ucol_close(source); 4067 4068 for(i = 0; i<noOfLoc; i++) { 4069 status = U_ZERO_ERROR; 4070 locName = uloc_getAvailable(i); 4071 /*if(hasCollationElements(locName)) {*/ 4072 log_verbose("Testing equality for locale %s\n", locName); 4073 source = ucol_open(locName, &status); 4074 target = ucol_open(locName, &status); 4075 if (U_FAILURE(status)) { 4076 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status)); 4077 continue; 4078 } 4079 if(TestEqualsForCollator(locName, source, target)) { 4080 log_err("Errors for locale %s\n", locName); 4081 } 4082 ucol_close(source); 4083 /*}*/ 4084 } 4085 } 4086 4087 static void TestJ2726(void) { 4088 UChar a[2] = { 0x61, 0x00 }; /*"a"*/ 4089 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/ 4090 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/ 4091 UErrorCode status = U_ZERO_ERROR; 4092 UCollator *coll = ucol_open("en", &status); 4093 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 4094 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4095 doTest(coll, a, aSpace, UCOL_EQUAL); 4096 doTest(coll, aSpace, a, UCOL_EQUAL); 4097 doTest(coll, a, spaceA, UCOL_EQUAL); 4098 doTest(coll, spaceA, a, UCOL_EQUAL); 4099 doTest(coll, spaceA, aSpace, UCOL_EQUAL); 4100 doTest(coll, aSpace, spaceA, UCOL_EQUAL); 4101 ucol_close(coll); 4102 } 4103 4104 static void NullRule(void) { 4105 UChar r[3] = {0}; 4106 UErrorCode status = U_ZERO_ERROR; 4107 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4108 if(U_SUCCESS(status)) { 4109 log_err("This should have been an error!\n"); 4110 ucol_close(coll); 4111 } else { 4112 status = U_ZERO_ERROR; 4113 } 4114 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4115 if(U_FAILURE(status)) { 4116 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status)); 4117 } else { 4118 ucol_close(coll); 4119 } 4120 } 4121 4122 /** 4123 * Test for CollationElementIterator previous and next for the whole set of 4124 * unicode characters with normalization on. 4125 */ 4126 static void TestNumericCollation(void) 4127 { 4128 UErrorCode status = U_ZERO_ERROR; 4129 4130 const static char *basicTestStrings[]={ 4131 "hello1", 4132 "hello2", 4133 "hello2002", 4134 "hello2003", 4135 "hello123456", 4136 "hello1234567", 4137 "hello10000000", 4138 "hello100000000", 4139 "hello1000000000", 4140 "hello10000000000", 4141 }; 4142 4143 const static char *preZeroTestStrings[]={ 4144 "avery10000", 4145 "avery010000", 4146 "avery0010000", 4147 "avery00010000", 4148 "avery000010000", 4149 "avery0000010000", 4150 "avery00000010000", 4151 "avery000000010000", 4152 }; 4153 4154 const static char *thirtyTwoBitNumericStrings[]={ 4155 "avery42949672960", 4156 "avery42949672961", 4157 "avery42949672962", 4158 "avery429496729610" 4159 }; 4160 4161 const static char *longNumericStrings[]={ 4162 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings. 4163 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that 4164 are treated as multiple collation elements. */ 4165 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */ 4166 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */ 4167 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */ 4168 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */ 4169 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */ 4170 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */ 4171 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */ 4172 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */ 4173 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */ 4174 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */ 4175 }; 4176 4177 const static char *supplementaryDigits[] = { 4178 "\\uD835\\uDFCE", /* 0 */ 4179 "\\uD835\\uDFCF", /* 1 */ 4180 "\\uD835\\uDFD0", /* 2 */ 4181 "\\uD835\\uDFD1", /* 3 */ 4182 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */ 4183 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */ 4184 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */ 4185 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */ 4186 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */ 4187 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */ 4188 }; 4189 4190 const static char *foreignDigits[] = { 4191 "\\u0661", 4192 "\\u0662", 4193 "\\u0663", 4194 "\\u0661\\u0660", 4195 "\\u0661\\u0662", 4196 "\\u0661\\u0663", 4197 "\\u0662\\u0660", 4198 "\\u0662\\u0662", 4199 "\\u0662\\u0663", 4200 "\\u0663\\u0660", 4201 "\\u0663\\u0662", 4202 "\\u0663\\u0663" 4203 }; 4204 4205 const static char *evenZeroes[] = { 4206 "2000", 4207 "2001", 4208 "2002", 4209 "2003" 4210 }; 4211 4212 UColAttribute att = UCOL_NUMERIC_COLLATION; 4213 UColAttributeValue val = UCOL_ON; 4214 4215 /* Open our collator. */ 4216 UCollator* coll = ucol_open("root", &status); 4217 if (U_FAILURE(status)){ 4218 log_err_status(status, "ERROR: in using ucol_open() -> %s\n", 4219 myErrorName(status)); 4220 return; 4221 } 4222 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1); 4223 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1); 4224 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1); 4225 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1); 4226 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1); 4227 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1); 4228 4229 /* Setting up our collator to do digits. */ 4230 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 4231 if (U_FAILURE(status)){ 4232 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n", 4233 myErrorName(status)); 4234 return; 4235 } 4236 4237 /* 4238 Testing that prepended zeroes still yield the correct collation behavior. 4239 We expect that every element in our strings array will be equal. 4240 */ 4241 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL); 4242 4243 ucol_close(coll); 4244 } 4245 4246 static void TestTibetanConformance(void) 4247 { 4248 const char* test[] = { 4249 "\\u0FB2\\u0591\\u0F71\\u0061", 4250 "\\u0FB2\\u0F71\\u0061" 4251 }; 4252 4253 UErrorCode status = U_ZERO_ERROR; 4254 UCollator *coll = ucol_open("", &status); 4255 UChar source[100]; 4256 UChar target[100]; 4257 int result; 4258 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4259 if (U_SUCCESS(status)) { 4260 u_unescape(test[0], source, 100); 4261 u_unescape(test[1], target, 100); 4262 doTest(coll, source, target, UCOL_EQUAL); 4263 result = ucol_strcoll(coll, source, -1, target, -1); 4264 log_verbose("result %d\n", result); 4265 if (UCOL_EQUAL != result) { 4266 log_err("Tibetan comparison error\n"); 4267 } 4268 } 4269 ucol_close(coll); 4270 4271 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); 4272 } 4273 4274 static void TestPinyinProblem(void) { 4275 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" }; 4276 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); 4277 } 4278 4279 #define TST_UCOL_MAX_INPUT 0x220001 4280 #define topByte 0xFF000000; 4281 #define bottomByte 0xFF; 4282 #define fourBytes 0xFFFFFFFF; 4283 4284 4285 static void showImplicit(UChar32 i) { 4286 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) { 4287 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i)); 4288 } 4289 } 4290 4291 static void TestImplicitGeneration(void) { 4292 UErrorCode status = U_ZERO_ERROR; 4293 UChar32 last = 0; 4294 UChar32 current; 4295 UChar32 i = 0, j = 0; 4296 UChar32 roundtrip = 0; 4297 UChar32 lastBottom = 0; 4298 UChar32 currentBottom = 0; 4299 UChar32 lastTop = 0; 4300 UChar32 currentTop = 0; 4301 4302 UCollator *coll = ucol_open("root", &status); 4303 if(U_FAILURE(status)) { 4304 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 4305 return; 4306 } 4307 4308 uprv_uca_getRawFromImplicit(0xE20303E7); 4309 4310 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) { 4311 current = uprv_uca_getImplicitFromRaw(i) & fourBytes; 4312 4313 /* check that it round-trips AND that all intervening ones are illegal*/ 4314 roundtrip = uprv_uca_getRawFromImplicit(current); 4315 if (roundtrip != i) { 4316 log_err("No roundtrip %08X\n", i); 4317 } 4318 if (last != 0) { 4319 for (j = last + 1; j < current; ++j) { 4320 roundtrip = uprv_uca_getRawFromImplicit(j); 4321 /* raise an error if it *doesn't* find an error*/ 4322 if (roundtrip != -1) { 4323 log_err("Fails to recognize illegal %08X\n", j); 4324 } 4325 } 4326 } 4327 /* now do other consistency checks*/ 4328 lastBottom = last & bottomByte; 4329 currentBottom = current & bottomByte; 4330 lastTop = last & topByte; 4331 currentTop = current & topByte; 4332 4333 /* print out some values for spot-checking*/ 4334 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) { 4335 showImplicit(i-3); 4336 showImplicit(i-2); 4337 showImplicit(i-1); 4338 showImplicit(i); 4339 showImplicit(i+1); 4340 showImplicit(i+2); 4341 } 4342 last = current; 4343 4344 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) { 4345 log_err("No raw <-> code point roundtrip for 0x%08X\n", i); 4346 } 4347 } 4348 showImplicit(TST_UCOL_MAX_INPUT-2); 4349 showImplicit(TST_UCOL_MAX_INPUT-1); 4350 showImplicit(TST_UCOL_MAX_INPUT); 4351 ucol_close(coll); 4352 } 4353 4354 /** 4355 * Iterate through the given iterator, checking to see that all the strings 4356 * in the expected array are present. 4357 * @param expected array of strings we expect to see, or NULL 4358 * @param expectedCount number of elements of expected, or 0 4359 */ 4360 static int32_t checkUEnumeration(const char* msg, 4361 UEnumeration* iter, 4362 const char** expected, 4363 int32_t expectedCount) { 4364 UErrorCode ec = U_ZERO_ERROR; 4365 int32_t i = 0, n, j, bit; 4366 int32_t seenMask = 0; 4367 4368 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */ 4369 n = uenum_count(iter, &ec); 4370 if (!assertSuccess("count", &ec)) return -1; 4371 log_verbose("%s = [", msg); 4372 for (;; ++i) { 4373 const char* s = uenum_next(iter, NULL, &ec); 4374 if (!assertSuccess("snext", &ec) || s == NULL) break; 4375 if (i != 0) log_verbose(","); 4376 log_verbose("%s", s); 4377 /* check expected list */ 4378 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4379 if ((seenMask&bit) == 0 && 4380 uprv_strcmp(s, expected[j]) == 0) { 4381 seenMask |= bit; 4382 break; 4383 } 4384 } 4385 } 4386 log_verbose("] (%d)\n", i); 4387 assertTrue("count verified", i==n); 4388 /* did we see all expected strings? */ 4389 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4390 if ((seenMask&bit)!=0) { 4391 log_verbose("Ok: \"%s\" seen\n", expected[j]); 4392 } else { 4393 log_err("FAIL: \"%s\" not seen\n", expected[j]); 4394 } 4395 } 4396 return n; 4397 } 4398 4399 /** 4400 * Test new API added for separate collation tree. 4401 */ 4402 static void TestSeparateTrees(void) { 4403 UErrorCode ec = U_ZERO_ERROR; 4404 UEnumeration *e = NULL; 4405 int32_t n = -1; 4406 UBool isAvailable; 4407 char loc[256]; 4408 4409 static const char* AVAIL[] = { "en", "de" }; 4410 4411 static const char* KW[] = { "collation" }; 4412 4413 static const char* KWVAL[] = { "phonebook", "stroke" }; 4414 4415 #if !UCONFIG_NO_SERVICE 4416 e = ucol_openAvailableLocales(&ec); 4417 if (e != NULL) { 4418 assertSuccess("ucol_openAvailableLocales", &ec); 4419 assertTrue("ucol_openAvailableLocales!=0", e!=0); 4420 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)); 4421 /* Don't need to check n because we check list */ 4422 uenum_close(e); 4423 } else { 4424 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec)); 4425 } 4426 #endif 4427 4428 e = ucol_getKeywords(&ec); 4429 if (e != NULL) { 4430 assertSuccess("ucol_getKeywords", &ec); 4431 assertTrue("ucol_getKeywords!=0", e!=0); 4432 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW)); 4433 /* Don't need to check n because we check list */ 4434 uenum_close(e); 4435 } else { 4436 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec)); 4437 } 4438 4439 e = ucol_getKeywordValues(KW[0], &ec); 4440 if (e != NULL) { 4441 assertSuccess("ucol_getKeywordValues", &ec); 4442 assertTrue("ucol_getKeywordValues!=0", e!=0); 4443 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL)); 4444 /* Don't need to check n because we check list */ 4445 uenum_close(e); 4446 } else { 4447 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec)); 4448 } 4449 4450 /* Try setting a warning before calling ucol_getKeywordValues */ 4451 ec = U_USING_FALLBACK_WARNING; 4452 e = ucol_getKeywordValues(KW[0], &ec); 4453 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) { 4454 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0); 4455 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL)); 4456 /* Don't need to check n because we check list */ 4457 uenum_close(e); 4458 } 4459 4460 /* 4461 U_DRAFT int32_t U_EXPORT2 4462 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 4463 const char* locale, UBool* isAvailable, 4464 UErrorCode* status); 4465 } 4466 */ 4467 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr", 4468 &isAvailable, &ec); 4469 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4470 assertEquals("getFunctionalEquivalent(fr)", "fr", loc); 4471 assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE", 4472 isAvailable == TRUE); 4473 } 4474 4475 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR", 4476 &isAvailable, &ec); 4477 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4478 assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc); 4479 assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE", 4480 isAvailable == TRUE); 4481 } 4482 } 4483 4484 /* supercedes TestJ784 */ 4485 static void TestBeforePinyin(void) { 4486 const static char rules[] = { 4487 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0" 4488 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8" 4489 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC" 4490 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2" 4491 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9" 4492 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC" 4493 }; 4494 4495 const static char *test[] = { 4496 "l\\u0101", 4497 "la", 4498 "l\\u0101n", 4499 "lan ", 4500 "l\\u0113", 4501 "le", 4502 "l\\u0113n", 4503 "len" 4504 }; 4505 4506 const static char *test2[] = { 4507 "x\\u0101", 4508 "x\\u0100", 4509 "X\\u0101", 4510 "X\\u0100", 4511 "x\\u00E1", 4512 "x\\u00C1", 4513 "X\\u00E1", 4514 "X\\u00C1", 4515 "x\\u01CE", 4516 "x\\u01CD", 4517 "X\\u01CE", 4518 "X\\u01CD", 4519 "x\\u00E0", 4520 "x\\u00C0", 4521 "X\\u00E0", 4522 "X\\u00C0", 4523 "xa", 4524 "xA", 4525 "Xa", 4526 "XA", 4527 "x\\u0101x", 4528 "x\\u0100x", 4529 "x\\u00E1x", 4530 "x\\u00C1x", 4531 "x\\u01CEx", 4532 "x\\u01CDx", 4533 "x\\u00E0x", 4534 "x\\u00C0x", 4535 "xax", 4536 "xAx" 4537 }; 4538 4539 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4540 genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0])); 4541 genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0])); 4542 genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0])); 4543 } 4544 4545 static void TestBeforeTightening(void) { 4546 static const struct { 4547 const char *rules; 4548 UErrorCode expectedStatus; 4549 } tests[] = { 4550 { "&[before 1]a<x", U_ZERO_ERROR }, 4551 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR }, 4552 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR }, 4553 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR }, 4554 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR }, 4555 { "&[before 2]a<<x",U_ZERO_ERROR }, 4556 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR }, 4557 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR }, 4558 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR }, 4559 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR }, 4560 { "&[before 3]a<<<x",U_ZERO_ERROR }, 4561 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR }, 4562 { "&[before I]a = x",U_INVALID_FORMAT_ERROR } 4563 }; 4564 4565 int32_t i = 0; 4566 4567 UErrorCode status = U_ZERO_ERROR; 4568 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4569 uint32_t rlen = 0; 4570 4571 UCollator *coll = NULL; 4572 4573 4574 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4575 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN); 4576 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4577 if(status != tests[i].expectedStatus) { 4578 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n", 4579 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus)); 4580 } 4581 ucol_close(coll); 4582 status = U_ZERO_ERROR; 4583 } 4584 4585 } 4586 4587 #if 0 4588 &m < a 4589 &[before 1] a < x <<< X << q <<< Q < z 4590 assert: m <<< M < x <<< X << q <<< Q < z < a < n 4591 4592 &m < a 4593 &[before 2] a << x <<< X << q <<< Q < z 4594 assert: m <<< M < x <<< X << q <<< Q << a < z < n 4595 4596 &m < a 4597 &[before 3] a <<< x <<< X << q <<< Q < z 4598 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n 4599 4600 4601 &m << a 4602 &[before 1] a < x <<< X << q <<< Q < z 4603 assert: x <<< X << q <<< Q < z < m <<< M << a < n 4604 4605 &m << a 4606 &[before 2] a << x <<< X << q <<< Q < z 4607 assert: m <<< M << x <<< X << q <<< Q << a < z < n 4608 4609 &m << a 4610 &[before 3] a <<< x <<< X << q <<< Q < z 4611 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n 4612 4613 4614 &m <<< a 4615 &[before 1] a < x <<< X << q <<< Q < z 4616 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M 4617 4618 &m <<< a 4619 &[before 2] a << x <<< X << q <<< Q < z 4620 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n 4621 4622 &m <<< a 4623 &[before 3] a <<< x <<< X << q <<< Q < z 4624 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n 4625 4626 4627 &[before 1] s < x <<< X << q <<< Q < z 4628 assert: r <<< R < x <<< X << q <<< Q < z < s < n 4629 4630 &[before 2] s << x <<< X << q <<< Q < z 4631 assert: r <<< R < x <<< X << q <<< Q << s < z < n 4632 4633 &[before 3] s <<< x <<< X << q <<< Q < z 4634 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n 4635 4636 4637 &[before 1] \u24DC < x <<< X << q <<< Q < z 4638 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M 4639 4640 &[before 2] \u24DC << x <<< X << q <<< Q < z 4641 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n 4642 4643 &[before 3] \u24DC <<< x <<< X << q <<< Q < z 4644 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n 4645 #endif 4646 4647 4648 #if 0 4649 /* requires features not yet supported */ 4650 static void TestMoreBefore(void) { 4651 static const struct { 4652 const char* rules; 4653 const char* order[16]; 4654 int32_t size; 4655 } tests[] = { 4656 { "&m < a &[before 1] a < x <<< X << q <<< Q < z", 4657 { "m","M","x","X","q","Q","z","a","n" }, 9}, 4658 { "&m < a &[before 2] a << x <<< X << q <<< Q < z", 4659 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4660 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z", 4661 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4662 { "&m << a &[before 1] a < x <<< X << q <<< Q < z", 4663 { "x","X","q","Q","z","m","M","a","n" }, 9}, 4664 { "&m << a &[before 2] a << x <<< X << q <<< Q < z", 4665 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4666 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z", 4667 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4668 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z", 4669 { "x","X","q","Q","z","n","m","a","M" }, 9}, 4670 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z", 4671 { "x","X","q","Q","m","a","M","z","n" }, 9}, 4672 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z", 4673 { "m","x","X","a","M","q","Q","z","n" }, 9}, 4674 { "&[before 1] s < x <<< X << q <<< Q < z", 4675 { "r","R","x","X","q","Q","z","s","n" }, 9}, 4676 { "&[before 2] s << x <<< X << q <<< Q < z", 4677 { "r","R","x","X","q","Q","s","z","n" }, 9}, 4678 { "&[before 3] s <<< x <<< X << q <<< Q < z", 4679 { "r","R","x","X","s","q","Q","z","n" }, 9}, 4680 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z", 4681 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9}, 4682 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z", 4683 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9}, 4684 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z", 4685 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9} 4686 }; 4687 4688 int32_t i = 0; 4689 4690 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4691 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size); 4692 } 4693 } 4694 #endif 4695 4696 static void TestTailorNULL( void ) { 4697 const static char* rule = "&a <<< '\\u0000'"; 4698 UErrorCode status = U_ZERO_ERROR; 4699 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4700 uint32_t rlen = 0; 4701 UChar a = 1, null = 0; 4702 UCollationResult res = UCOL_EQUAL; 4703 4704 UCollator *coll = NULL; 4705 4706 4707 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN); 4708 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4709 4710 if(U_FAILURE(status)) { 4711 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status)); 4712 } else { 4713 res = ucol_strcoll(coll, &a, 1, &null, 1); 4714 4715 if(res != UCOL_LESS) { 4716 log_err("NULL was not tailored properly!\n"); 4717 } 4718 } 4719 4720 ucol_close(coll); 4721 } 4722 4723 static void 4724 TestThaiSortKey(void) 4725 { 4726 UChar yamakan = 0x0E4E; 4727 UErrorCode status = U_ZERO_ERROR; 4728 uint8_t key[256]; 4729 int32_t keyLen = 0; 4730 /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */ 4731 /* since it stays in the same relative position. This should be addressed in CLDR */ 4732 /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */ 4733 /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */ 4734 /* UCA 5.0 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 }; */ 4735 /* UCA 5.1 moves Yammakan */ 4736 uint8_t expectedKey[256] = { 0x01, 0xe0, 0x4e, 0x01, 0x05, 0x00 }; 4737 UCollator *coll = ucol_open("th", &status); 4738 if(U_FAILURE(status)) { 4739 log_err_status(status, "Could not open a collator, exiting (%s)\n", u_errorName(status)); 4740 return; 4741 } 4742 4743 keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256); 4744 if(strcmp((char *)key, (char *)expectedKey)) { 4745 log_err("Yammakan key is different from ICU 4.0!\n"); 4746 } 4747 4748 ucol_close(coll); 4749 } 4750 4751 static void 4752 TestUpperFirstQuaternary(void) 4753 { 4754 const char* tests[] = { "B", "b", "Bb", "bB" }; 4755 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST }; 4756 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST }; 4757 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4758 } 4759 4760 static void 4761 TestJ4960(void) 4762 { 4763 const char* tests[] = { "\\u00e2T", "aT" }; 4764 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL }; 4765 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON }; 4766 const char* tests2[] = { "a", "A" }; 4767 const char* rule = "&[first tertiary ignorable]=A=a"; 4768 UColAttribute att2[] = { UCOL_CASE_LEVEL }; 4769 UColAttributeValue attVals2[] = { UCOL_ON }; 4770 /* Test whether we correctly ignore primary ignorables on case level when */ 4771 /* we have only primary & case level */ 4772 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL); 4773 /* Test whether ICU4J will make case level for sortkeys that have primary strength */ 4774 /* and case level */ 4775 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4776 /* Test whether completely ignorable letters have case level info (they shouldn't) */ 4777 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL); 4778 } 4779 4780 static void 4781 TestJ5223(void) 4782 { 4783 static const char *test = "this is a test string"; 4784 UChar ustr[256]; 4785 int32_t ustr_length = u_unescape(test, ustr, 256); 4786 unsigned char sortkey[256]; 4787 int32_t sortkey_length; 4788 UErrorCode status = U_ZERO_ERROR; 4789 static UCollator *coll = NULL; 4790 coll = ucol_open("root", &status); 4791 if(U_FAILURE(status)) { 4792 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 4793 return; 4794 } 4795 ucol_setStrength(coll, UCOL_PRIMARY); 4796 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4797 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4798 if (U_FAILURE(status)) { 4799 log_err("Failed setting atributes\n"); 4800 return; 4801 } 4802 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0); 4803 if (sortkey_length > 256) return; 4804 4805 /* we mark the position where the null byte should be written in advance */ 4806 sortkey[sortkey_length-1] = 0xAA; 4807 4808 /* we set the buffer size one byte higher than needed */ 4809 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 4810 sortkey_length+1); 4811 4812 /* no error occurs (for me) */ 4813 if (sortkey[sortkey_length-1] == 0xAA) { 4814 log_err("Hit bug at first try\n"); 4815 } 4816 4817 /* we mark the position where the null byte should be written again */ 4818 sortkey[sortkey_length-1] = 0xAA; 4819 4820 /* this time we set the buffer size to the exact amount needed */ 4821 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 4822 sortkey_length); 4823 4824 /* now the trailing null byte is not written */ 4825 if (sortkey[sortkey_length-1] == 0xAA) { 4826 log_err("Hit bug at second try\n"); 4827 } 4828 4829 ucol_close(coll); 4830 } 4831 4832 /* Regression test for Thai partial sort key problem */ 4833 static void 4834 TestJ5232(void) 4835 { 4836 const static char *test[] = { 4837 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21", 4838 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21" 4839 }; 4840 4841 genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0])); 4842 } 4843 4844 static void 4845 TestJ5367(void) 4846 { 4847 const static char *test[] = { "a", "y" }; 4848 const char* rules = "&Ny << Y &[first secondary ignorable] <<< a"; 4849 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4850 } 4851 4852 static void 4853 TestVI5913(void) 4854 { 4855 UErrorCode status = U_ZERO_ERROR; 4856 int32_t i, j; 4857 UCollator *coll =NULL; 4858 uint8_t resColl[100], expColl[100]; 4859 int32_t rLen, tLen, ruleLen, sLen, kLen; 4860 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/ 4861 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ 4862 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/ 4863 static const UChar tData[][20]={ 4864 {0x1EAC, 0}, 4865 {0x0041, 0x0323, 0x0302, 0}, 4866 {0x1EA0, 0x0302, 0}, 4867 {0x00C2, 0x0323, 0}, 4868 {0x1ED8, 0}, /* O with dot and circumflex */ 4869 {0x1ECC, 0x0302, 0}, 4870 {0x1EB7, 0}, 4871 {0x1EA1, 0x0306, 0}, 4872 }; 4873 static const UChar tailorData[][20]={ 4874 {0x1FA2, 0}, /* Omega with 3 combining marks */ 4875 {0x03C9, 0x0313, 0x0300, 0x0345, 0}, 4876 {0x1FF3, 0x0313, 0x0300, 0}, 4877 {0x1F60, 0x0300, 0x0345, 0}, 4878 {0x1F62, 0x0345, 0}, 4879 {0x1FA0, 0x0300, 0}, 4880 }; 4881 static const UChar tailorData2[][20]={ 4882 {0x1E63, 0x030C, 0}, /* s with dot below + caron */ 4883 {0x0073, 0x0323, 0x030C, 0}, 4884 {0x0073, 0x030C, 0x0323, 0}, 4885 }; 4886 static const UChar tailorData3[][20]={ 4887 {0x007a, 0}, /* z */ 4888 {0x0061, 0x0065, 0}, /* a + e */ 4889 {0x0061, 0x00ea, 0}, /* a + e with circumflex */ 4890 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */ 4891 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */ 4892 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */ 4893 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */ 4894 {0x00EA, 0}, /* e with circumflex */ 4895 }; 4896 4897 /* Test Vietnamese sort. */ 4898 coll = ucol_open("vi", &status); 4899 if(U_FAILURE(status)) { 4900 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 4901 return; 4902 } 4903 log_verbose("\n\nVI collation:"); 4904 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) { 4905 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4906 } 4907 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) { 4908 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 4909 } 4910 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) { 4911 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n"); 4912 } 4913 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) { 4914 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 4915 } 4916 4917 for (j=0; j<8; j++) { 4918 tLen = u_strlen(tData[j]); 4919 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 4920 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 4921 for(i = 0; i<rLen; i++) { 4922 log_verbose(" %02X", resColl[i]); 4923 } 4924 } 4925 4926 ucol_close(coll); 4927 4928 /* Test Romanian sort. */ 4929 coll = ucol_open("ro", &status); 4930 log_verbose("\n\nRO collation:"); 4931 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) { 4932 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4933 } 4934 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) { 4935 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 4936 } 4937 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) { 4938 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 4939 } 4940 4941 for (j=4; j<8; j++) { 4942 tLen = u_strlen(tData[j]); 4943 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 4944 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 4945 for(i = 0; i<rLen; i++) { 4946 log_verbose(" %02X", resColl[i]); 4947 } 4948 } 4949 ucol_close(coll); 4950 4951 /* Test the precomposed Greek character with 3 combining marks. */ 4952 log_verbose("\n\nTailoring test: Greek character with 3 combining marks"); 4953 ruleLen = u_strlen(rule); 4954 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 4955 if (U_FAILURE(status)) { 4956 log_err("ucol_openRules failed with %s\n", u_errorName(status)); 4957 return; 4958 } 4959 sLen = u_strlen(tailorData[0]); 4960 for (j=1; j<6; j++) { 4961 tLen = u_strlen(tailorData[j]); 4962 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) { 4963 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]); 4964 } 4965 } 4966 /* Test getSortKey. */ 4967 tLen = u_strlen(tailorData[0]); 4968 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100); 4969 for (j=0; j<6; j++) { 4970 tLen = u_strlen(tailorData[j]); 4971 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100); 4972 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 4973 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 4974 for(i = 0; i<rLen; i++) { 4975 log_err(" %02X", resColl[i]); 4976 } 4977 } 4978 } 4979 ucol_close(coll); 4980 4981 log_verbose("\n\nTailoring test for s with caron:"); 4982 ruleLen = u_strlen(rule2); 4983 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 4984 tLen = u_strlen(tailorData2[0]); 4985 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100); 4986 for (j=1; j<3; j++) { 4987 tLen = u_strlen(tailorData2[j]); 4988 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100); 4989 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 4990 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 4991 for(i = 0; i<rLen; i++) { 4992 log_err(" %02X", resColl[i]); 4993 } 4994 } 4995 } 4996 ucol_close(coll); 4997 4998 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); 4999 ruleLen = u_strlen(rule3); 5000 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5001 tLen = u_strlen(tailorData3[3]); 5002 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); 5003 for (j=4; j<6; j++) { 5004 tLen = u_strlen(tailorData3[j]); 5005 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); 5006 5007 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5008 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5009 for(i = 0; i<rLen; i++) { 5010 log_err(" %02X", resColl[i]); 5011 } 5012 } 5013 5014 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5015 for(i = 0; i<rLen; i++) { 5016 log_verbose(" %02X", resColl[i]); 5017 } 5018 } 5019 ucol_close(coll); 5020 } 5021 5022 static void 5023 TestTailor6179(void) 5024 { 5025 UErrorCode status = U_ZERO_ERROR; 5026 int32_t i; 5027 UCollator *coll =NULL; 5028 uint8_t resColl[100]; 5029 int32_t rLen, tLen, ruleLen; 5030 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */ 5031 UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79, 5032 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20, 5033 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20, 5034 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0}; 5035 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */ 5036 UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61, 5037 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C, 5038 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E, 5039 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C, 5040 0x3C,0x3C,0x20,0x62,0}; 5041 5042 UChar tData1[][20]={ 5043 {0x61, 0}, 5044 {0x62, 0}, 5045 { 0xFDD0,0x009E, 0} 5046 }; 5047 UChar tData2[][20]={ 5048 {0x61, 0}, 5049 {0x62, 0}, 5050 { 0xFDD0,0x009E, 0} 5051 }; 5052 5053 /* UCA5.1, the value may increase in later version. */ 5054 uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0}; 5055 uint8_t lastPrimaryIgnCE[6]={1, 0xE7, 0xB9, 1, 5, 0}; 5056 uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0}; 5057 uint8_t lastSecondaryIgnCE[6]={1, 1, 0x05, 0}; 5058 5059 /* Test [Last Primary ignorable] */ 5060 5061 log_verbose("\n\nTailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b "); 5062 ruleLen = u_strlen(rule1); 5063 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5064 if (U_FAILURE(status)) { 5065 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status)); 5066 return; 5067 } 5068 tLen = u_strlen(tData1[0]); 5069 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100); 5070 if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) { 5071 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen); 5072 for(i = 0; i<rLen; i++) { 5073 log_err(" %02X", resColl[i]); 5074 } 5075 } 5076 tLen = u_strlen(tData1[1]); 5077 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100); 5078 if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) { 5079 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen); 5080 for(i = 0; i<rLen; i++) { 5081 log_err(" %02X", resColl[i]); 5082 } 5083 } 5084 ucol_close(coll); 5085 5086 5087 /* Test [Last Secondary ignorable] */ 5088 log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b "); 5089 ruleLen = u_strlen(rule1); 5090 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5091 if (U_FAILURE(status)) { 5092 log_err("Tailoring test: &[last primary ignorable] failed!"); 5093 return; 5094 } 5095 tLen = u_strlen(tData2[0]); 5096 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); 5097 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 5098 for(i = 0; i<rLen; i++) { 5099 log_verbose(" %02X", resColl[i]); 5100 } 5101 if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) { 5102 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 5103 for(i = 0; i<rLen; i++) { 5104 log_err(" %02X", resColl[i]); 5105 } 5106 } 5107 tLen = u_strlen(tData2[1]); 5108 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); 5109 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 5110 for(i = 0; i<rLen; i++) { 5111 log_verbose(" %02X", resColl[i]); 5112 } 5113 if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) { 5114 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 5115 for(i = 0; i<rLen; i++) { 5116 log_err(" %02X", resColl[i]); 5117 } 5118 } 5119 ucol_close(coll); 5120 } 5121 5122 static void 5123 TestUCAPrecontext(void) 5124 { 5125 UErrorCode status = U_ZERO_ERROR; 5126 int32_t i, j; 5127 UCollator *coll =NULL; 5128 uint8_t resColl[100], prevColl[100]; 5129 int32_t rLen, tLen, ruleLen; 5130 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */ 5131 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0}; 5132 /* & l middle-dot << a a is an expansion. */ 5133 5134 UChar tData1[][20]={ 5135 { 0xb7, 0}, /* standalone middle dot(0xb7) */ 5136 { 0x387, 0}, /* standalone middle dot(0x387) */ 5137 { 0x61, 0}, /* a */ 5138 { 0x6C, 0}, /* l */ 5139 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */ 5140 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */ 5141 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */ 5142 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */ 5143 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */ 5144 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */ 5145 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */ 5146 }; 5147 5148 log_verbose("\n\nEN collation:"); 5149 coll = ucol_open("en", &status); 5150 if (U_FAILURE(status)) { 5151 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status)); 5152 return; 5153 } 5154 for (j=0; j<11; j++) { 5155 tLen = u_strlen(tData1[j]); 5156 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5157 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5158 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5159 j, tData1[j]); 5160 } 5161 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5162 for(i = 0; i<rLen; i++) { 5163 log_verbose(" %02X", resColl[i]); 5164 } 5165 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5166 } 5167 ucol_close(coll); 5168 5169 5170 log_verbose("\n\nJA collation:"); 5171 coll = ucol_open("ja", &status); 5172 if (U_FAILURE(status)) { 5173 log_err("Tailoring test: &z <<a|- failed!"); 5174 return; 5175 } 5176 for (j=0; j<11; j++) { 5177 tLen = u_strlen(tData1[j]); 5178 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5179 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5180 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5181 j, tData1[j]); 5182 } 5183 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5184 for(i = 0; i<rLen; i++) { 5185 log_verbose(" %02X", resColl[i]); 5186 } 5187 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5188 } 5189 ucol_close(coll); 5190 5191 5192 log_verbose("\n\nTailoring test: & middle dot < a "); 5193 ruleLen = u_strlen(rule1); 5194 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5195 if (U_FAILURE(status)) { 5196 log_err("Tailoring test: & middle dot < a failed!"); 5197 return; 5198 } 5199 for (j=0; j<11; j++) { 5200 tLen = u_strlen(tData1[j]); 5201 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5202 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5203 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5204 j, tData1[j]); 5205 } 5206 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5207 for(i = 0; i<rLen; i++) { 5208 log_verbose(" %02X", resColl[i]); 5209 } 5210 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5211 } 5212 ucol_close(coll); 5213 5214 5215 log_verbose("\n\nTailoring test: & l middle-dot << a "); 5216 ruleLen = u_strlen(rule2); 5217 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5218 if (U_FAILURE(status)) { 5219 log_err("Tailoring test: & l middle-dot << a failed!"); 5220 return; 5221 } 5222 for (j=0; j<11; j++) { 5223 tLen = u_strlen(tData1[j]); 5224 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5225 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5226 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5227 j, tData1[j]); 5228 } 5229 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) { 5230 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.", 5231 j, tData1[j]); 5232 } 5233 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5234 for(i = 0; i<rLen; i++) { 5235 log_verbose(" %02X", resColl[i]); 5236 } 5237 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5238 } 5239 ucol_close(coll); 5240 } 5241 5242 static void 5243 TestOutOfBuffer5468(void) 5244 { 5245 static const char *test = "\\u4e00"; 5246 UChar ustr[256]; 5247 int32_t ustr_length = u_unescape(test, ustr, 256); 5248 unsigned char shortKeyBuf[1]; 5249 int32_t sortkey_length; 5250 UErrorCode status = U_ZERO_ERROR; 5251 static UCollator *coll = NULL; 5252 5253 coll = ucol_open("root", &status); 5254 if(U_FAILURE(status)) { 5255 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 5256 return; 5257 } 5258 ucol_setStrength(coll, UCOL_PRIMARY); 5259 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 5260 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5261 if (U_FAILURE(status)) { 5262 log_err("Failed setting atributes\n"); 5263 return; 5264 } 5265 5266 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf)); 5267 if (sortkey_length != 4) { 5268 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length); 5269 } 5270 log_verbose("length of sortKey is %d", sortkey_length); 5271 ucol_close(coll); 5272 } 5273 5274 #define TSKC_DATA_SIZE 5 5275 #define TSKC_BUF_SIZE 50 5276 static void 5277 TestSortKeyConsistency(void) 5278 { 5279 UErrorCode icuRC = U_ZERO_ERROR; 5280 UCollator* ucol; 5281 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD}; 5282 5283 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5284 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5285 int32_t i, j, i2; 5286 5287 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC); 5288 if (U_FAILURE(icuRC)) 5289 { 5290 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC)); 5291 return; 5292 } 5293 5294 for (i = 0; i < TSKC_DATA_SIZE; i++) 5295 { 5296 UCharIterator uiter; 5297 uint32_t state[2] = { 0, 0 }; 5298 int32_t dataLen = i+1; 5299 for (j=0; j<TSKC_BUF_SIZE; j++) 5300 bufFull[i][j] = bufPart[i][j] = 0; 5301 5302 /* Full sort key */ 5303 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE); 5304 5305 /* Partial sort key */ 5306 uiter_setString(&uiter, data, dataLen); 5307 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC); 5308 if (U_FAILURE(icuRC)) 5309 { 5310 log_err("ucol_nextSortKeyPart failed\n"); 5311 ucol_close(ucol); 5312 return; 5313 } 5314 5315 for (i2=0; i2<i; i2++) 5316 { 5317 UBool fullMatch = TRUE; 5318 UBool partMatch = TRUE; 5319 for (j=0; j<TSKC_BUF_SIZE; j++) 5320 { 5321 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]); 5322 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]); 5323 } 5324 if (fullMatch != partMatch) { 5325 log_err(fullMatch ? "full key was consistent, but partial key changed\n" 5326 : "partial key was consistent, but full key changed\n"); 5327 ucol_close(ucol); 5328 return; 5329 } 5330 } 5331 } 5332 5333 /*=============================================*/ 5334 ucol_close(ucol); 5335 } 5336 5337 /* ticket: 6101 */ 5338 static void TestCroatianSortKey(void) { 5339 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3"; 5340 UErrorCode status = U_ZERO_ERROR; 5341 UCollator *ucol; 5342 UCharIterator iter; 5343 5344 static const UChar text[] = { 0x0044, 0xD81A }; 5345 5346 size_t length = sizeof(text)/sizeof(*text); 5347 5348 uint8_t textSortKey[32]; 5349 size_t lenSortKey = 32; 5350 size_t actualSortKeyLen; 5351 uint32_t uStateInfo[2] = { 0, 0 }; 5352 5353 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status); 5354 if (U_FAILURE(status)) { 5355 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status)); 5356 return; 5357 } 5358 5359 uiter_setString(&iter, text, length); 5360 5361 actualSortKeyLen = ucol_nextSortKeyPart( 5362 ucol, &iter, (uint32_t*)uStateInfo, 5363 textSortKey, lenSortKey, &status 5364 ); 5365 5366 if (actualSortKeyLen == lenSortKey) { 5367 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n"); 5368 } 5369 5370 ucol_close(ucol); 5371 } 5372 5373 /* ticket: 6140 */ 5374 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since 5375 * they are both Hiragana and Katakana 5376 */ 5377 #define SORTKEYLEN 50 5378 static void TestHiragana(void) { 5379 UErrorCode status = U_ZERO_ERROR; 5380 UCollator* ucol; 5381 UCollationResult strcollresult; 5382 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */ 5383 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 }; 5384 int32_t data1Len = sizeof(data1)/sizeof(*data1); 5385 int32_t data2Len = sizeof(data2)/sizeof(*data2); 5386 int32_t i, j; 5387 uint8_t sortKey1[SORTKEYLEN]; 5388 uint8_t sortKey2[SORTKEYLEN]; 5389 5390 UCharIterator uiter1; 5391 UCharIterator uiter2; 5392 uint32_t state1[2] = { 0, 0 }; 5393 uint32_t state2[2] = { 0, 0 }; 5394 int32_t keySize1; 5395 int32_t keySize2; 5396 5397 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL, 5398 &status); 5399 if (U_FAILURE(status)) { 5400 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status)); 5401 return; 5402 } 5403 5404 /* Start of full sort keys */ 5405 /* Full sort key1 */ 5406 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN); 5407 /* Full sort key2 */ 5408 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN); 5409 if (keySize1 == keySize2) { 5410 for (i = 0; i < keySize1; i++) { 5411 if (sortKey1[i] != sortKey2[i]) { 5412 log_err("Full sort keys are different. Should be equal."); 5413 } 5414 } 5415 } else { 5416 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2); 5417 } 5418 /* End of full sort keys */ 5419 5420 /* Start of partial sort keys */ 5421 /* Partial sort key1 */ 5422 uiter_setString(&uiter1, data1, data1Len); 5423 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status); 5424 /* Partial sort key2 */ 5425 uiter_setString(&uiter2, data2, data2Len); 5426 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status); 5427 if (U_SUCCESS(status) && keySize1 == keySize2) { 5428 for (j = 0; j < keySize1; j++) { 5429 if (sortKey1[j] != sortKey2[j]) { 5430 log_err("Partial sort keys are different. Should be equal"); 5431 } 5432 } 5433 } else { 5434 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2); 5435 } 5436 /* End of partial sort keys */ 5437 5438 /* Start of strcoll */ 5439 /* Use ucol_strcoll() to determine ordering */ 5440 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len); 5441 if (strcollresult != UCOL_EQUAL) { 5442 log_err("Result from ucol_strcoll() should be UCOL_EQUAL."); 5443 } 5444 5445 ucol_close(ucol); 5446 } 5447 5448 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) 5449 5450 void addMiscCollTest(TestNode** root) 5451 { 5452 TEST(TestRuleOptions); 5453 TEST(TestBeforePrefixFailure); 5454 TEST(TestContractionClosure); 5455 TEST(TestPrefixCompose); 5456 TEST(TestStrCollIdenticalPrefix); 5457 TEST(TestPrefix); 5458 TEST(TestNewJapanese); 5459 /*TEST(TestLimitations);*/ 5460 TEST(TestNonChars); 5461 TEST(TestExtremeCompression); 5462 TEST(TestSurrogates); 5463 TEST(TestVariableTopSetting); 5464 TEST(TestBocsuCoverage); 5465 TEST(TestCyrillicTailoring); 5466 TEST(TestCase); 5467 TEST(IncompleteCntTest); 5468 TEST(BlackBirdTest); 5469 TEST(FunkyATest); 5470 TEST(BillFairmanTest); 5471 TEST(RamsRulesTest); 5472 TEST(IsTailoredTest); 5473 TEST(TestCollations); 5474 TEST(TestChMove); 5475 TEST(TestImplicitTailoring); 5476 TEST(TestFCDProblem); 5477 TEST(TestEmptyRule); 5478 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */ 5479 TEST(TestJ815); 5480 /*TEST(TestJ831);*/ /* we changed lv locale */ 5481 TEST(TestBefore); 5482 TEST(TestRedundantRules); 5483 TEST(TestExpansionSyntax); 5484 TEST(TestHangulTailoring); 5485 TEST(TestUCARules); 5486 TEST(TestIncrementalNormalize); 5487 TEST(TestComposeDecompose); 5488 TEST(TestCompressOverlap); 5489 TEST(TestContraction); 5490 TEST(TestExpansion); 5491 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */ 5492 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */ 5493 TEST(TestOptimize); 5494 TEST(TestSuppressContractions); 5495 TEST(Alexis2); 5496 TEST(TestHebrewUCA); 5497 TEST(TestPartialSortKeyTermination); 5498 TEST(TestSettings); 5499 TEST(TestEquals); 5500 TEST(TestJ2726); 5501 TEST(NullRule); 5502 TEST(TestNumericCollation); 5503 TEST(TestTibetanConformance); 5504 TEST(TestPinyinProblem); 5505 TEST(TestImplicitGeneration); 5506 TEST(TestSeparateTrees); 5507 TEST(TestBeforePinyin); 5508 TEST(TestBeforeTightening); 5509 /*TEST(TestMoreBefore);*/ 5510 TEST(TestTailorNULL); 5511 TEST(TestThaiSortKey); 5512 TEST(TestUpperFirstQuaternary); 5513 TEST(TestJ4960); 5514 TEST(TestJ5223); 5515 TEST(TestJ5232); 5516 TEST(TestJ5367); 5517 TEST(TestHiragana); 5518 TEST(TestSortKeyConsistency); 5519 TEST(TestVI5913); /* VI, RO tailored rules */ 5520 TEST(TestCroatianSortKey); 5521 TEST(TestTailor6179); 5522 TEST(TestUCAPrecontext); 5523 TEST(TestOutOfBuffer5468); 5524 } 5525 5526 #endif /* #if !UCONFIG_NO_COLLATION */ 5527