1 2 /******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 2001-2010, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ********************************************************************/ 7 /******************************************************************************* 8 * 9 * File cmsccoll.C 10 * 11 *******************************************************************************/ 12 /** 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where 14 * to fit. 15 */ 16 17 #include <stdio.h> 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_COLLATION 22 23 #include "unicode/ucol.h" 24 #include "unicode/ucoleitr.h" 25 #include "unicode/uloc.h" 26 #include "cintltst.h" 27 #include "ccolltst.h" 28 #include "callcoll.h" 29 #include "unicode/ustring.h" 30 #include "string.h" 31 #include "ucol_imp.h" 32 #include "ucol_tok.h" 33 #include "cmemory.h" 34 #include "cstring.h" 35 #include "uassert.h" 36 #include "unicode/parseerr.h" 37 #include "unicode/ucnv.h" 38 #include "unicode/ures.h" 39 #include "unicode/uscript.h" 40 #include "uparse.h" 41 #include "putilimp.h" 42 43 44 #define LEN(a) (sizeof(a)/sizeof(a[0])) 45 46 #define MAX_TOKEN_LEN 16 47 48 typedef UCollationResult tst_strcoll(void *collator, const int object, 49 const UChar *source, const int sLen, 50 const UChar *target, const int tLen); 51 52 53 54 const static char cnt1[][10] = { 55 56 "AA", 57 "AC", 58 "AZ", 59 "AQ", 60 "AB", 61 "ABZ", 62 "ABQ", 63 "Z", 64 "ABC", 65 "Q", 66 "B" 67 }; 68 69 const static char cnt2[][10] = { 70 "DA", 71 "DAD", 72 "DAZ", 73 "MAR", 74 "Z", 75 "DAVIS", 76 "MARK", 77 "DAV", 78 "DAVI" 79 }; 80 81 static void IncompleteCntTest(void) 82 { 83 UErrorCode status = U_ZERO_ERROR; 84 UChar temp[90]; 85 UChar t1[90]; 86 UChar t2[90]; 87 88 UCollator *coll = NULL; 89 uint32_t i = 0, j = 0; 90 uint32_t size = 0; 91 92 u_uastrcpy(temp, " & Z < ABC < Q < B"); 93 94 coll = 
ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 95 96 if(U_SUCCESS(status)) { 97 size = sizeof(cnt1)/sizeof(cnt1[0]); 98 for(i = 0; i < size-1; i++) { 99 for(j = i+1; j < size; j++) { 100 UCollationElements *iter; 101 u_uastrcpy(t1, cnt1[i]); 102 u_uastrcpy(t2, cnt1[j]); 103 doTest(coll, t1, t2, UCOL_LESS); 104 /* synwee : added collation element iterator test */ 105 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 106 if (U_FAILURE(status)) { 107 log_err("Creation of iterator failed\n"); 108 break; 109 } 110 backAndForth(iter); 111 ucol_closeElements(iter); 112 } 113 } 114 } 115 116 ucol_close(coll); 117 118 119 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV"); 120 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 121 122 if(U_SUCCESS(status)) { 123 size = sizeof(cnt2)/sizeof(cnt2[0]); 124 for(i = 0; i < size-1; i++) { 125 for(j = i+1; j < size; j++) { 126 UCollationElements *iter; 127 u_uastrcpy(t1, cnt2[i]); 128 u_uastrcpy(t2, cnt2[j]); 129 doTest(coll, t1, t2, UCOL_LESS); 130 131 /* synwee : added collation element iterator test */ 132 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 133 if (U_FAILURE(status)) { 134 log_err("Creation of iterator failed\n"); 135 break; 136 } 137 backAndForth(iter); 138 ucol_closeElements(iter); 139 } 140 } 141 } 142 143 ucol_close(coll); 144 145 146 } 147 148 const static char shifted[][20] = { 149 "black bird", 150 "black-bird", 151 "blackbird", 152 "black Bird", 153 "black-Bird", 154 "blackBird", 155 "black birds", 156 "black-birds", 157 "blackbirds" 158 }; 159 160 const static UCollationResult shiftedTert[] = { 161 UCOL_EQUAL, 162 UCOL_EQUAL, 163 UCOL_EQUAL, 164 UCOL_LESS, 165 UCOL_EQUAL, 166 UCOL_EQUAL, 167 UCOL_LESS, 168 UCOL_EQUAL, 169 UCOL_EQUAL 170 }; 171 172 const static char nonignorable[][20] = { 173 "black bird", 174 "black Bird", 175 "black birds", 176 "black-bird", 177 "black-Bird", 178 "black-birds", 179 "blackbird", 180 
"blackBird", 181 "blackbirds" 182 }; 183 184 static void BlackBirdTest(void) { 185 UErrorCode status = U_ZERO_ERROR; 186 UChar t1[90]; 187 UChar t2[90]; 188 189 uint32_t i = 0, j = 0; 190 uint32_t size = 0; 191 UCollator *coll = ucol_open("en_US", &status); 192 193 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); 195 196 if(U_SUCCESS(status)) { 197 size = sizeof(nonignorable)/sizeof(nonignorable[0]); 198 for(i = 0; i < size-1; i++) { 199 for(j = i+1; j < size; j++) { 200 u_uastrcpy(t1, nonignorable[i]); 201 u_uastrcpy(t2, nonignorable[j]); 202 doTest(coll, t1, t2, UCOL_LESS); 203 } 204 } 205 } 206 207 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 208 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 209 210 if(U_SUCCESS(status)) { 211 size = sizeof(shifted)/sizeof(shifted[0]); 212 for(i = 0; i < size-1; i++) { 213 for(j = i+1; j < size; j++) { 214 u_uastrcpy(t1, shifted[i]); 215 u_uastrcpy(t2, shifted[j]); 216 doTest(coll, t1, t2, UCOL_LESS); 217 } 218 } 219 } 220 221 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); 222 if(U_SUCCESS(status)) { 223 size = sizeof(shifted)/sizeof(shifted[0]); 224 for(i = 1; i < size; i++) { 225 u_uastrcpy(t1, shifted[i-1]); 226 u_uastrcpy(t2, shifted[i]); 227 doTest(coll, t1, t2, shiftedTert[i]); 228 } 229 } 230 231 ucol_close(coll); 232 } 233 234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = { 235 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000}, 236 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000}, 237 {0x0041/*'A'*/, 0x0300, 0x0000}, 238 {0x00C0, 0x0301, 0x0000}, 239 /* this would work with forced normalization */ 240 {0x00C0, 0x0316, 0x0000} 241 }; 242 243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = { 244 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 245 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}, 246 {0x00C0, 0}, 247 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 248 /* this would work 
with forced normalization */ 249 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000} 250 }; 251 252 const static UCollationResult results[] = { 253 UCOL_GREATER, 254 UCOL_EQUAL, 255 UCOL_EQUAL, 256 UCOL_GREATER, 257 UCOL_EQUAL 258 }; 259 260 static void FunkyATest(void) 261 { 262 263 int32_t i; 264 UErrorCode status = U_ZERO_ERROR; 265 UCollator *myCollation; 266 myCollation = ucol_open("en_US", &status); 267 if(U_FAILURE(status)){ 268 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 269 return; 270 } 271 log_verbose("Testing some A letters, for some reason\n"); 272 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 273 ucol_setStrength(myCollation, UCOL_TERTIARY); 274 for (i = 0; i < 4 ; i++) 275 { 276 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); 277 } 278 ucol_close(myCollation); 279 } 280 281 UColAttributeValue caseFirst[] = { 282 UCOL_OFF, 283 UCOL_LOWER_FIRST, 284 UCOL_UPPER_FIRST 285 }; 286 287 288 UColAttributeValue alternateHandling[] = { 289 UCOL_NON_IGNORABLE, 290 UCOL_SHIFTED 291 }; 292 293 UColAttributeValue caseLevel[] = { 294 UCOL_OFF, 295 UCOL_ON 296 }; 297 298 UColAttributeValue strengths[] = { 299 UCOL_PRIMARY, 300 UCOL_SECONDARY, 301 UCOL_TERTIARY, 302 UCOL_QUATERNARY, 303 UCOL_IDENTICAL 304 }; 305 306 #if 0 307 static const char * strengthsC[] = { 308 "UCOL_PRIMARY", 309 "UCOL_SECONDARY", 310 "UCOL_TERTIARY", 311 "UCOL_QUATERNARY", 312 "UCOL_IDENTICAL" 313 }; 314 315 static const char * caseFirstC[] = { 316 "UCOL_OFF", 317 "UCOL_LOWER_FIRST", 318 "UCOL_UPPER_FIRST" 319 }; 320 321 322 static const char * alternateHandlingC[] = { 323 "UCOL_NON_IGNORABLE", 324 "UCOL_SHIFTED" 325 }; 326 327 static const char * caseLevelC[] = { 328 "UCOL_OFF", 329 "UCOL_ON" 330 }; 331 332 /* not used currently - does not test only prints */ 333 static void PrintMarkDavis(void) 334 { 335 UErrorCode status = U_ZERO_ERROR; 336 UChar m[256]; 337 uint8_t sortkey[256]; 338 UCollator *coll 
= ucol_open("en_US", &status); 339 uint32_t h,i,j,k, sortkeysize; 340 uint32_t sizem = 0; 341 char buffer[512]; 342 uint32_t len = 512; 343 344 log_verbose("PrintMarkDavis"); 345 346 u_uastrcpy(m, "Mark Davis"); 347 sizem = u_strlen(m); 348 349 350 m[1] = 0xe4; 351 352 for(i = 0; i<sizem; i++) { 353 fprintf(stderr, "\\u%04X ", m[i]); 354 } 355 fprintf(stderr, "\n"); 356 357 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) { 358 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status); 359 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]); 360 361 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) { 362 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status); 363 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]); 364 365 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) { 366 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status); 367 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]); 368 369 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) { 370 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status); 371 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256); 372 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]); 373 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len)); 374 } 375 376 } 377 378 } 379 380 } 381 } 382 #endif 383 384 static void BillFairmanTest(void) { 385 /* 386 ** check for actual locale via ICU resource bundles 387 ** 388 ** lp points to the original locale ("fr_FR_....") 389 */ 390 391 UResourceBundle *lr,*cr; 392 UErrorCode lec = U_ZERO_ERROR; 393 const char *lp = "fr_FR_you_ll_never_find_this_locale"; 394 395 log_verbose("BillFairmanTest\n"); 396 397 lr = ures_open(NULL,lp,&lec); 398 if (lr) { 399 cr = ures_getByKey(lr,"collations",0,&lec); 400 if (cr) { 401 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec); 402 if (lp) { 403 if (U_SUCCESS(lec)) { 404 if(strcmp(lp, "fr") != 0) { 405 log_err("Wrong 
locale for French Collation Data, expected \"fr\" got %s", lp); 406 } 407 } 408 } 409 ures_close(cr); 410 } 411 ures_close(lr); 412 } 413 } 414 415 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){ 416 UChar source[256] = { '\0'}; 417 UChar target[256] = { '\0'}; 418 UChar preP = 0x31a3; 419 UChar preQ = 0x310d; 420 /* 421 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491; 422 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413; 423 */ 424 /*log_verbose("Testing primary\n");*/ 425 426 doTest(col, p, q, UCOL_LESS); 427 /* 428 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q)); 429 430 if(result!=UCOL_LESS){ 431 aescstrdup(p,utfSource,256); 432 aescstrdup(q,utfTarget,256); 433 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 434 } 435 */ 436 source[0] = preP; 437 u_strcpy(source+1,p); 438 target[0] = preQ; 439 u_strcpy(target+1,q); 440 doTest(col, source, target, UCOL_LESS); 441 /* 442 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget); 443 */ 444 } 445 446 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){ 447 UChar source[256] = { '\0'}; 448 UChar target[256] = { '\0'}; 449 450 /*log_verbose("Testing secondary\n");*/ 451 452 doTest(col, p, q, UCOL_LESS); 453 /* 454 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget); 455 */ 456 source[0] = 0x0053; 457 u_strcpy(source+1,p); 458 target[0]= 0x0073; 459 u_strcpy(target+1,q); 460 461 doTest(col, source, target, UCOL_LESS); 462 /* 463 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget); 464 */ 465 466 467 u_strcpy(source,p); 468 source[u_strlen(p)] = 0x62; 469 source[u_strlen(p)+1] = 0; 470 471 472 u_strcpy(target,q); 473 target[u_strlen(q)] = 0x61; 474 target[u_strlen(q)+1] = 0; 475 476 doTest(col, source, target, UCOL_GREATER); 477 478 /* 479 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s 
\n",utfSource,utfTarget); 480 */ 481 } 482 483 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){ 484 UChar source[256] = { '\0'}; 485 UChar target[256] = { '\0'}; 486 487 /*log_verbose("Testing tertiary\n");*/ 488 489 doTest(col, p, q, UCOL_LESS); 490 /* 491 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget); 492 */ 493 source[0] = 0x0020; 494 u_strcpy(source+1,p); 495 target[0]= 0x002D; 496 u_strcpy(target+1,q); 497 498 doTest(col, source, target, UCOL_LESS); 499 /* 500 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget); 501 */ 502 503 u_strcpy(source,p); 504 source[u_strlen(p)] = 0xE0; 505 source[u_strlen(p)+1] = 0; 506 507 u_strcpy(target,q); 508 target[u_strlen(q)] = 0x61; 509 target[u_strlen(q)+1] = 0; 510 511 doTest(col, source, target, UCOL_GREATER); 512 513 /* 514 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget); 515 */ 516 } 517 518 static void testEquality(UCollator* col, const UChar* p,const UChar* q){ 519 /* 520 UChar source[256] = { '\0'}; 521 UChar target[256] = { '\0'}; 522 */ 523 524 doTest(col, p, q, UCOL_EQUAL); 525 /* 526 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 527 */ 528 } 529 530 static void testCollator(UCollator *coll, UErrorCode *status) { 531 const UChar *rules = NULL, *current = NULL; 532 int32_t ruleLen = 0; 533 uint32_t strength = 0; 534 uint32_t chOffset = 0; uint32_t chLen = 0; 535 uint32_t exOffset = 0; uint32_t exLen = 0; 536 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 537 uint32_t firstEx = 0; 538 /* uint32_t rExpsLen = 0; */ 539 uint32_t firstLen = 0; 540 UBool varT = FALSE; UBool top_ = TRUE; 541 uint16_t specs = 0; 542 UBool startOfRules = TRUE; 543 UBool lastReset = FALSE; 544 UBool before = FALSE; 545 uint32_t beforeStrength = 0; 546 UColTokenParser src; 547 UColOptionSet opts; 548 549 UChar first[256]; 550 UChar second[256]; 551 UChar tempB[256]; 552 
uint32_t tempLen; 553 UChar *rulesCopy = NULL; 554 UParseError parseError; 555 556 uprv_memset(&src, 0, sizeof(UColTokenParser)); 557 558 src.opts = &opts; 559 560 rules = ucol_getRules(coll, &ruleLen); 561 if(U_SUCCESS(*status) && ruleLen > 0) { 562 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 563 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 564 src.current = src.source = rulesCopy; 565 src.end = rulesCopy+ruleLen; 566 src.extraCurrent = src.end; 567 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 568 *first = *second = 0; 569 570 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 571 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 572 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) { 573 strength = src.parsedToken.strength; 574 chOffset = src.parsedToken.charsOffset; 575 chLen = src.parsedToken.charsLen; 576 exOffset = src.parsedToken.extensionOffset; 577 exLen = src.parsedToken.extensionLen; 578 prefixOffset = src.parsedToken.prefixOffset; 579 prefixLen = src.parsedToken.prefixLen; 580 specs = src.parsedToken.flags; 581 582 startOfRules = FALSE; 583 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 584 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 585 if(top_) { /* if reset is on top, the sequence is broken. 
We should have an empty string */ 586 second[0] = 0; 587 } else { 588 u_strncpy(second,src.source+chOffset, chLen); 589 second[chLen] = 0; 590 591 if(exLen > 0 && firstEx == 0) { 592 u_strncat(first, src.source+exOffset, exLen); 593 first[firstLen+exLen] = 0; 594 } 595 596 if(lastReset == TRUE && prefixLen != 0) { 597 u_strncpy(first+prefixLen, first, firstLen); 598 u_strncpy(first, src.source+prefixOffset, prefixLen); 599 first[firstLen+prefixLen] = 0; 600 firstLen = firstLen+prefixLen; 601 } 602 603 if(before == TRUE) { /* swap first and second */ 604 u_strcpy(tempB, first); 605 u_strcpy(first, second); 606 u_strcpy(second, tempB); 607 608 tempLen = firstLen; 609 firstLen = chLen; 610 chLen = tempLen; 611 612 tempLen = firstEx; 613 firstEx = exLen; 614 exLen = tempLen; 615 if(beforeStrength < strength) { 616 strength = beforeStrength; 617 } 618 } 619 } 620 lastReset = FALSE; 621 622 switch(strength){ 623 case UCOL_IDENTICAL: 624 testEquality(coll,first,second); 625 break; 626 case UCOL_PRIMARY: 627 testPrimary(coll,first,second); 628 break; 629 case UCOL_SECONDARY: 630 testSecondary(coll,first,second); 631 break; 632 case UCOL_TERTIARY: 633 testTertiary(coll,first,second); 634 break; 635 case UCOL_TOK_RESET: 636 lastReset = TRUE; 637 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 638 if(before) { 639 beforeStrength = (specs & UCOL_TOK_BEFORE)-1; 640 } 641 break; 642 default: 643 break; 644 } 645 646 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */ 647 before = FALSE; 648 } else { 649 firstLen = chLen; 650 firstEx = exLen; 651 u_strcpy(first, second); 652 } 653 } 654 uprv_free(src.source); 655 } 656 } 657 658 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 659 UCollator *UCA = (UCollator *)collator; 660 return ucol_strcoll(UCA, source, sLen, target, tLen); 661 } 662 663 /* 664 static UCollationResult winTest(void *collator, const 
int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 665 #ifdef U_WINDOWS 666 LCID lcid = (LCID)collator; 667 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen); 668 #else 669 return 0; 670 #endif 671 } 672 */ 673 674 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts, 675 UChar s1, UChar s2, 676 const UChar *s, const uint32_t sLen, 677 const UChar *t, const uint32_t tLen) { 678 UChar source[256] = {0}; 679 UChar target[256] = {0}; 680 681 source[0] = s1; 682 u_strcpy(source+1, s); 683 target[0] = s2; 684 u_strcpy(target+1, t); 685 686 return func(collator, opts, source, sLen+1, target, tLen+1); 687 } 688 689 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts, 690 UChar s1, UChar s2, 691 const UChar *s, const uint32_t sLen, 692 const UChar *t, const uint32_t tLen) { 693 UChar source[256] = {0}; 694 UChar target[256] = {0}; 695 696 u_strcpy(source, s); 697 source[sLen] = s1; 698 u_strcpy(target, t); 699 target[tLen] = s2; 700 701 return func(collator, opts, source, sLen+1, target, tLen+1); 702 } 703 704 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts, 705 const UChar *s, const uint32_t sLen, 706 const UChar *t, const uint32_t tLen, 707 UCollationResult result) { 708 /*UChar fPrimary = 0x6d;*/ 709 /*UChar sPrimary = 0x6e;*/ 710 UChar fSecondary = 0x310d; 711 UChar sSecondary = 0x31a3; 712 UChar fTertiary = 0x310f; 713 UChar sTertiary = 0x31b7; 714 715 UCollationResult oposite; 716 if(result == UCOL_EQUAL) { 717 return UCOL_IDENTICAL; 718 } else if(result == UCOL_GREATER) { 719 oposite = UCOL_LESS; 720 } else { 721 oposite = UCOL_GREATER; 722 } 723 724 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) { 725 return UCOL_PRIMARY; 726 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) && 727 (swampEarlier(func, collator, opts, 0x310f, sTertiary, 
s, sLen, t, tLen) == result)) { 728 return UCOL_SECONDARY; 729 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) && 730 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) { 731 return UCOL_TERTIARY; 732 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) && 733 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) { 734 return UCOL_QUATERNARY; 735 } else { 736 return UCOL_IDENTICAL; 737 } 738 } 739 740 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) { 741 uint32_t i = 0; 742 743 if(res == UCOL_EQUAL || strength == 0xdeadbeef) { 744 buffer[0] = '='; 745 buffer[1] = '='; 746 buffer[2] = '\0'; 747 } else if(res == UCOL_GREATER) { 748 for(i = 0; i<strength+1; i++) { 749 buffer[i] = '>'; 750 } 751 buffer[strength+1] = '\0'; 752 } else { 753 for(i = 0; i<strength+1; i++) { 754 buffer[i] = '<'; 755 } 756 buffer[strength+1] = '\0'; 757 } 758 759 return buffer; 760 } 761 762 763 764 static void logFailure (const char *platform, const char *test, 765 const UChar *source, const uint32_t sLen, 766 const UChar *target, const uint32_t tLen, 767 UCollationResult realRes, uint32_t realStrength, 768 UCollationResult expRes, uint32_t expStrength, UBool error) { 769 770 uint32_t i = 0; 771 772 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256]; 773 static int32_t maxOutputLength = 0; 774 int32_t outputLength; 775 776 *sEsc = *tEsc = *s = *t = 0; 777 if(error == TRUE) { 778 log_err("Difference between expected and generated order. 
Run test with -v for more info\n"); 779 } else if(getTestOption(VERBOSITY_OPTION) == 0) { 780 return; 781 } 782 for(i = 0; i<sLen; i++) { 783 sprintf(b, "%04X", source[i]); 784 strcat(sEsc, "\\u"); 785 strcat(sEsc, b); 786 strcat(s, b); 787 strcat(s, " "); 788 if(source[i] < 0x80) { 789 sprintf(b, "(%c)", source[i]); 790 strcat(sEsc, b); 791 } 792 } 793 for(i = 0; i<tLen; i++) { 794 sprintf(b, "%04X", target[i]); 795 strcat(tEsc, "\\u"); 796 strcat(tEsc, b); 797 strcat(t, b); 798 strcat(t, " "); 799 if(target[i] < 0x80) { 800 sprintf(b, "(%c)", target[i]); 801 strcat(tEsc, b); 802 } 803 } 804 /* 805 strcpy(output, "[[ "); 806 strcat(output, sEsc); 807 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 808 strcat(output, tEsc); 809 810 strcat(output, " : "); 811 812 strcat(output, sEsc); 813 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 814 strcat(output, tEsc); 815 strcat(output, " ]] "); 816 817 log_verbose("%s", output); 818 */ 819 820 821 strcpy(output, "DIFF: "); 822 823 strcat(output, s); 824 strcat(output, " : "); 825 strcat(output, t); 826 827 strcat(output, test); 828 strcat(output, ": "); 829 830 strcat(output, sEsc); 831 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 832 strcat(output, tEsc); 833 834 strcat(output, " "); 835 836 strcat(output, platform); 837 strcat(output, ": "); 838 839 strcat(output, sEsc); 840 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 841 strcat(output, tEsc); 842 843 outputLength = (int32_t)strlen(output); 844 if(outputLength > maxOutputLength) { 845 maxOutputLength = outputLength; 846 U_ASSERT(outputLength < sizeof(output)); 847 } 848 849 log_verbose("%s\n", output); 850 851 } 852 853 /* 854 static void printOutRules(const UChar *rules) { 855 uint32_t len = u_strlen(rules); 856 uint32_t i = 0; 857 char toPrint; 858 uint32_t line = 0; 859 860 fprintf(stdout, "Rules:"); 861 862 for(i = 0; i<len; i++) { 863 if(rules[i]<0x7f && rules[i]>=0x20) { 864 toPrint 
= (char)rules[i]; 865 if(toPrint == '&') { 866 line = 1; 867 fprintf(stdout, "\n&"); 868 } else if(toPrint == ';') { 869 fprintf(stdout, "<<"); 870 line+=2; 871 } else if(toPrint == ',') { 872 fprintf(stdout, "<<<"); 873 line+=3; 874 } else { 875 fprintf(stdout, "%c", toPrint); 876 line++; 877 } 878 } else if(rules[i]<0x3400 || rules[i]>=0xa000) { 879 fprintf(stdout, "\\u%04X", rules[i]); 880 line+=6; 881 } 882 if(line>72) { 883 fprintf(stdout, "\n"); 884 line = 0; 885 } 886 } 887 888 log_verbose("\n"); 889 890 } 891 */ 892 893 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) { 894 uint32_t diffs = 0; 895 UCollationResult realResult; 896 uint32_t realStrength; 897 898 uint32_t sLen = u_strlen(first); 899 uint32_t tLen = u_strlen(second); 900 901 realResult = func(collator, opts, first, sLen, second, tLen); 902 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult); 903 904 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) { 905 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error); 906 diffs++; 907 } else if(realResult != UCOL_LESS || realStrength != strength) { 908 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error); 909 diffs++; 910 } 911 return diffs; 912 } 913 914 915 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) { 916 const UChar *rules = NULL, *current = NULL; 917 int32_t ruleLen = 0; 918 uint32_t strength = 0; 919 uint32_t chOffset = 0; uint32_t chLen = 0; 920 uint32_t exOffset = 0; uint32_t exLen = 0; 921 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 922 /* uint32_t rExpsLen = 0; */ 923 uint32_t firstLen = 0, secondLen = 0; 924 UBool varT = FALSE; UBool top_ = TRUE; 925 uint16_t specs = 0; 926 UBool startOfRules = 
TRUE; 927 UColTokenParser src; 928 UColOptionSet opts; 929 930 UChar first[256]; 931 UChar second[256]; 932 UChar *rulesCopy = NULL; 933 934 uint32_t UCAdiff = 0; 935 uint32_t Windiff = 1; 936 UParseError parseError; 937 938 uprv_memset(&src, 0, sizeof(UColTokenParser)); 939 src.opts = &opts; 940 941 rules = ucol_getRules(coll, &ruleLen); 942 943 /*printOutRules(rules);*/ 944 945 if(U_SUCCESS(*status) && ruleLen > 0) { 946 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 947 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 948 src.current = src.source = rulesCopy; 949 src.end = rulesCopy+ruleLen; 950 src.extraCurrent = src.end; 951 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 952 *first = *second = 0; 953 954 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 955 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 956 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 957 strength = src.parsedToken.strength; 958 chOffset = src.parsedToken.charsOffset; 959 chLen = src.parsedToken.charsLen; 960 exOffset = src.parsedToken.extensionOffset; 961 exLen = src.parsedToken.extensionLen; 962 prefixOffset = src.parsedToken.prefixOffset; 963 prefixLen = src.parsedToken.prefixLen; 964 specs = src.parsedToken.flags; 965 966 startOfRules = FALSE; 967 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 968 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 969 970 u_strncpy(second,src.source+chOffset, chLen); 971 second[chLen] = 0; 972 secondLen = chLen; 973 974 if(exLen > 0) { 975 u_strncat(first, src.source+exOffset, exLen); 976 first[firstLen+exLen] = 0; 977 firstLen += exLen; 978 } 979 980 if(strength != UCOL_TOK_RESET) { 981 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) { 982 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error); 
983 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/ 984 } 985 } 986 987 988 firstLen = chLen; 989 u_strcpy(first, second); 990 991 } 992 if(UCAdiff != 0 && Windiff != 0) { 993 log_verbose("\n"); 994 } 995 if(UCAdiff == 0) { 996 log_verbose("No immediate difference with %s!\n", refName); 997 } 998 if(Windiff == 0) { 999 log_verbose("No immediate difference with Win32!\n"); 1000 } 1001 uprv_free(src.source); 1002 } 1003 } 1004 1005 /* 1006 * Takes two CEs (lead and continuation) and 1007 * compares them as CEs should be compared: 1008 * primary vs. primary, secondary vs. secondary 1009 * tertiary vs. tertiary 1010 */ 1011 static int32_t compareCEs(uint32_t s1, uint32_t s2, 1012 uint32_t t1, uint32_t t2) { 1013 uint32_t s = 0, t = 0; 1014 if(s1 == t1 && s2 == t2) { 1015 return 0; 1016 } 1017 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 1018 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); 1019 if(s < t) { 1020 return -1; 1021 } else if(s > t) { 1022 return 1; 1023 } else { 1024 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; 1025 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; 1026 if(s < t) { 1027 return -1; 1028 } else if(s > t) { 1029 return 1; 1030 } else { 1031 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); 1032 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); 1033 if(s < t) { 1034 return -1; 1035 } else { 1036 return 1; 1037 } 1038 } 1039 } 1040 } 1041 1042 typedef struct { 1043 uint32_t startCE; 1044 uint32_t startContCE; 1045 uint32_t limitCE; 1046 uint32_t limitContCE; 1047 } indirectBoundaries; 1048 1049 /* these values are used for finding CE values for indirect positioning. */ 1050 /* Indirect positioning is a mechanism for allowing resets on symbolic */ 1051 /* values. It only works for resets and you cannot tailor indirect names */ 1052 /* An indirect name can define either an anchor point or a range. 
An */ 1053 /* anchor point behaves in exactly the same way as a code point in reset */ 1054 /* would, except that it cannot be tailored. A range (we currently only */ 1055 /* know for the [top] range will explicitly set the upper bound for */ 1056 /* generated CEs, thus allowing for better control over how many CEs can */ 1057 /* be squeezed between in the range without performance penalty. */ 1058 /* In that respect, we use [top] for tailoring of locales that use CJK */ 1059 /* characters. Other indirect values are currently a pure convenience, */ 1060 /* they can be used to assure that the CEs will be always positioned in */ 1061 /* the same place relative to a point with known properties (e.g. first */ 1062 /* primary ignorable). */ 1063 static indirectBoundaries ucolIndirectBoundaries[15]; 1064 static UBool indirectBoundariesSet = FALSE; 1065 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) { 1066 /* Set values for the top - TODO: once we have values for all the indirects, we are going */ 1067 /* to initalize here. 
*/ 1068 ucolIndirectBoundaries[indexR].startCE = start[0]; 1069 ucolIndirectBoundaries[indexR].startContCE = start[1]; 1070 if(end) { 1071 ucolIndirectBoundaries[indexR].limitCE = end[0]; 1072 ucolIndirectBoundaries[indexR].limitContCE = end[1]; 1073 } else { 1074 ucolIndirectBoundaries[indexR].limitCE = 0; 1075 ucolIndirectBoundaries[indexR].limitContCE = 0; 1076 } 1077 } 1078 1079 static void testCEs(UCollator *coll, UErrorCode *status) { 1080 const UChar *rules = NULL, *current = NULL; 1081 int32_t ruleLen = 0; 1082 1083 uint32_t strength = 0; 1084 uint32_t maxStrength = UCOL_IDENTICAL; 1085 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE; 1086 uint32_t lastCE; 1087 uint32_t lastContCE; 1088 1089 int32_t result = 0; 1090 uint32_t chOffset = 0; uint32_t chLen = 0; 1091 uint32_t exOffset = 0; uint32_t exLen = 0; 1092 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 1093 uint32_t oldOffset = 0; 1094 1095 /* uint32_t rExpsLen = 0; */ 1096 /* uint32_t firstLen = 0; */ 1097 uint16_t specs = 0; 1098 UBool varT = FALSE; UBool top_ = TRUE; 1099 UBool startOfRules = TRUE; 1100 UBool before = FALSE; 1101 UColTokenParser src; 1102 UColOptionSet opts; 1103 UParseError parseError; 1104 UChar *rulesCopy = NULL; 1105 collIterate *c = uprv_new_collIterate(status); 1106 UCAConstants *consts = NULL; 1107 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ 1108 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; 1109 const char *colLoc; 1110 UCollator *UCA = ucol_open("root", status); 1111 1112 if (U_FAILURE(*status)) { 1113 log_err("Could not open root collator %s\n", u_errorName(*status)); 1114 uprv_delete_collIterate(c); 1115 return; 1116 } 1117 1118 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); 1119 if (U_FAILURE(*status)) { 1120 log_err("Could not get collator name: %s\n", u_errorName(*status)); 1121 ucol_close(UCA); 1122 uprv_delete_collIterate(c); 1123 return; 1124 } 1125 1126 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1127 1128 consts 
= (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts); 1129 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0]; 1130 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */ 1131 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0]; 1132 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1]; 1133 1134 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND; 1135 1136 src.opts = &opts; 1137 1138 rules = ucol_getRules(coll, &ruleLen); 1139 1140 src.invUCA = ucol_initInverseUCA(status); 1141 1142 if(indirectBoundariesSet == FALSE) { 1143 /* UCOL_RESET_TOP_VALUE */ 1144 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1145 /* UCOL_FIRST_PRIMARY_IGNORABLE */ 1146 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0); 1147 /* UCOL_LAST_PRIMARY_IGNORABLE */ 1148 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0); 1149 /* UCOL_FIRST_SECONDARY_IGNORABLE */ 1150 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0); 1151 /* UCOL_LAST_SECONDARY_IGNORABLE */ 1152 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0); 1153 /* UCOL_FIRST_TERTIARY_IGNORABLE */ 1154 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0); 1155 /* UCOL_LAST_TERTIARY_IGNORABLE */ 1156 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0); 1157 /* UCOL_FIRST_VARIABLE */ 1158 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0); 1159 /* UCOL_LAST_VARIABLE */ 1160 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0); 1161 /* UCOL_FIRST_NON_VARIABLE */ 1162 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0); 1163 /* UCOL_LAST_NON_VARIABLE */ 1164 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1165 /* UCOL_FIRST_IMPLICIT */ 1166 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0); 1167 /* UCOL_LAST_IMPLICIT */ 1168 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, 
consts->UCA_FIRST_TRAILING); 1169 /* UCOL_FIRST_TRAILING */ 1170 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0); 1171 /* UCOL_LAST_TRAILING */ 1172 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0); 1173 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24); 1174 indirectBoundariesSet = TRUE; 1175 } 1176 1177 1178 if(U_SUCCESS(*status) && ruleLen > 0) { 1179 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 1180 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 1181 src.current = src.source = rulesCopy; 1182 src.end = rulesCopy+ruleLen; 1183 src.extraCurrent = src.end; 1184 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1185 1186 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1187 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1188 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 1189 strength = src.parsedToken.strength; 1190 chOffset = src.parsedToken.charsOffset; 1191 chLen = src.parsedToken.charsLen; 1192 exOffset = src.parsedToken.extensionOffset; 1193 exLen = src.parsedToken.extensionLen; 1194 prefixOffset = src.parsedToken.prefixOffset; 1195 prefixLen = src.parsedToken.prefixLen; 1196 specs = src.parsedToken.flags; 1197 1198 startOfRules = FALSE; 1199 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 1200 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 1201 1202 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status); 1203 1204 currCE = ucol_getNextCE(coll, c, status); 1205 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) { 1206 log_verbose("Thai prevowel detected. 
Will pick next CE\n"); 1207 currCE = ucol_getNextCE(coll, c, status); 1208 } 1209 1210 currContCE = ucol_getNextCE(coll, c, status); 1211 if(!isContinuation(currContCE)) { 1212 currContCE = 0; 1213 } 1214 1215 /* we need to repack CEs here */ 1216 1217 if(strength == UCOL_TOK_RESET) { 1218 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 1219 if(top_ == TRUE) { 1220 int32_t tokenIndex = src.parsedToken.indirectIndex; 1221 1222 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE; 1223 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE; 1224 } else { 1225 nextCE = baseCE = currCE; 1226 nextContCE = baseContCE = currContCE; 1227 } 1228 maxStrength = UCOL_IDENTICAL; 1229 } else { 1230 if(strength < maxStrength) { 1231 maxStrength = strength; 1232 if(baseCE == UCOL_RESET_TOP_VALUE) { 1233 log_verbose("Resetting to [top]\n"); 1234 nextCE = UCOL_NEXT_TOP_VALUE; 1235 nextContCE = UCOL_NEXT_TOP_CONT; 1236 } else { 1237 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength); 1238 } 1239 if(result < 0) { 1240 if(ucol_isTailored(coll, *(src.source+oldOffset), status)) { 1241 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset)); 1242 return; 1243 } else { 1244 log_err("%s: couldn't find the CE\n", colLoc); 1245 return; 1246 } 1247 } 1248 } 1249 1250 currCE &= 0xFFFFFF3F; 1251 currContCE &= 0xFFFFFFBF; 1252 1253 if(maxStrength == UCOL_IDENTICAL) { 1254 if(baseCE != currCE || baseContCE != currContCE) { 1255 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1256 } 1257 } else { 1258 if(strength == UCOL_IDENTICAL) { 1259 if(lastCE != currCE || lastContCE != currContCE) { 1260 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1261 } 1262 } else { 1263 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) { 1264 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= 
nextContCE)) {*/ 1265 log_err("%s: current CE is not less than base CE\n", colLoc); 1266 } 1267 if(!before) { 1268 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) { 1269 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1270 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1271 } 1272 } else { 1273 before = FALSE; 1274 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) { 1275 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1276 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1277 } 1278 } 1279 } 1280 } 1281 1282 } 1283 1284 oldOffset = chOffset; 1285 lastCE = currCE & 0xFFFFFF3F; 1286 lastContCE = currContCE & 0xFFFFFFBF; 1287 } 1288 uprv_free(src.source); 1289 } 1290 ucol_close(UCA); 1291 uprv_delete_collIterate(c); 1292 } 1293 1294 #if 0 1295 /* these locales are now picked from index RB */ 1296 static const char* localesToTest[] = { 1297 "ar", "bg", "ca", "cs", "da", 1298 "el", "en_BE", "en_US_POSIX", 1299 "es", "et", "fi", "fr", "hi", 1300 "hr", "hu", "is", "iw", "ja", 1301 "ko", "lt", "lv", "mk", "mt", 1302 "nb", "nn", "nn_NO", "pl", "ro", 1303 "ru", "sh", "sk", "sl", "sq", 1304 "sr", "sv", "th", "tr", "uk", 1305 "vi", "zh", "zh_TW" 1306 }; 1307 #endif 1308 1309 static const char* rulesToTest[] = { 1310 /* Funky fa rule */ 1311 "&\\u0622 < \\u0627 << \\u0671 < \\u0621", 1312 /*"& Z < p, P",*/ 1313 /* Cui Mins rules */ 1314 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/ 1315 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1316 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/ 1317 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1318 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/ 1319 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", 
/*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/ 1320 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/ 1321 }; 1322 1323 1324 static void TestCollations(void) { 1325 int32_t noOfLoc = uloc_countAvailable(); 1326 int32_t i = 0, j = 0; 1327 1328 UErrorCode status = U_ZERO_ERROR; 1329 char cName[256]; 1330 UChar name[256]; 1331 int32_t nameSize; 1332 1333 1334 const char *locName = NULL; 1335 UCollator *coll = NULL; 1336 UCollator *UCA = ucol_open("", &status); 1337 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status); 1338 if (U_FAILURE(status)) { 1339 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status)); 1340 return; 1341 } 1342 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 1343 1344 for(i = 0; i<noOfLoc; i++) { 1345 status = U_ZERO_ERROR; 1346 locName = uloc_getAvailable(i); 1347 if(uprv_strcmp("ja", locName) == 0) { 1348 log_verbose("Don't know how to test prefixes\n"); 1349 continue; 1350 } 1351 if(hasCollationElements(locName)) { 1352 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status); 1353 for(j = 0; j<nameSize; j++) { 1354 cName[j] = (char)name[j]; 1355 } 1356 cName[nameSize] = 0; 1357 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1358 coll = ucol_open(locName, &status); 1359 if(U_SUCCESS(status)) { 1360 testAgainstUCA(coll, UCA, "UCA", FALSE, &status); 1361 ucol_close(coll); 1362 } else { 1363 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status)); 1364 status = U_ZERO_ERROR; 1365 } 1366 } 1367 } 1368 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status); 1369 ucol_close(UCA); 1370 } 1371 1372 static void RamsRulesTest(void) { 1373 UErrorCode status = U_ZERO_ERROR; 1374 int32_t i = 0; 1375 UCollator *coll = NULL; 1376 UChar rule[2048]; 1377 uint32_t ruleLen; 1378 int32_t noOfLoc = uloc_countAvailable(); 1379 const char *locName = NULL; 
1380 1381 log_verbose("RamsRulesTest\n"); 1382 1383 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) { 1384 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */ 1385 return; 1386 } 1387 1388 for(i = 0; i<noOfLoc; i++) { 1389 locName = uloc_getAvailable(i); 1390 if(hasCollationElements(locName)) { 1391 if (uprv_strcmp("ja", locName)==0) { 1392 log_verbose("Don't know how to test Japanese because of prefixes\n"); 1393 continue; 1394 } 1395 if (uprv_strcmp("de__PHONEBOOK", locName)==0) { 1396 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n"); 1397 continue; 1398 } 1399 if (uprv_strcmp("bn", locName)==0 || 1400 uprv_strcmp("en_US_POSIX", locName)==0 || 1401 uprv_strcmp("km", locName)==0 || 1402 uprv_strcmp("km_KH", locName)==0 || 1403 uprv_strcmp("my", locName)==0 || 1404 uprv_strcmp("si", locName)==0 || 1405 uprv_strcmp("si_LK", locName)==0 || 1406 uprv_strcmp("zh", locName)==0 || 1407 uprv_strcmp("zh_Hant", locName)==0 1408 ) { 1409 log_verbose("Don't know how to test %s. 
" 1410 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName); 1411 continue; 1412 } 1413 log_verbose("Testing locale %s\n", locName); 1414 status = U_ZERO_ERROR; 1415 coll = ucol_open(locName, &status); 1416 if(U_SUCCESS(status)) { 1417 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) { 1418 if(coll->image->jamoSpecial == TRUE) { 1419 log_err("%s has special JAMOs\n", locName); 1420 } 1421 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); 1422 testCollator(coll, &status); 1423 testCEs(coll, &status); 1424 } else { 1425 log_verbose("Skipping %s: %s\n", locName, u_errorName(status)); 1426 } 1427 ucol_close(coll); 1428 } else { 1429 log_err("Could not open %s: %s\n", locName, u_errorName(status)); 1430 } 1431 } 1432 } 1433 1434 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) { 1435 log_verbose("Testing rule: %s\n", rulesToTest[i]); 1436 ruleLen = u_unescape(rulesToTest[i], rule, 2048); 1437 status = U_ZERO_ERROR; 1438 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1439 if(U_SUCCESS(status)) { 1440 testCollator(coll, &status); 1441 testCEs(coll, &status); 1442 ucol_close(coll); 1443 } else { 1444 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]); 1445 } 1446 } 1447 1448 } 1449 1450 static void IsTailoredTest(void) { 1451 UErrorCode status = U_ZERO_ERROR; 1452 uint32_t i = 0; 1453 UCollator *coll = NULL; 1454 UChar rule[2048]; 1455 UChar tailored[2048]; 1456 UChar notTailored[2048]; 1457 uint32_t ruleLen, tailoredLen, notTailoredLen; 1458 1459 log_verbose("IsTailoredTest\n"); 1460 1461 u_uastrcpy(rule, "&Z < A, B, C;c < d"); 1462 ruleLen = u_strlen(rule); 1463 1464 u_uastrcpy(tailored, "ABCcd"); 1465 tailoredLen = u_strlen(tailored); 1466 1467 u_uastrcpy(notTailored, "ZabD"); 1468 notTailoredLen = u_strlen(notTailored); 1469 1470 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1471 
if(U_SUCCESS(status)) { 1472 for(i = 0; i<tailoredLen; i++) { 1473 if(!ucol_isTailored(coll, tailored[i], &status)) { 1474 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]); 1475 } 1476 } 1477 for(i = 0; i<notTailoredLen; i++) { 1478 if(ucol_isTailored(coll, notTailored[i], &status)) { 1479 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]); 1480 } 1481 } 1482 ucol_close(coll); 1483 } 1484 else { 1485 log_err_status(status, "Can't tailor rules\n"); 1486 } 1487 /* Code coverage */ 1488 status = U_ZERO_ERROR; 1489 coll = ucol_open("ja", &status); 1490 if(!ucol_isTailored(coll, 0x4E9C, &status)) { 1491 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n"); 1492 } 1493 ucol_close(coll); 1494 } 1495 1496 1497 const static char chTest[][20] = { 1498 "c", 1499 "C", 1500 "ca", "cb", "cx", "cy", "CZ", 1501 "c\\u030C", "C\\u030C", 1502 "h", 1503 "H", 1504 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", 1505 "ch", "cH", "Ch", "CH", 1506 "cha", "charly", "che", "chh", "chch", "chr", 1507 "i", "I", "iarly", 1508 "r", "R", 1509 "r\\u030C", "R\\u030C", 1510 "s", 1511 "S", 1512 "s\\u030C", "S\\u030C", 1513 "z", "Z", 1514 "z\\u030C", "Z\\u030C" 1515 }; 1516 1517 static void TestChMove(void) { 1518 UChar t1[256] = {0}; 1519 UChar t2[256] = {0}; 1520 1521 uint32_t i = 0, j = 0; 1522 uint32_t size = 0; 1523 UErrorCode status = U_ZERO_ERROR; 1524 1525 UCollator *coll = ucol_open("cs", &status); 1526 1527 if(U_SUCCESS(status)) { 1528 size = sizeof(chTest)/sizeof(chTest[0]); 1529 for(i = 0; i < size-1; i++) { 1530 for(j = i+1; j < size; j++) { 1531 u_unescape(chTest[i], t1, 256); 1532 u_unescape(chTest[j], t2, 256); 1533 doTest(coll, t1, t2, UCOL_LESS); 1534 } 1535 } 1536 } 1537 else { 1538 log_data_err("Can't open collator"); 1539 } 1540 ucol_close(coll); 1541 } 1542 1543 1544 1545 1546 const static char impTest[][20] = { 1547 "\\u4e00", 1548 "a", 1549 "A", 1550 "b", 1551 "B", 1552 
"\\u4e01" 1553 }; 1554 1555 1556 static void TestImplicitTailoring(void) { 1557 static const struct { 1558 const char *rules; 1559 const char *data[10]; 1560 const uint32_t len; 1561 } tests[] = { 1562 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 }, 1563 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 }, 1564 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3}, 1565 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3} 1566 }; 1567 1568 int32_t i = 0; 1569 1570 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 1571 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 1572 } 1573 1574 /* 1575 UChar t1[256] = {0}; 1576 UChar t2[256] = {0}; 1577 1578 const char *rule = "&\\u4e00 < a <<< A < b <<< B"; 1579 1580 uint32_t i = 0, j = 0; 1581 uint32_t size = 0; 1582 uint32_t ruleLen = 0; 1583 UErrorCode status = U_ZERO_ERROR; 1584 UCollator *coll = NULL; 1585 ruleLen = u_unescape(rule, t1, 256); 1586 1587 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1588 1589 if(U_SUCCESS(status)) { 1590 size = sizeof(impTest)/sizeof(impTest[0]); 1591 for(i = 0; i < size-1; i++) { 1592 for(j = i+1; j < size; j++) { 1593 u_unescape(impTest[i], t1, 256); 1594 u_unescape(impTest[j], t2, 256); 1595 doTest(coll, t1, t2, UCOL_LESS); 1596 } 1597 } 1598 } 1599 else { 1600 log_err("Can't open collator"); 1601 } 1602 ucol_close(coll); 1603 */ 1604 } 1605 1606 static void TestFCDProblem(void) { 1607 UChar t1[256] = {0}; 1608 UChar t2[256] = {0}; 1609 1610 const char *s1 = "\\u0430\\u0306\\u0325"; 1611 const char *s2 = "\\u04D1\\u0325"; 1612 1613 UErrorCode status = U_ZERO_ERROR; 1614 UCollator *coll = ucol_open("", &status); 1615 u_unescape(s1, t1, 256); 1616 u_unescape(s2, t2, 256); 1617 1618 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 1619 doTest(coll, t1, t2, UCOL_EQUAL); 1620 1621 
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 1622 doTest(coll, t1, t2, UCOL_EQUAL); 1623 1624 ucol_close(coll); 1625 } 1626 1627 /* 1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC 1629 We're only using NFC/NFD in this test. 1630 */ 1631 #define NORM_BUFFER_TEST_LEN 18 1632 typedef struct { 1633 UChar32 u; 1634 UChar NFC[NORM_BUFFER_TEST_LEN]; 1635 UChar NFD[NORM_BUFFER_TEST_LEN]; 1636 } tester; 1637 1638 static void TestComposeDecompose(void) { 1639 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */ 1640 static const UChar UNICODESET_STR[] = { 1641 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61, 1642 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72, 1643 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0 1644 }; 1645 int32_t noOfLoc; 1646 int32_t i = 0, j = 0; 1647 1648 UErrorCode status = U_ZERO_ERROR; 1649 const char *locName = NULL; 1650 uint32_t nfcSize; 1651 uint32_t nfdSize; 1652 tester **t; 1653 uint32_t noCases = 0; 1654 UCollator *coll = NULL; 1655 UChar32 u = 0; 1656 UChar comp[NORM_BUFFER_TEST_LEN]; 1657 uint32_t len = 0; 1658 UCollationElements *iter; 1659 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status); 1660 int32_t charsToTestSize; 1661 1662 noOfLoc = uloc_countAvailable(); 1663 1664 coll = ucol_open("", &status); 1665 if (U_FAILURE(status)) { 1666 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status)); 1667 return; 1668 } 1669 charsToTestSize = uset_size(charsToTest); 1670 if (charsToTestSize <= 0) { 1671 log_err("Set was zero. 
Missing data?\n"); 1672 return; 1673 } 1674 t = malloc(charsToTestSize * sizeof(tester *)); 1675 t[0] = (tester *)malloc(sizeof(tester)); 1676 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize); 1677 1678 for(u = 0; u < charsToTestSize; u++) { 1679 UChar32 ch = uset_charAt(charsToTest, u); 1680 len = 0; 1681 UTF_APPEND_CHAR_UNSAFE(comp, len, ch); 1682 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1683 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 1684 1685 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0) 1686 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) { 1687 t[noCases]->u = ch; 1688 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) { 1689 u_strncpy(t[noCases]->NFC, comp, len); 1690 t[noCases]->NFC[len] = 0; 1691 } 1692 noCases++; 1693 t[noCases] = (tester *)malloc(sizeof(tester)); 1694 uprv_memset(t[noCases], 0, sizeof(tester)); 1695 } 1696 } 1697 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize); 1698 uset_close(charsToTest); 1699 charsToTest = NULL; 1700 1701 for(u=0; u<(UChar32)noCases; u++) { 1702 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1703 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u); 1704 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1705 } 1706 } 1707 /* 1708 for(u = 0; u < charsToTestSize; u++) { 1709 if(!(u&0xFFFF)) { 1710 log_verbose("%08X ", u); 1711 } 1712 uprv_memset(t[noCases], 0, sizeof(tester)); 1713 t[noCases]->u = u; 1714 len = 0; 1715 UTF_APPEND_CHAR_UNSAFE(comp, len, u); 1716 comp[len] = 0; 1717 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1718 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, 
&status); 1719 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL); 1720 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL); 1721 } 1722 */ 1723 1724 ucol_close(coll); 1725 1726 log_verbose("Testing locales, number of cases = %i\n", noCases); 1727 for(i = 0; i<noOfLoc; i++) { 1728 status = U_ZERO_ERROR; 1729 locName = uloc_getAvailable(i); 1730 if(hasCollationElements(locName)) { 1731 char cName[256]; 1732 UChar name[256]; 1733 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status); 1734 1735 for(j = 0; j<nameSize; j++) { 1736 cName[j] = (char)name[j]; 1737 } 1738 cName[nameSize] = 0; 1739 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1740 1741 coll = ucol_open(locName, &status); 1742 ucol_setStrength(coll, UCOL_IDENTICAL); 1743 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1744 1745 for(u=0; u<(UChar32)noCases; u++) { 1746 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1747 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName); 1748 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1749 log_verbose("Testing NFC\n"); 1750 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status); 1751 backAndForth(iter); 1752 log_verbose("Testing NFD\n"); 1753 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1754 backAndForth(iter); 1755 } 1756 } 1757 ucol_closeElements(iter); 1758 ucol_close(coll); 1759 } 1760 } 1761 for(u = 0; u <= (UChar32)noCases; u++) { 1762 free(t[u]); 1763 } 1764 free(t); 1765 } 1766 1767 static void TestEmptyRule(void) { 1768 UErrorCode status = U_ZERO_ERROR; 1769 UChar rulez[] = { 0 }; 1770 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1771 1772 ucol_close(coll); 1773 } 1774 1775 static void TestUCARules(void) { 1776 UErrorCode status = U_ZERO_ERROR; 1777 UChar b[256]; 1778 UChar *rules = b; 1779 uint32_t ruleLen = 0; 1780 UCollator *UCAfromRules = NULL; 1781 UCollator *coll = ucol_open("", &status); 
1782 if(status == U_FILE_ACCESS_ERROR) { 1783 log_data_err("Is your data around?\n"); 1784 return; 1785 } else if(U_FAILURE(status)) { 1786 log_err("Error opening collator\n"); 1787 return; 1788 } 1789 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256); 1790 1791 log_verbose("TestUCARules\n"); 1792 if(ruleLen > 256) { 1793 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar)); 1794 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen); 1795 } 1796 log_verbose("Rules length is %d\n", ruleLen); 1797 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1798 if(U_SUCCESS(status)) { 1799 ucol_close(UCAfromRules); 1800 } else { 1801 log_verbose("Unable to create a collator from UCARules!\n"); 1802 } 1803 /* 1804 u_unescape(blah, b, 256); 1805 ucol_getSortKey(coll, b, 1, res, 256); 1806 */ 1807 ucol_close(coll); 1808 if(rules != b) { 1809 free(rules); 1810 } 1811 } 1812 1813 1814 /* Pinyin tonal order */ 1815 /* 1816 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0) 1817 (w/macron)< (w/acute)< (w/caron)< (w/grave) 1818 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8) 1819 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec) 1820 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2) 1821 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9) 1822 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) < 1823 .. (\u00fc) 1824 1825 However, in testing we got the following order: 1826 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101) 1827 (w/acute)< (w/grave)< (w/caron)< (w/macron) 1828 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) < 1829 .. (\u0113) 1830 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b) 1831 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d) 1832 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) < 1833 .. (\u01d8) 1834 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. 
(\u016b) 1835 */ 1836 1837 static void TestBefore(void) { 1838 const static char *data[] = { 1839 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A", 1840 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E", 1841 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I", 1842 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O", 1843 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U", 1844 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc" 1845 }; 1846 genericRulesStarter( 1847 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0" 1848 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8" 1849 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec" 1850 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2" 1851 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9" 1852 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc", 1853 data, sizeof(data)/sizeof(data[0])); 1854 } 1855 1856 #if 0 1857 /* superceded by TestBeforePinyin */ 1858 static void TestJ784(void) { 1859 const static char *data[] = { 1860 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", 1861 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", 1862 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", 1863 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", 1864 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", 1865 "\\u00fc", 1866 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc" 1867 }; 1868 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0])); 1869 } 1870 #endif 1871 1872 #if 0 1873 /* superceded by the changes to the lv locale */ 1874 static void TestJ831(void) { 1875 const static char *data[] = { 1876 "I", 1877 "i", 1878 "Y", 1879 "y" 1880 }; 1881 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0])); 1882 } 1883 #endif 1884 1885 static void TestJ815(void) { 1886 const static char *data[] = { 1887 "aa", 1888 "Aa", 1889 "ab", 1890 "Ab", 1891 "ad", 1892 "Ad", 1893 "ae", 1894 "Ae", 1895 "\\u00e6", 1896 "\\u00c6", 1897 "af", 1898 "Af", 1899 "b", 1900 "B" 1901 }; 1902 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); 1903 genericRulesStarter("[backwards 
2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0])); 1904 } 1905 1906 1907 /* 1908 "& a < b < c < d& r < c", "& a < b < d& r < c", 1909 "& a < b < c < d& c < m", "& a < b < c < m < d", 1910 "& a < b < c < d& a < m", "& a < m < b < c < d", 1911 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d", 1912 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d", 1913 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e", 1914 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e", 1915 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e", 1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g", 1917 */ 1918 static void TestRedundantRules(void) { 1919 int32_t i; 1920 1921 static const struct { 1922 const char *rules; 1923 const char *expectedRules; 1924 const char *testdata[8]; 1925 uint32_t testdatalen; 1926 } tests[] = { 1927 /* this test conflicts with positioning of CODAN placeholder */ 1928 /*{ 1929 "& a <<< b <<< c << d <<< e& [before 1] e <<< x", 1930 "&\\u2089<<<x", 1931 {"\\u2089", "x"}, 2 1932 }, */ 1933 /* this test conflicts with the [before x] syntax tightening */ 1934 /*{ 1935 "& b <<< c <<< d << e <<< f& [before 1] f <<< x", 1936 "&\\u0252<<<x", 1937 {"\\u0252", "x"}, 2 1938 }, */ 1939 /* this test conflicts with the [before x] syntax tightening */ 1940 /*{ 1941 "& a < b <<< c << d <<< e& [before 1] e <<< x", 1942 "& a <<< x < b <<< c << d <<< e", 1943 {"a", "x", "b", "c", "d", "e"}, 6 1944 }, */ 1945 { 1946 "& a < b < c < d& [before 1] c < m", 1947 "& a < b < m < c < d", 1948 {"a", "b", "m", "c", "d"}, 5 1949 }, 1950 { 1951 "& a < b <<< c << d <<< e& [before 3] e <<< x", 1952 "& a < b <<< c << d <<< x <<< e", 1953 {"a", "b", "c", "d", "x", "e"}, 6 1954 }, 1955 /* this test conflicts with the [before x] syntax tightening */ 1956 /* { 1957 "& a < b <<< c << d <<< e& [before 2] e <<< x", 1958 "& a < b <<< c <<< x << 
d <<< e", 1959 {"a", "b", "c", "x", "d", "e"},, 6 1960 }, */ 1961 { 1962 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", 1963 "& a < b <<< c << d <<< e <<< f < x < g", 1964 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8 1965 }, 1966 { 1967 "& a <<< b << c < d& a < m", 1968 "& a <<< b << c < m < d", 1969 {"a", "b", "c", "m", "d"}, 5 1970 }, 1971 { 1972 "&a<b<<b\\u0301 &z<b", 1973 "&a<b\\u0301 &z<b", 1974 {"a", "b\\u0301", "z", "b"}, 4 1975 }, 1976 { 1977 "&z<m<<<q<<<m", 1978 "&z<q<<<m", 1979 {"z", "q", "m"},3 1980 }, 1981 { 1982 "&z<<<m<q<<<m", 1983 "&z<q<<<m", 1984 {"z", "q", "m"}, 3 1985 }, 1986 { 1987 "& a < b < c < d& r < c", 1988 "& a < b < d& r < c", 1989 {"a", "b", "d"}, 3 1990 }, 1991 { 1992 "& a < b < c < d& r < c", 1993 "& a < b < d& r < c", 1994 {"r", "c"}, 2 1995 }, 1996 { 1997 "& a < b < c < d& c < m", 1998 "& a < b < c < m < d", 1999 {"a", "b", "c", "m", "d"}, 5 2000 }, 2001 { 2002 "& a < b < c < d& a < m", 2003 "& a < m < b < c < d", 2004 {"a", "m", "b", "c", "d"}, 5 2005 } 2006 }; 2007 2008 2009 UCollator *credundant = NULL; 2010 UCollator *cresulting = NULL; 2011 UErrorCode status = U_ZERO_ERROR; 2012 UChar rlz[2048] = { 0 }; 2013 uint32_t rlen = 0; 2014 2015 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) { 2016 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules); 2017 rlen = u_unescape(tests[i].rules, rlz, 2048); 2018 2019 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2020 if(status == U_FILE_ACCESS_ERROR) { 2021 log_data_err("Is your data around?\n"); 2022 return; 2023 } else if(U_FAILURE(status)) { 2024 log_err("Error opening collator\n"); 2025 return; 2026 } 2027 2028 rlen = u_unescape(tests[i].expectedRules, rlz, 2048); 2029 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2030 2031 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status); 2032 2033 ucol_close(credundant); 2034 ucol_close(cresulting); 2035 2036 
log_verbose("testing using data\n"); 2037 2038 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen); 2039 } 2040 2041 } 2042 2043 static void TestExpansionSyntax(void) { 2044 int32_t i; 2045 2046 const static char *rules[] = { 2047 "&AE <<< a << b <<< c &d <<< f", 2048 "&AE <<< a <<< b << c << d < e < f <<< g", 2049 "&AE <<< B <<< C / D <<< F" 2050 }; 2051 2052 const static char *expectedRules[] = { 2053 "&A <<< a / E << b / E <<< c /E &d <<< f", 2054 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g", 2055 "&A <<< B / E <<< C / ED <<< F / E" 2056 }; 2057 2058 const static char *testdata[][8] = { 2059 {"AE", "a", "b", "c"}, 2060 {"AE", "a", "b", "c", "d", "e", "f", "g"}, 2061 {"AE", "B", "C"} /* / ED <<< F / E"},*/ 2062 }; 2063 2064 const static uint32_t testdatalen[] = { 2065 4, 2066 8, 2067 3 2068 }; 2069 2070 2071 2072 UCollator *credundant = NULL; 2073 UCollator *cresulting = NULL; 2074 UErrorCode status = U_ZERO_ERROR; 2075 UChar rlz[2048] = { 0 }; 2076 uint32_t rlen = 0; 2077 2078 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) { 2079 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]); 2080 rlen = u_unescape(rules[i], rlz, 2048); 2081 2082 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2083 if(status == U_FILE_ACCESS_ERROR) { 2084 log_data_err("Is your data around?\n"); 2085 return; 2086 } else if(U_FAILURE(status)) { 2087 log_err("Error opening collator\n"); 2088 return; 2089 } 2090 rlen = u_unescape(expectedRules[i], rlz, 2048); 2091 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2092 2093 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */ 2094 /* as a hard error test, but only in information mode */ 2095 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status); 2096 2097 ucol_close(credundant); 2098 ucol_close(cresulting); 2099 2100 log_verbose("testing using data\n"); 2101 2102 
genericRulesStarter(rules[i], testdata[i], testdatalen[i]); 2103 } 2104 } 2105 2106 static void TestCase(void) 2107 { 2108 const static UChar gRules[MAX_TOKEN_LEN] = 2109 /*" & 0 < 1,\u2461<a,A"*/ 2110 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 }; 2111 2112 const static UChar testCase[][MAX_TOKEN_LEN] = 2113 { 2114 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, 2115 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, 2116 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000}, 2117 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000} 2118 }; 2119 2120 const static UCollationResult caseTestResults[][9] = 2121 { 2122 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2123 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }, 2124 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2125 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER } 2126 }; 2127 2128 const static UColAttributeValue caseTestAttributes[][2] = 2129 { 2130 { UCOL_LOWER_FIRST, UCOL_OFF}, 2131 { UCOL_UPPER_FIRST, UCOL_OFF}, 2132 { UCOL_LOWER_FIRST, UCOL_ON}, 2133 { UCOL_UPPER_FIRST, UCOL_ON} 2134 }; 2135 int32_t i,j,k; 2136 UErrorCode status = U_ZERO_ERROR; 2137 UCollationElements *iter; 2138 UCollator *myCollation; 2139 myCollation = ucol_open("en_US", &status); 2140 2141 if(U_FAILURE(status)){ 2142 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2143 return; 2144 } 2145 log_verbose("Testing different case settings\n"); 2146 ucol_setStrength(myCollation, UCOL_TERTIARY); 2147 2148 for(k = 0; k<4; k++) { 2149 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2150 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2151 
log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]); 2152 for (i = 0; i < 3 ; i++) { 2153 for(j = i+1; j<4; j++) { 2154 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2155 } 2156 } 2157 } 2158 ucol_close(myCollation); 2159 2160 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status); 2161 if(U_FAILURE(status)){ 2162 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2163 return; 2164 } 2165 log_verbose("Testing different case settings with custom rules\n"); 2166 ucol_setStrength(myCollation, UCOL_TERTIARY); 2167 2168 for(k = 0; k<4; k++) { 2169 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2170 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2171 for (i = 0; i < 3 ; i++) { 2172 for(j = i+1; j<4; j++) { 2173 log_verbose("k:%d, i:%d, j:%d\n", k, i, j); 2174 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2175 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status); 2176 backAndForth(iter); 2177 ucol_closeElements(iter); 2178 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status); 2179 backAndForth(iter); 2180 ucol_closeElements(iter); 2181 } 2182 } 2183 } 2184 ucol_close(myCollation); 2185 { 2186 const static char *lowerFirst[] = { 2187 "h", 2188 "H", 2189 "ch", 2190 "Ch", 2191 "CH", 2192 "cha", 2193 "chA", 2194 "Cha", 2195 "ChA", 2196 "CHa", 2197 "CHA", 2198 "i", 2199 "I" 2200 }; 2201 2202 const static char *upperFirst[] = { 2203 "H", 2204 "h", 2205 "CH", 2206 "Ch", 2207 "ch", 2208 "CHA", 2209 "CHa", 2210 "ChA", 2211 "Cha", 2212 "chA", 2213 "cha", 2214 "I", 2215 "i" 2216 }; 2217 log_verbose("mixed case test\n"); 2218 log_verbose("lower first, case level off\n"); 2219 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, 
sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2220 log_verbose("upper first, case level off\n"); 2221 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2222 log_verbose("lower first, case level on\n"); 2223 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2224 log_verbose("upper first, case level on\n"); 2225 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2226 } 2227 2228 } 2229 2230 static void TestIncrementalNormalize(void) { 2231 2232 /*UChar baseA =0x61;*/ 2233 UChar baseA =0x41; 2234 /* UChar baseB = 0x42;*/ 2235 static const UChar ccMix[] = {0x316, 0x321, 0x300}; 2236 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/ 2237 /* 2238 0x316 is combining grave accent below, cc=220 2239 0x321 is combining palatalized hook below, cc=202 2240 0x300 is combining grave accent, cc=230 2241 */ 2242 2243 #define MAXSLEN 2000 2244 /*int maxSLen = 64000;*/ 2245 int sLen; 2246 int i; 2247 2248 UCollator *coll; 2249 UErrorCode status = U_ZERO_ERROR; 2250 UCollationResult result; 2251 2252 int32_t myQ = getTestOption(QUICK_OPTION); 2253 2254 if(getTestOption(QUICK_OPTION) < 0) { 2255 setTestOption(QUICK_OPTION, 1); 2256 } 2257 2258 { 2259 /* Test 1. 
Run very long unnormalized strings, to force overflow of*/ 2260 /* most buffers along the way.*/ 2261 UChar strA[MAXSLEN+1]; 2262 UChar strB[MAXSLEN+1]; 2263 2264 coll = ucol_open("en_US", &status); 2265 if(status == U_FILE_ACCESS_ERROR) { 2266 log_data_err("Is your data around?\n"); 2267 return; 2268 } else if(U_FAILURE(status)) { 2269 log_err("Error opening collator\n"); 2270 return; 2271 } 2272 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 2273 2274 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/ 2275 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/ 2276 /*for (sLen = 1000; sLen<1001; sLen++) {*/ 2277 for (sLen = 500; sLen<501; sLen++) { 2278 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/ 2279 strA[0] = baseA; 2280 strB[0] = baseA; 2281 for (i=1; i<=sLen-1; i++) { 2282 strA[i] = ccMix[i % 3]; 2283 strB[sLen-i] = ccMix[i % 3]; 2284 } 2285 strA[sLen] = 0; 2286 strB[sLen] = 0; 2287 2288 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/ 2289 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/ 2290 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/ 2291 doTest(coll, strA, strB, UCOL_EQUAL); 2292 } 2293 } 2294 2295 setTestOption(QUICK_OPTION, myQ); 2296 2297 2298 /* Test 2: Non-normal sequence in a string that extends to the last character*/ 2299 /* of the string. Checks a couple of edge cases.*/ 2300 2301 { 2302 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0}; 2303 static const UChar strB[] = {0x41, 0xc0, 0x316, 0}; 2304 ucol_setStrength(coll, UCOL_TERTIARY); 2305 doTest(coll, strA, strB, UCOL_EQUAL); 2306 } 2307 2308 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/ 2309 2310 { 2311 /* New UCA 3.1.1. 
2312 * test below used a code point from Desseret, which sorts differently 2313 * than d800 dc00 2314 */ 2315 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/ 2316 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0}; 2317 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0}; 2318 ucol_setStrength(coll, UCOL_TERTIARY); 2319 doTest(coll, strA, strB, UCOL_GREATER); 2320 } 2321 2322 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/ 2323 2324 { 2325 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00}; 2326 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00}; 2327 char sortKeyA[50]; 2328 char sortKeyAz[50]; 2329 char sortKeyB[50]; 2330 char sortKeyBz[50]; 2331 int r; 2332 2333 /* there used to be -3 here. Hmmmm.... */ 2334 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/ 2335 result = ucol_strcoll(coll, strA, 3, strB, 3); 2336 if (result != UCOL_GREATER) { 2337 log_err("ERROR 1 in test 4\n"); 2338 } 2339 result = ucol_strcoll(coll, strA, -1, strB, -1); 2340 if (result != UCOL_EQUAL) { 2341 log_err("ERROR 2 in test 4\n"); 2342 } 2343 2344 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2345 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2346 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2347 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2348 2349 r = strcmp(sortKeyA, sortKeyAz); 2350 if (r <= 0) { 2351 log_err("Error 3 in test 4\n"); 2352 } 2353 r = strcmp(sortKeyA, sortKeyB); 2354 if (r <= 0) { 2355 log_err("Error 4 in test 4\n"); 2356 } 2357 r = strcmp(sortKeyAz, sortKeyBz); 2358 if (r != 0) { 2359 log_err("Error 5 in test 4\n"); 2360 } 2361 2362 ucol_setStrength(coll, UCOL_IDENTICAL); 2363 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2364 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2365 ucol_getSortKey(coll, strB, 3, (uint8_t 
*)sortKeyB, sizeof(sortKeyB)); 2366 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2367 2368 r = strcmp(sortKeyA, sortKeyAz); 2369 if (r <= 0) { 2370 log_err("Error 6 in test 4\n"); 2371 } 2372 r = strcmp(sortKeyA, sortKeyB); 2373 if (r <= 0) { 2374 log_err("Error 7 in test 4\n"); 2375 } 2376 r = strcmp(sortKeyAz, sortKeyBz); 2377 if (r != 0) { 2378 log_err("Error 8 in test 4\n"); 2379 } 2380 ucol_setStrength(coll, UCOL_TERTIARY); 2381 } 2382 2383 2384 /* Test 5: Null characters in non-normal source strings.*/ 2385 2386 { 2387 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00}; 2388 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00}; 2389 char sortKeyA[50]; 2390 char sortKeyAz[50]; 2391 char sortKeyB[50]; 2392 char sortKeyBz[50]; 2393 int r; 2394 2395 result = ucol_strcoll(coll, strA, 6, strB, 6); 2396 if (result != UCOL_GREATER) { 2397 log_err("ERROR 1 in test 5\n"); 2398 } 2399 result = ucol_strcoll(coll, strA, -1, strB, -1); 2400 if (result != UCOL_EQUAL) { 2401 log_err("ERROR 2 in test 5\n"); 2402 } 2403 2404 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2405 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2406 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2407 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2408 2409 r = strcmp(sortKeyA, sortKeyAz); 2410 if (r <= 0) { 2411 log_err("Error 3 in test 5\n"); 2412 } 2413 r = strcmp(sortKeyA, sortKeyB); 2414 if (r <= 0) { 2415 log_err("Error 4 in test 5\n"); 2416 } 2417 r = strcmp(sortKeyAz, sortKeyBz); 2418 if (r != 0) { 2419 log_err("Error 5 in test 5\n"); 2420 } 2421 2422 ucol_setStrength(coll, UCOL_IDENTICAL); 2423 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2424 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2425 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2426 
ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2427 2428 r = strcmp(sortKeyA, sortKeyAz); 2429 if (r <= 0) { 2430 log_err("Error 6 in test 5\n"); 2431 } 2432 r = strcmp(sortKeyA, sortKeyB); 2433 if (r <= 0) { 2434 log_err("Error 7 in test 5\n"); 2435 } 2436 r = strcmp(sortKeyAz, sortKeyBz); 2437 if (r != 0) { 2438 log_err("Error 8 in test 5\n"); 2439 } 2440 ucol_setStrength(coll, UCOL_TERTIARY); 2441 } 2442 2443 2444 /* Test 6: Null character as base of a non-normal combining sequence.*/ 2445 2446 { 2447 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00}; 2448 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00}; 2449 2450 result = ucol_strcoll(coll, strA, 5, strB, 5); 2451 if (result != UCOL_LESS) { 2452 log_err("Error 1 in test 6\n"); 2453 } 2454 result = ucol_strcoll(coll, strA, -1, strB, -1); 2455 if (result != UCOL_EQUAL) { 2456 log_err("Error 2 in test 6\n"); 2457 } 2458 } 2459 2460 ucol_close(coll); 2461 } 2462 2463 2464 2465 #if 0 2466 static void TestGetCaseBit(void) { 2467 static const char *caseBitData[] = { 2468 "a", "A", "ch", "Ch", "CH", 2469 "\\uFF9E", "\\u0009" 2470 }; 2471 2472 static const uint8_t results[] = { 2473 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE, 2474 UCOL_UPPER_CASE, UCOL_LOWER_CASE 2475 }; 2476 2477 uint32_t i, blen = 0; 2478 UChar b[256] = {0}; 2479 UErrorCode status = U_ZERO_ERROR; 2480 UCollator *UCA = ucol_open("", &status); 2481 uint8_t res = 0; 2482 2483 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) { 2484 blen = u_unescape(caseBitData[i], b, 256); 2485 res = ucol_uprv_getCaseBits(UCA, b, blen, &status); 2486 if(results[i] != res) { 2487 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]); 2488 } 2489 } 2490 } 2491 #endif 2492 2493 static void TestHangulTailoring(void) { 2494 static const char *koreanData[] = { 2495 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", 
"\\u53ef", "\\u5475", 2496 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef", 2497 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888", 2498 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5", 2499 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E", 2500 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C" 2501 }; 2502 2503 const char *rules = 2504 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 " 2505 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef " 2506 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 " 2507 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 " 2508 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E " 2509 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C"; 2510 2511 2512 UErrorCode status = U_ZERO_ERROR; 2513 UChar rlz[2048] = { 0 }; 2514 uint32_t rlen = u_unescape(rules, rlz, 2048); 2515 2516 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 2517 if(status == U_FILE_ACCESS_ERROR) { 2518 log_data_err("Is your data around?\n"); 2519 return; 2520 } else if(U_FAILURE(status)) { 2521 log_err("Error opening collator\n"); 2522 return; 2523 } 2524 2525 log_verbose("Using start of korean rules\n"); 2526 2527 if(U_SUCCESS(status)) { 2528 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2529 } else { 2530 log_err("Unable to open collator with rules %s\n", rules); 2531 } 2532 2533 log_verbose("Setting jamoSpecial to TRUE and testing once more\n"); 2534 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */ 2535 genericOrderingTest(coll, koreanData, 
sizeof(koreanData)/sizeof(koreanData[0])); 2536 2537 ucol_close(coll); 2538 2539 log_verbose("Using ko__LOTUS locale\n"); 2540 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2541 } 2542 2543 static void TestCompressOverlap(void) { 2544 UChar secstr[150]; 2545 UChar tertstr[150]; 2546 UErrorCode status = U_ZERO_ERROR; 2547 UCollator *coll; 2548 char result[200]; 2549 uint32_t resultlen; 2550 int count = 0; 2551 char *tempptr; 2552 2553 coll = ucol_open("", &status); 2554 2555 if (U_FAILURE(status)) { 2556 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status)); 2557 return; 2558 } 2559 while (count < 149) { 2560 secstr[count] = 0x0020; /* [06, 05, 05] */ 2561 tertstr[count] = 0x0020; 2562 count ++; 2563 } 2564 2565 /* top down compression ----------------------------------- */ 2566 secstr[count] = 0x0332; /* [, 87, 05] */ 2567 tertstr[count] = 0x3000; /* [06, 05, 07] */ 2568 2569 /* no compression secstr should have 150 secondary bytes, tertstr should 2570 have 150 tertiary bytes. 
2571 with correct overlapping compression, secstr should have 4 secondary 2572 bytes, tertstr should have > 2 tertiary bytes */ 2573 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2574 tempptr = uprv_strchr(result, 1) + 1; 2575 while (*(tempptr + 1) != 1) { 2576 /* the last secondary collation element is not checked since it is not 2577 part of the compression */ 2578 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { 2579 log_err("Secondary compression overlapped\n"); 2580 } 2581 tempptr ++; 2582 } 2583 2584 /* tertiary top/bottom/common for en_US is similar to the secondary 2585 top/bottom/common */ 2586 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2587 tempptr = uprv_strrchr(result, 1) + 1; 2588 while (*(tempptr + 1) != 0) { 2589 /* the last secondary collation element is not checked since it is not 2590 part of the compression */ 2591 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { 2592 log_err("Tertiary compression overlapped\n"); 2593 } 2594 tempptr ++; 2595 } 2596 2597 /* bottom up compression ------------------------------------- */ 2598 secstr[count] = 0; 2599 tertstr[count] = 0; 2600 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2601 tempptr = uprv_strchr(result, 1) + 1; 2602 while (*(tempptr + 1) != 1) { 2603 /* the last secondary collation element is not checked since it is not 2604 part of the compression */ 2605 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { 2606 log_err("Secondary compression overlapped\n"); 2607 } 2608 tempptr ++; 2609 } 2610 2611 /* tertiary top/bottom/common for en_US is similar to the secondary 2612 top/bottom/common */ 2613 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2614 tempptr = uprv_strrchr(result, 1) + 1; 2615 while (*(tempptr + 1) != 0) { 2616 /* the last secondary collation element is not checked since it is not 2617 part of the compression */ 2618 if (*tempptr > coll->tertiaryBottom + 
coll->tertiaryBottomCount) { 2619 log_err("Tertiary compression overlapped\n"); 2620 } 2621 tempptr ++; 2622 } 2623 2624 ucol_close(coll); 2625 } 2626 2627 static void TestCyrillicTailoring(void) { 2628 static const char *test[] = { 2629 "\\u0410b", 2630 "\\u0410\\u0306a", 2631 "\\u04d0A" 2632 }; 2633 2634 /* Russian overrides contractions, so this test is not valid anymore */ 2635 /*genericLocaleStarter("ru", test, 3);*/ 2636 2637 genericLocaleStarter("root", test, 3); 2638 genericRulesStarter("&\\u0410 = \\u0410", test, 3); 2639 genericRulesStarter("&Z < \\u0410", test, 3); 2640 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); 2641 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); 2642 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); 2643 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); 2644 } 2645 2646 static void TestSuppressContractions(void) { 2647 2648 static const char *testNoCont2[] = { 2649 "\\u0410\\u0302a", 2650 "\\u0410\\u0306b", 2651 "\\u0410c" 2652 }; 2653 static const char *testNoCont[] = { 2654 "a\\u0410", 2655 "A\\u0410\\u0306", 2656 "\\uFF21\\u0410\\u0302" 2657 }; 2658 2659 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3); 2660 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3); 2661 } 2662 2663 static void TestContraction(void) { 2664 const static char *testrules[] = { 2665 "&A = AB / B", 2666 "&A = A\\u0306/\\u0306", 2667 "&c = ch / h" 2668 }; 2669 const static UChar testdata[][2] = { 2670 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, 2671 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, 2672 {0x0063 /* 'c' */, 0x0068 /* 'h' */} 2673 }; 2674 const static UChar testdata2[][2] = { 2675 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, 2676 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, 2677 {0x0063 /* 'c' */, 0x006C /* 'l' */} 2678 }; 2679 const static char *testrules3[] = { 2680 "&z < xyz &xyzw << B", 2681 "&z < xyz &xyz << B / w", 2682 "&z < ch &achm << B", 2683 
"&z < ch &a << B / chm", 2684 "&\\ud800\\udc00w << B", 2685 "&\\ud800\\udc00 << B / w", 2686 "&a\\ud800\\udc00m << B", 2687 "&a << B / \\ud800\\udc00m", 2688 }; 2689 2690 UErrorCode status = U_ZERO_ERROR; 2691 UCollator *coll; 2692 UChar rule[256] = {0}; 2693 uint32_t rlen = 0; 2694 int i; 2695 2696 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2697 UCollationElements *iter1; 2698 int j = 0; 2699 log_verbose("Rule %s for testing\n", testrules[i]); 2700 rlen = u_unescape(testrules[i], rule, 32); 2701 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2702 if (U_FAILURE(status)) { 2703 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2704 return; 2705 } 2706 iter1 = ucol_openElements(coll, testdata[i], 2, &status); 2707 if (U_FAILURE(status)) { 2708 log_err("Collation iterator creation failed\n"); 2709 return; 2710 } 2711 while (j < 2) { 2712 UCollationElements *iter2 = ucol_openElements(coll, 2713 &(testdata[i][j]), 2714 1, &status); 2715 uint32_t ce; 2716 if (U_FAILURE(status)) { 2717 log_err("Collation iterator creation failed\n"); 2718 return; 2719 } 2720 ce = ucol_next(iter2, &status); 2721 while (ce != UCOL_NULLORDER) { 2722 if ((uint32_t)ucol_next(iter1, &status) != ce) { 2723 log_err("Collation elements in contraction split does not match\n"); 2724 return; 2725 } 2726 ce = ucol_next(iter2, &status); 2727 } 2728 j ++; 2729 ucol_closeElements(iter2); 2730 } 2731 if (ucol_next(iter1, &status) != UCOL_NULLORDER) { 2732 log_err("Collation elements not exhausted\n"); 2733 return; 2734 } 2735 ucol_closeElements(iter1); 2736 ucol_close(coll); 2737 } 2738 2739 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256); 2740 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2741 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) { 2742 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2743 testdata2[0][0], testdata2[0][1], 
testdata2[1][0], 2744 testdata2[1][1]); 2745 return; 2746 } 2747 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { 2748 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2749 testdata2[1][0], testdata2[1][1], testdata2[2][0], 2750 testdata2[2][1]); 2751 return; 2752 } 2753 ucol_close(coll); 2754 2755 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { 2756 UCollator *coll1, 2757 *coll2; 2758 UCollationElements *iter1, 2759 *iter2; 2760 UChar ch = 0x0042 /* 'B' */; 2761 uint32_t ce; 2762 rlen = u_unescape(testrules3[i], rule, 32); 2763 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2764 rlen = u_unescape(testrules3[i + 1], rule, 32); 2765 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2766 if (U_FAILURE(status)) { 2767 log_err("Collator creation failed %s\n", testrules[i]); 2768 return; 2769 } 2770 iter1 = ucol_openElements(coll1, &ch, 1, &status); 2771 iter2 = ucol_openElements(coll2, &ch, 1, &status); 2772 if (U_FAILURE(status)) { 2773 log_err("Collation iterator creation failed\n"); 2774 return; 2775 } 2776 ce = ucol_next(iter1, &status); 2777 if (U_FAILURE(status)) { 2778 log_err("Retrieving ces failed\n"); 2779 return; 2780 } 2781 while (ce != UCOL_NULLORDER) { 2782 if (ce != (uint32_t)ucol_next(iter2, &status)) { 2783 log_err("CEs does not match\n"); 2784 return; 2785 } 2786 ce = ucol_next(iter1, &status); 2787 if (U_FAILURE(status)) { 2788 log_err("Retrieving ces failed\n"); 2789 return; 2790 } 2791 } 2792 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 2793 log_err("CEs not exhausted\n"); 2794 return; 2795 } 2796 ucol_closeElements(iter1); 2797 ucol_closeElements(iter2); 2798 ucol_close(coll1); 2799 ucol_close(coll2); 2800 } 2801 } 2802 2803 static void TestExpansion(void) { 2804 const static char *testrules[] = { 2805 "&J << K / B & K << M", 2806 "&J << K / B << M" 2807 }; 2808 const static UChar testdata[][3] = { 2809 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, 
#if 0
/* this test tests the current limitations of the engine */
/* it always fails, so it is disabled by default */
/*
 * Each braced section below declares its own rule string and one or more
 * expected orderings (tlimitNN) and feeds them to genericRulesStarter() or
 * genericRulesStarterWithOptions().  The whole function is compiled out by
 * the surrounding #if 0.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    /* one attribute, tried with the value ON and then OFF */
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data in this section are identical to the
     "nulls in contractions" section above; only the log text differs.
     Possibly a copy-paste that was never specialised -- confirm. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top: */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* two attributes set together: alternate handling and strength */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
2927 } 2928 2929 } 2930 #endif 2931 2932 static void TestBocsuCoverage(void) { 2933 UErrorCode status = U_ZERO_ERROR; 2934 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041"; 2935 UChar test[256] = {0}; 2936 uint32_t tlen = u_unescape(testString, test, 32); 2937 uint8_t key[256] = {0}; 2938 uint32_t klen = 0; 2939 2940 UCollator *coll = ucol_open("", &status); 2941 if(U_SUCCESS(status)) { 2942 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 2943 2944 klen = ucol_getSortKey(coll, test, tlen, key, 256); 2945 2946 ucol_close(coll); 2947 } else { 2948 log_data_err("Couldn't open UCA\n"); 2949 } 2950 } 2951 2952 static void TestVariableTopSetting(void) { 2953 UErrorCode status = U_ZERO_ERROR; 2954 const UChar *current = NULL; 2955 uint32_t varTopOriginal = 0, varTop1, varTop2; 2956 UCollator *coll = ucol_open("", &status); 2957 if(U_SUCCESS(status)) { 2958 2959 uint32_t strength = 0; 2960 uint16_t specs = 0; 2961 uint32_t chOffset = 0; 2962 uint32_t chLen = 0; 2963 uint32_t exOffset = 0; 2964 uint32_t exLen = 0; 2965 uint32_t oldChOffset = 0; 2966 uint32_t oldChLen = 0; 2967 uint32_t oldExOffset = 0; 2968 uint32_t oldExLen = 0; 2969 uint32_t prefixOffset = 0; 2970 uint32_t prefixLen = 0; 2971 2972 UBool startOfRules = TRUE; 2973 UColTokenParser src; 2974 UColOptionSet opts; 2975 2976 UChar *rulesCopy = NULL; 2977 uint32_t rulesLen; 2978 2979 UCollationResult result; 2980 2981 UChar first[256] = { 0 }; 2982 UChar second[256] = { 0 }; 2983 UParseError parseError; 2984 int32_t myQ = getTestOption(QUICK_OPTION); 2985 2986 uprv_memset(&src, 0, sizeof(UColTokenParser)); 2987 2988 src.opts = &opts; 2989 2990 if(getTestOption(QUICK_OPTION) <= 0) { 2991 setTestOption(QUICK_OPTION, 1); 2992 } 2993 2994 /* this test will fail when normalization is turned on */ 2995 /* therefore we always turn off exhaustive mode for it */ 2996 { /* QUICK > 0*/ 2997 log_verbose("Slide variable top over UCARules\n"); 2998 rulesLen = 
ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); 2999 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 3000 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE); 3001 3002 if(U_SUCCESS(status) && rulesLen > 0) { 3003 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 3004 src.current = src.source = rulesCopy; 3005 src.end = rulesCopy+rulesLen; 3006 src.extraCurrent = src.end; 3007 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 3008 3009 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 3010 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 3011 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { 3012 strength = src.parsedToken.strength; 3013 chOffset = src.parsedToken.charsOffset; 3014 chLen = src.parsedToken.charsLen; 3015 exOffset = src.parsedToken.extensionOffset; 3016 exLen = src.parsedToken.extensionLen; 3017 prefixOffset = src.parsedToken.prefixOffset; 3018 prefixLen = src.parsedToken.prefixLen; 3019 specs = src.parsedToken.flags; 3020 3021 startOfRules = FALSE; 3022 { 3023 log_verbose("%04X %d ", *(src.source+chOffset), chLen); 3024 } 3025 if(strength == UCOL_PRIMARY) { 3026 status = U_ZERO_ERROR; 3027 varTopOriginal = ucol_getVariableTop(coll, &status); 3028 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status); 3029 if(U_FAILURE(status)) { 3030 char buffer[256]; 3031 char *buf = buffer; 3032 uint32_t i = 0, j; 3033 uint32_t CE = UCOL_NO_MORE_CES; 3034 3035 /* before we start screaming, let's see if there is a problem with the rules */ 3036 UErrorCode collIterateStatus = U_ZERO_ERROR; 3037 collIterate *s = uprv_new_collIterate(&collIterateStatus); 3038 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus); 3039 3040 CE = ucol_getNextCE(coll, s, 
&status); 3041 3042 for(i = 0; i < oldChLen; i++) { 3043 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i)); 3044 buf += j; 3045 } 3046 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3047 log_verbose("= Expected failure for %s =", buffer); 3048 } else { 3049 if(uprv_collIterateAtEnd(s)) { 3050 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n", 3051 oldChOffset, u_errorName(status), buffer); 3052 } else { 3053 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n", 3054 buffer); 3055 } 3056 } 3057 uprv_delete_collIterate(s); 3058 } 3059 varTop2 = ucol_getVariableTop(coll, &status); 3060 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { 3061 log_err("cannot retrieve set varTop value!\n"); 3062 continue; 3063 } 3064 3065 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) { 3066 3067 u_strncpy(first, src.source+oldChOffset, oldChLen); 3068 u_strncpy(first+oldChLen, src.source+chOffset, chLen); 3069 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen); 3070 first[2*oldChLen+chLen] = 0; 3071 3072 if(oldExLen == 0) { 3073 u_strncpy(second, src.source+chOffset, chLen); 3074 second[chLen] = 0; 3075 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */ 3076 u_strncpy(second, src.source+oldExOffset, oldExLen); 3077 u_strncpy(second+oldChLen, src.source+chOffset, chLen); 3078 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen); 3079 second[2*oldExLen+chLen] = 0; 3080 } 3081 result = ucol_strcoll(coll, first, -1, second, -1); 3082 if(result == UCOL_EQUAL) { 3083 doTest(coll, first, second, UCOL_EQUAL); 3084 } else { 3085 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset)); 3086 } 3087 } 3088 } 3089 if(strength != UCOL_TOK_RESET) { 3090 oldChOffset = chOffset; 3091 oldChLen = chLen; 3092 oldExOffset = exOffset; 3093 oldExLen = exLen; 3094 } 3095 } 
3096 status = U_ZERO_ERROR; 3097 } 3098 else { 3099 log_err("Unexpected failure getting rules %s\n", u_errorName(status)); 3100 return; 3101 } 3102 if (U_FAILURE(status)) { 3103 log_err("Error parsing rules %s\n", u_errorName(status)); 3104 return; 3105 } 3106 status = U_ZERO_ERROR; 3107 } 3108 3109 setTestOption(QUICK_OPTION, myQ); 3110 3111 log_verbose("Testing setting variable top to contractions\n"); 3112 { 3113 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */ 3114 /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/ 3115 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos); 3116 while(*conts != 0) { 3117 if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */ 3118 varTop1 = ucol_setVariableTop(coll, conts, -1, &status); 3119 } else { 3120 varTop1 = ucol_setVariableTop(coll, conts, 3, &status); 3121 } 3122 if(U_FAILURE(status)) { 3123 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3124 /* ucol_setVariableTop() is documented to not accept 3-byte primaries, 3125 * therefore it is not an error when it complains about them. 
*/ 3126 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n", 3127 *conts, *(conts+1), *(conts+2)); 3128 } else { 3129 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n", 3130 *conts, *(conts+1), *(conts+2), u_errorName(status)); 3131 } 3132 status = U_ZERO_ERROR; 3133 } 3134 conts+=3; 3135 } 3136 3137 status = U_ZERO_ERROR; 3138 3139 first[0] = 0x0040; 3140 first[1] = 0x0050; 3141 first[2] = 0x0000; 3142 3143 ucol_setVariableTop(coll, first, -1, &status); 3144 3145 if(U_SUCCESS(status)) { 3146 log_err("Invalid contraction succeded in setting variable top!\n"); 3147 } 3148 3149 } 3150 3151 log_verbose("Test restoring variable top\n"); 3152 3153 status = U_ZERO_ERROR; 3154 ucol_restoreVariableTop(coll, varTopOriginal, &status); 3155 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { 3156 log_err("Couldn't restore old variable top\n"); 3157 } 3158 3159 log_verbose("Testing calling with error set\n"); 3160 3161 status = U_INTERNAL_PROGRAM_ERROR; 3162 varTop1 = ucol_setVariableTop(coll, first, 1, &status); 3163 varTop2 = ucol_getVariableTop(coll, &status); 3164 ucol_restoreVariableTop(coll, varTop2, &status); 3165 varTop1 = ucol_setVariableTop(NULL, first, 1, &status); 3166 varTop2 = ucol_getVariableTop(NULL, &status); 3167 ucol_restoreVariableTop(NULL, varTop2, &status); 3168 if(status != U_INTERNAL_PROGRAM_ERROR) { 3169 log_err("Bad reaction to passed error!\n"); 3170 } 3171 uprv_free(src.source); 3172 ucol_close(coll); 3173 } else { 3174 log_data_err("Couldn't open UCA collator\n"); 3175 } 3176 3177 } 3178 3179 static void TestNonChars(void) { 3180 static const char *test[] = { 3181 "\\u0000", /* ignorable */ 3182 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */ 3183 "\\uFDD0", "\\uFDEF", 3184 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */ 3185 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. 
*/ 3186 "\\U0003FFFE", "\\U0003FFFF", 3187 "\\U0004FFFE", "\\U0004FFFF", 3188 "\\U0005FFFE", "\\U0005FFFF", 3189 "\\U0006FFFE", "\\U0006FFFF", 3190 "\\U0007FFFE", "\\U0007FFFF", 3191 "\\U0008FFFE", "\\U0008FFFF", 3192 "\\U0009FFFE", "\\U0009FFFF", 3193 "\\U000AFFFE", "\\U000AFFFF", 3194 "\\U000BFFFE", "\\U000BFFFF", 3195 "\\U000CFFFE", "\\U000CFFFF", 3196 "\\U000DFFFE", "\\U000DFFFF", 3197 "\\U000EFFFE", "\\U000EFFFF", 3198 "\\U000FFFFE", "\\U000FFFFF", 3199 "\\U0010FFFE", "\\U0010FFFF", 3200 "\\uFFFF" /* special character with maximum primary weight */ 3201 }; 3202 UErrorCode status = U_ZERO_ERROR; 3203 UCollator *coll = ucol_open("en_US", &status); 3204 3205 log_verbose("Test non characters\n"); 3206 3207 if(U_SUCCESS(status)) { 3208 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS); 3209 } else { 3210 log_err_status(status, "Unable to open collator\n"); 3211 } 3212 3213 ucol_close(coll); 3214 } 3215 3216 static void TestExtremeCompression(void) { 3217 static char *test[4]; 3218 int32_t j = 0, i = 0; 3219 3220 for(i = 0; i<4; i++) { 3221 test[i] = (char *)malloc(2048*sizeof(char)); 3222 } 3223 3224 for(j = 20; j < 500; j++) { 3225 for(i = 0; i<4; i++) { 3226 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3227 test[i][j-1] = (char)('a'+i); 3228 test[i][j] = 0; 3229 } 3230 genericLocaleStarter("en_US", (const char **)test, 4); 3231 } 3232 3233 3234 for(i = 0; i<4; i++) { 3235 free(test[i]); 3236 } 3237 } 3238 3239 #if 0 3240 static void TestExtremeCompression(void) { 3241 static char *test[4]; 3242 int32_t j = 0, i = 0; 3243 UErrorCode status = U_ZERO_ERROR; 3244 UCollator *coll = ucol_open("en_US", status); 3245 for(i = 0; i<4; i++) { 3246 test[i] = (char *)malloc(2048*sizeof(char)); 3247 } 3248 for(j = 10; j < 2048; j++) { 3249 for(i = 0; i<4; i++) { 3250 uprv_memset(test[i], 'a', (j-2)*sizeof(char)); 3251 test[i][j-1] = (char)('a'+i); 3252 test[i][j] = 0; 3253 } 3254 } 3255 genericLocaleStarter("en_US", (const char **)test, 4); 3256 3257 for(j = 10; j < 
/*
 * Tailors a handful of supplementary code points (written as surrogate
 * pairs), alone and followed by BMP letters, and checks the listed strings
 * against that tailoring with genericRulesStarter.
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25 < \\ud805\\udc50"
    "< \\ud800\\udc00y < \\ud800\\udc00r"
    "< \\ud800\\udc00f << \\ud800\\udc00"
    "< \\ud800\\udc00fa << \\ud800\\udc00fb"
    "< \\ud800\\udc00a < c < b" ;

  /* strings in the order the tailoring is expected to produce */
  static const char *expectedOrder[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  genericRulesStarter(tailoring, expectedOrder,
                      sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
"\\u3066\\u30fc\\u305f", 3329 "\\u30c6\\u30fc\\u30bf", 3330 "\\u30c6\\u30a7\\u30bf", 3331 "\\u3066\\u3048\\u305f", 3332 "\\u3067\\u30fc\\u305f", 3333 "\\u30c7\\u30fc\\u30bf", 3334 "\\u30c7\\u30a7\\u30bf", 3335 "\\u3067\\u3048\\u305f", 3336 "\\u3066\\u30fc\\u305f\\u30fc", 3337 "\\u30c6\\u30fc\\u30bf\\u30a1", 3338 "\\u30c6\\u30a7\\u30bf\\u30fc", 3339 "\\u3066\\u3047\\u305f\\u3041", 3340 "\\u3066\\u3048\\u305f\\u30fc", 3341 "\\u3067\\u30fc\\u305f\\u30fc", 3342 "\\u30c7\\u30fc\\u30bf\\u30a1", 3343 "\\u3067\\u30a7\\u305f\\u30a1", 3344 "\\u30c7\\u3047\\u30bf\\u3041", 3345 "\\u30c7\\u30a8\\u30bf\\u30a2", 3346 "\\u3072\\u3086", 3347 "\\u3073\\u3085\\u3042", 3348 "\\u3074\\u3085\\u3042", 3349 "\\u3073\\u3085\\u3042\\u30fc", 3350 "\\u30d3\\u30e5\\u30a2\\u30fc", 3351 "\\u3074\\u3085\\u3042\\u30fc", 3352 "\\u30d4\\u30e5\\u30a2\\u30fc", 3353 "\\u30d2\\u30e5\\u30a6", 3354 "\\u30d2\\u30e6\\u30a6", 3355 "\\u30d4\\u30e5\\u30a6\\u30a2", 3356 "\\u3073\\u3085\\u30fc\\u3042\\u30fc", 3357 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc", 3358 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc", 3359 "\\u3072\\u3085\\u3093", 3360 "\\u3074\\u3085\\u3093", 3361 "\\u3075\\u30fc\\u308a", 3362 "\\u30d5\\u30fc\\u30ea", 3363 "\\u3075\\u3045\\u308a", 3364 "\\u3075\\u30a5\\u308a", 3365 "\\u3075\\u30a5\\u30ea", 3366 "\\u30d5\\u30a6\\u30ea", 3367 "\\u3076\\u30fc\\u308a", 3368 "\\u30d6\\u30fc\\u30ea", 3369 "\\u3076\\u3045\\u308a", 3370 "\\u30d6\\u30a5\\u308a", 3371 "\\u3077\\u3046\\u308a", 3372 "\\u30d7\\u30a6\\u30ea", 3373 "\\u3075\\u30fc\\u308a\\u30fc", 3374 "\\u30d5\\u30a5\\u30ea\\u30fc", 3375 "\\u3075\\u30a5\\u308a\\u30a3", 3376 "\\u30d5\\u3045\\u308a\\u3043", 3377 "\\u30d5\\u30a6\\u30ea\\u30fc", 3378 "\\u3075\\u3046\\u308a\\u3043", 3379 "\\u30d6\\u30a6\\u30ea\\u30a4", 3380 "\\u3077\\u30fc\\u308a\\u30fc", 3381 "\\u3077\\u30a5\\u308a\\u30a4", 3382 "\\u3077\\u3046\\u308a\\u30fc", 3383 "\\u30d7\\u30a6\\u30ea\\u30a4", 3384 "\\u30d5\\u30fd", 3385 "\\u3075\\u309e", 3386 "\\u3076\\u309d", 3387 "\\u3076\\u3075", 3388 
"\\u3076\\u30d5", 3389 "\\u30d6\\u3075", 3390 "\\u30d6\\u30d5", 3391 "\\u3076\\u309e", 3392 "\\u3076\\u3077", 3393 "\\u30d6\\u3077", 3394 "\\u3077\\u309d", 3395 "\\u30d7\\u30fd", 3396 "\\u3077\\u3075", 3397 }; 3398 3399 static const char *test2[] = { 3400 "\\u306f\\u309d", /* H\\u309d */ 3401 "\\u30cf\\u30fd", /* K\\u30fd */ 3402 "\\u306f\\u306f", /* HH */ 3403 "\\u306f\\u30cf", /* HK */ 3404 "\\u30cf\\u30cf", /* KK */ 3405 "\\u306f\\u309e", /* H\\u309e */ 3406 "\\u30cf\\u30fe", /* K\\u30fe */ 3407 "\\u306f\\u3070", /* HH\\u309b */ 3408 "\\u30cf\\u30d0", /* KK\\u309b */ 3409 "\\u306f\\u3071", /* HH\\u309c */ 3410 "\\u30cf\\u3071", /* KH\\u309c */ 3411 "\\u30cf\\u30d1", /* KK\\u309c */ 3412 "\\u3070\\u309d", /* H\\u309b\\u309d */ 3413 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */ 3414 "\\u3070\\u306f", /* H\\u309bH */ 3415 "\\u30d0\\u30cf", /* K\\u309bK */ 3416 "\\u3070\\u309e", /* H\\u309b\\u309e */ 3417 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */ 3418 "\\u3070\\u3070", /* H\\u309bH\\u309b */ 3419 "\\u30d0\\u3070", /* K\\u309bH\\u309b */ 3420 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */ 3421 "\\u3070\\u3071", /* H\\u309bH\\u309c */ 3422 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */ 3423 "\\u3071\\u309d", /* H\\u309c\\u309d */ 3424 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */ 3425 "\\u3071\\u306f", /* H\\u309cH */ 3426 "\\u30d1\\u30cf", /* K\\u309cK */ 3427 "\\u3071\\u3070", /* H\\u309cH\\u309b */ 3428 "\\u3071\\u30d0", /* H\\u309cK\\u309b */ 3429 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */ 3430 "\\u3071\\u3071", /* H\\u309cH\\u309c */ 3431 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */ 3432 }; 3433 /* 3434 static const char *test3[] = { 3435 "\\u221er\\u221e", 3436 "\\u221eR#", 3437 "\\u221et\\u221e", 3438 "#r\\u221e", 3439 "#R#", 3440 "#t%", 3441 "#T%", 3442 "8t\\u221e", 3443 "8T\\u221e", 3444 "8t#", 3445 "8T#", 3446 "8t%", 3447 "8T%", 3448 "8t8", 3449 "8T8", 3450 "\\u03c9r\\u221e", 3451 "\\u03a9R%", 3452 "rr\\u221e", 3453 "rR\\u221e", 3454 "Rr\\u221e", 3455 "RR\\u221e", 3456 "RT%", 3457 "rt8", 
3458 "tr\\u221e", 3459 "tr8", 3460 "TR8", 3461 "tt8", 3462 "\\u30b7\\u30e3\\u30fc\\u30ec", 3463 }; 3464 */ 3465 static const UColAttribute att[] = { UCOL_STRENGTH }; 3466 static const UColAttributeValue val[] = { UCOL_QUATERNARY }; 3467 3468 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING}; 3469 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED }; 3470 3471 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1); 3472 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1); 3473 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/ 3474 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2); 3475 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2); 3476 } 3477 3478 static void TestStrCollIdenticalPrefix(void) { 3479 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71"; 3480 const char* test[] = { 3481 "ab\\ud9b0\\udc70", 3482 "ab\\ud9b0\\udc71" 3483 }; 3484 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL); 3485 } 3486 /* Contractions should have all their canonically equivalent */ 3487 /* strings included */ 3488 static void TestContractionClosure(void) { 3489 static const struct { 3490 const char *rules; 3491 const char *data[10]; 3492 const uint32_t len; 3493 } tests[] = { 3494 { "&b=\\u00e4\\u00e4", 3495 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5}, 3496 { "&b=\\u00C5", 3497 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4}, 3498 }; 3499 uint32_t i; 3500 3501 3502 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3503 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL); 3504 } 3505 } 3506 3507 /* This tests also fails*/ 3508 static void TestBeforePrefixFailure(void) { 3509 static const struct { 3510 const 
char *rules; 3511 const char *data[10]; 3512 const uint32_t len; 3513 } tests[] = { 3514 { "&g <<< a" 3515 "&[before 3]\\uff41 <<< x", 3516 {"x", "\\uff41"}, 2 }, 3517 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3518 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3519 "&[before 3]\\u30a7<<<\\u30a9", 3520 {"\\u30a9", "\\u30a7"}, 2 }, 3521 { "&[before 3]\\u30a7<<<\\u30a9" 3522 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3523 "&\\u30A8=\\u30A8=\\u3048=\\uff74", 3524 {"\\u30a9", "\\u30a7"}, 2 }, 3525 }; 3526 uint32_t i; 3527 3528 3529 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3530 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3531 } 3532 3533 #if 0 3534 const char* rule1 = 3535 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3536 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3537 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"; 3538 const char* rule2 = 3539 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc" 3540 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3541 "&\\u30A8=\\u30A8=\\u3048=\\uff74"; 3542 const char* test[] = { 3543 "\\u30c6\\u30fc\\u30bf", 3544 "\\u30c6\\u30a7\\u30bf", 3545 }; 3546 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0])); 3547 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0])); 3548 /* this piece of code should be in some sort of verbose mode */ 3549 /* it gets the collation elements for elements and prints them */ 3550 /* This is useful when trying to see whether the problem is */ 3551 { 3552 UErrorCode status = U_ZERO_ERROR; 3553 uint32_t i = 0; 3554 UCollationElements *it = NULL; 3555 uint32_t CE; 3556 UChar string[256]; 3557 uint32_t uStringLen; 3558 UCollator *coll = NULL; 3559 3560 uStringLen = u_unescape(rule1, string, 256); 3561 3562 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3563 3564 /*coll = ucol_open("ja_JP_JIS", &status);*/ 3565 it = ucol_openElements(coll, string, 0, &status); 3566 3567 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) { 3568 log_verbose("%s\n", test[i]); 3569 uStringLen = u_unescape(test[i], 
string, 256); 3570 ucol_setText(it, string, uStringLen, &status); 3571 3572 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) { 3573 log_verbose("%08X\n", CE); 3574 } 3575 log_verbose("\n"); 3576 3577 } 3578 3579 ucol_closeElements(it); 3580 ucol_close(coll); 3581 } 3582 #endif 3583 } 3584 3585 static void TestPrefixCompose(void) { 3586 const char* rule1 = 3587 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc"; 3588 /* 3589 const char* test[] = { 3590 "\\u30c6\\u30fc\\u30bf", 3591 "\\u30c6\\u30a7\\u30bf", 3592 }; 3593 */ 3594 { 3595 UErrorCode status = U_ZERO_ERROR; 3596 /*uint32_t i = 0;*/ 3597 /*UCollationElements *it = NULL;*/ 3598 /* uint32_t CE;*/ 3599 UChar string[256]; 3600 uint32_t uStringLen; 3601 UCollator *coll = NULL; 3602 3603 uStringLen = u_unescape(rule1, string, 256); 3604 3605 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3606 ucol_close(coll); 3607 } 3608 3609 3610 } 3611 3612 /* 3613 [last variable] last variable value 3614 [last primary ignorable] largest CE for primary ignorable 3615 [last secondary ignorable] largest CE for secondary ignorable 3616 [last tertiary ignorable] largest CE for tertiary ignorable 3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) 3618 */ 3619 3620 static void TestRuleOptions(void) { 3621 /* values here are hardcoded and are correct for the current UCA 3622 * when the UCA changes, one might be forced to change these 3623 * values. 3624 */ 3625 3626 /* 3627 * These strings contain the last character before [variable top] 3628 * and the first and second characters (by primary weights) after it. 3629 * See FractionalUCA.txt. 
For example: 3630 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR 3631 [variable top = 0C FE] 3632 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT 3633 and 3634 00B4; [0D 0C, 05, 05] 3635 * 3636 * Note: Starting with UCA 6.0, the [variable top] collation element 3637 * is not the weight of any character or string, 3638 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable]. 3639 */ 3640 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F" 3641 #define FIRST_REGULAR_CHAR_STRING "\\u0060" 3642 #define SECOND_REGULAR_CHAR_STRING "\\u00B4" 3643 3644 /* 3645 * This string has to match the character that has the [last regular] weight 3646 * which changes with each UCA version. 3647 * See the bottom of FractionalUCA.txt which says something like 3648 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032 3649 * 3650 * Note: Starting with UCA 6.0, the [last regular] collation element 3651 * is not the weight of any character or string, 3652 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. 
3653 */ 3654 #define LAST_REGULAR_CHAR_STRING "\\U0001342E" 3655 3656 static const struct { 3657 const char *rules; 3658 const char *data[10]; 3659 const uint32_t len; 3660 } tests[] = { 3661 /* - all befores here amount to zero */ 3662 { "&[before 3][first tertiary ignorable]<<<a", 3663 { "\\u0000", "a"}, 2 3664 }, /* you cannot go before first tertiary ignorable */ 3665 3666 { "&[before 3][last tertiary ignorable]<<<a", 3667 { "\\u0000", "a"}, 2 3668 }, /* you cannot go before last tertiary ignorable */ 3669 3670 { "&[before 3][first secondary ignorable]<<<a", 3671 { "\\u0000", "a"}, 2 3672 }, /* you cannot go before first secondary ignorable */ 3673 3674 { "&[before 3][last secondary ignorable]<<<a", 3675 { "\\u0000", "a"}, 2 3676 }, /* you cannot go before first secondary ignorable */ 3677 3678 /* 'normal' befores */ 3679 3680 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a", 3681 { "c", "b", "\\u0332", "a" }, 4 3682 }, 3683 3684 /* we don't have a code point that corresponds to 3685 * the last primary ignorable 3686 */ 3687 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", 3688 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 3689 }, 3690 3691 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", 3692 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 3693 }, 3694 3695 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", 3696 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5 3697 }, 3698 3699 { "&[first regular]<a" 3700 "&[before 1][first regular]<b", 3701 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 3702 }, 3703 3704 { "&[before 1][last regular]<b" 3705 "&[last regular]<a", 3706 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4 3707 }, 3708 3709 { "&[before 1][first implicit]<b" 3710 "&[first implicit]<a", 3711 { "b", "\\u4e00", "a", "\\u4e01"}, 4 3712 }, 3713 3714 { "&[before 1][last implicit]<b" 3715 
/*
 * Smoke test for the [optimize ...] rule option: builds a collator whose
 * rules contain only an optimize set and runs a trivial ordering through it.
 * The option is not expected to change ordering, so there is nothing more
 * specific to verify here.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
      { "a", "b"},
      2 }
  };
  const uint32_t caseCount = (uint32_t)(sizeof(cases)/sizeof(cases[0]));
  uint32_t idx = 0;

  while(idx < caseCount) {
    genericRulesStarter(cases[idx].rules, cases[idx].data, cases[idx].len);
    ++idx;
  }
}
we got the same inconsistent results on AIX and Win2000 3770 weiv so you have two strings, you convert them to utf-8 and to utf-16BE 3771 cycheng (at) ca.ibm.c... yes 3772 weiv and then do the comparison 3773 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be 3774 weiv utf-16 strings look like a little endian ones in the example you sent me 3775 weiv It could be a bug - let me try to test it out 3776 cycheng (at) ca.ibm.c... ok 3777 cycheng (at) ca.ibm.c... we can wait till the conf. call 3778 cycheng (at) ca.ibm.c... next week 3779 weiv that would be great 3780 weiv hmmm 3781 weiv I might be wrong 3782 weiv let me play with it some more 3783 cycheng (at) ca.ibm.c... ok 3784 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be 3785 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2 3786 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be 3787 weiv ok 3788 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data 3789 weiv thanks 3790 cycheng (at) ca.ibm.c... 
the 4 strings we sent are just samples 3791 */ 3792 #if 0 3793 static void Alexis(void) { 3794 UErrorCode status = U_ZERO_ERROR; 3795 UCollator *coll = ucol_open("", &status); 3796 3797 3798 const char utf16be[2][4] = { 3799 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 }, 3800 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 } 3801 }; 3802 3803 const char utf8[2][4] = { 3804 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 }, 3805 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 }, 3806 }; 3807 3808 UCharIterator iterU161, iterU162; 3809 UCharIterator iterU81, iterU82; 3810 3811 UCollationResult resU16, resU8; 3812 3813 uiter_setUTF16BE(&iterU161, utf16be[0], 4); 3814 uiter_setUTF16BE(&iterU162, utf16be[1], 4); 3815 3816 uiter_setUTF8(&iterU81, utf8[0], 4); 3817 uiter_setUTF8(&iterU82, utf8[1], 4); 3818 3819 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3820 3821 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status); 3822 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status); 3823 3824 3825 if(resU16 != resU8) { 3826 log_err("different results\n"); 3827 } 3828 3829 ucol_close(coll); 3830 } 3831 #endif 3832 3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256 3834 static void Alexis2(void) { 3835 UErrorCode status = U_ZERO_ERROR; 3836 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3837 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3838 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3839 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0; 3840 3841 UConverter *conv = NULL; 3842 3843 UCharIterator U16BEItS, U16BEItT; 3844 UCharIterator U8ItS, U8ItT; 3845 3846 UCollationResult resU16, resU16BE, resU8; 3847 3848 static const char* const pairs[][2] = { 3849 { "\\ud800\\u0021", "\\uFFFC\\u0062"}, 3850 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" }, 3851 { "\\u0E40\\u0021", 
"\\u00A1\\u0021"}, 3852 { "\\u0E40\\u0021", "\\uFE57\\u0062"}, 3853 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"}, 3854 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"}, 3855 { "\\u0020", "\\u0020\\u0000"} 3856 /* 3857 5F20 (my result here) 3858 5F204E008E3F 3859 5F20 (your result here) 3860 */ 3861 }; 3862 3863 int32_t i = 0; 3864 3865 UCollator *coll = ucol_open("", &status); 3866 if(status == U_FILE_ACCESS_ERROR) { 3867 log_data_err("Is your data around?\n"); 3868 return; 3869 } else if(U_FAILURE(status)) { 3870 log_err("Error opening collator\n"); 3871 return; 3872 } 3873 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3874 conv = ucnv_open("UTF16BE", &status); 3875 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { 3876 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3877 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3878 3879 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT); 3880 3881 log_verbose("Result of strcoll is %i\n", resU16); 3882 3883 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status); 3884 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status); 3885 3886 /* use the original sizes, as the result from converter is in bytes */ 3887 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS); 3888 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT); 3889 3890 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status); 3891 3892 log_verbose("Result of U16BE is %i\n", resU16BE); 3893 3894 if(resU16 != resU16BE) { 3895 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]); 3896 } 3897 3898 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status); 3899 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status); 3900 3901 uiter_setUTF8(&U8ItS, U8Source, U8LenS); 3902 
uiter_setUTF8(&U8ItT, U8Target, U8LenT); 3903 3904 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status); 3905 3906 if(resU16 != resU8) { 3907 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]); 3908 } 3909 3910 } 3911 3912 ucol_close(coll); 3913 ucnv_close(conv); 3914 } 3915 3916 static void TestHebrewUCA(void) { 3917 UErrorCode status = U_ZERO_ERROR; 3918 static const char *first[] = { 3919 "d790d6b8d79cd795d6bcd7a9", 3920 "d790d79cd79ed7a7d799d799d7a1", 3921 "d790d6b4d79ed795d6bcd7a9", 3922 }; 3923 3924 char utf8String[3][256]; 3925 UChar utf16String[3][256]; 3926 3927 int32_t i = 0, j = 0; 3928 int32_t sizeUTF8[3]; 3929 int32_t sizeUTF16[3]; 3930 3931 UCollator *coll = ucol_open("", &status); 3932 if (U_FAILURE(status)) { 3933 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status)); 3934 return; 3935 } 3936 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/ 3937 3938 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) { 3939 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status); 3940 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status); 3941 log_verbose("%i: "); 3942 for(j = 0; j < sizeUTF16[i]; j++) { 3943 /*log_verbose("\\u%04X", utf16String[i][j]);*/ 3944 log_verbose("%04X", utf16String[i][j]); 3945 } 3946 log_verbose("\n"); 3947 } 3948 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) { 3949 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) { 3950 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS); 3951 } 3952 } 3953 3954 ucol_close(coll); 3955 3956 } 3957 3958 static void TestPartialSortKeyTermination(void) { 3959 static const char* cases[] = { 3960 "\\u1234\\u1234\\udc00", 3961 "\\udc00\\ud800\\ud800" 3962 }; 3963 3964 int32_t i = sizeof(UCollator); 3965 3966 UErrorCode status = U_ZERO_ERROR; 3967 3968 UCollator *coll = ucol_open("", &status); 3969 3970 UCharIterator iter; 3971 3972 UChar 
currCase[256]; 3973 int32_t length = 0; 3974 int32_t pKeyLen = 0; 3975 3976 uint8_t key[256]; 3977 3978 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 3979 uint32_t state[2] = {0, 0}; 3980 length = u_unescape(cases[i], currCase, 256); 3981 uiter_setString(&iter, currCase, length); 3982 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status); 3983 3984 log_verbose("Done\n"); 3985 3986 } 3987 ucol_close(coll); 3988 } 3989 3990 static void TestSettings(void) { 3991 static const char* cases[] = { 3992 "apple", 3993 "Apple" 3994 }; 3995 3996 static const char* locales[] = { 3997 "", 3998 "en" 3999 }; 4000 4001 UErrorCode status = U_ZERO_ERROR; 4002 4003 int32_t i = 0, j = 0; 4004 4005 UChar source[256], target[256]; 4006 int32_t sLen = 0, tLen = 0; 4007 4008 UCollator *collateObject = NULL; 4009 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) { 4010 collateObject = ucol_open(locales[i], &status); 4011 ucol_setStrength(collateObject, UCOL_PRIMARY); 4012 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status); 4013 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) { 4014 sLen = u_unescape(cases[j-1], source, 256); 4015 source[sLen] = 0; 4016 tLen = u_unescape(cases[j], target, 256); 4017 source[tLen] = 0; 4018 doTest(collateObject, source, target, UCOL_EQUAL); 4019 } 4020 ucol_close(collateObject); 4021 } 4022 } 4023 4024 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) { 4025 UErrorCode status = U_ZERO_ERROR; 4026 int32_t errorNo = 0; 4027 /*const UChar *sourceRules = NULL;*/ 4028 /*int32_t sourceRulesLen = 0;*/ 4029 UColAttributeValue french = UCOL_OFF; 4030 int32_t cloneSize = 0; 4031 4032 if(!ucol_equals(source, target)) { 4033 log_err("Same collators, different address not equal\n"); 4034 errorNo++; 4035 } 4036 ucol_close(target); 4037 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { 
4038 /* currently, safeClone is implemented through getRules/openRules 4039 * so it is the same as the test below - I will comment that test out. 4040 */ 4041 /* real thing */ 4042 target = ucol_safeClone(source, NULL, &cloneSize, &status); 4043 if(U_FAILURE(status)) { 4044 log_err("Error creating clone\n"); 4045 errorNo++; 4046 return errorNo; 4047 } 4048 if(!ucol_equals(source, target)) { 4049 log_err("Collator different from it's clone\n"); 4050 errorNo++; 4051 } 4052 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status); 4053 if(french == UCOL_ON) { 4054 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 4055 } else { 4056 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 4057 } 4058 if(U_FAILURE(status)) { 4059 log_err("Error setting attributes\n"); 4060 errorNo++; 4061 return errorNo; 4062 } 4063 if(ucol_equals(source, target)) { 4064 log_err("Collators same even when options changed\n"); 4065 errorNo++; 4066 } 4067 ucol_close(target); 4068 /* commented out since safeClone uses exactly the same technique */ 4069 /* 4070 sourceRules = ucol_getRules(source, &sourceRulesLen); 4071 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4072 if(U_FAILURE(status)) { 4073 log_err("Error instantiating target from rules\n"); 4074 errorNo++; 4075 return errorNo; 4076 } 4077 if(!ucol_equals(source, target)) { 4078 log_err("Collator different from collator that was created from the same rules\n"); 4079 errorNo++; 4080 } 4081 ucol_close(target); 4082 */ 4083 } 4084 return errorNo; 4085 } 4086 4087 4088 static void TestEquals(void) { 4089 /* ucol_equals is not currently a public API. 
There is a chance that it will become 4090 * something like this, but currently it is only used by RuleBasedCollator::operator== 4091 */ 4092 /* test whether the two collators instantiated from the same locale are equal */ 4093 UErrorCode status = U_ZERO_ERROR; 4094 UParseError parseError; 4095 int32_t noOfLoc = uloc_countAvailable(); 4096 const char *locName = NULL; 4097 UCollator *source = NULL, *target = NULL; 4098 int32_t i = 0; 4099 4100 const char* rules[] = { 4101 "&l < lj <<< Lj <<< LJ", 4102 "&n < nj <<< Nj <<< NJ", 4103 "&ae <<< \\u00e4", 4104 "&AE <<< \\u00c4" 4105 }; 4106 /* 4107 const char* badRules[] = { 4108 "&l <<< Lj", 4109 "&n < nj <<< nJ <<< NJ", 4110 "&a <<< \\u00e4", 4111 "&AE <<< \\u00c4 <<< x" 4112 }; 4113 */ 4114 4115 UChar sourceRules[1024], targetRules[1024]; 4116 int32_t sourceRulesSize = 0, targetRulesSize = 0; 4117 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]); 4118 4119 for(i = 0; i < rulesSize; i++) { 4120 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize); 4121 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize); 4122 } 4123 4124 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4125 if(status == U_FILE_ACCESS_ERROR) { 4126 log_data_err("Is your data around?\n"); 4127 return; 4128 } else if(U_FAILURE(status)) { 4129 log_err("Error opening collator\n"); 4130 return; 4131 } 4132 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4133 if(!ucol_equals(source, target)) { 4134 log_err("Equivalent collators not equal!\n"); 4135 } 4136 ucol_close(source); 4137 ucol_close(target); 4138 4139 source = ucol_open("root", &status); 4140 target = ucol_open("root", &status); 4141 log_verbose("Testing root\n"); 4142 if(!ucol_equals(source, source)) { 4143 log_err("Same collator not equal\n"); 4144 } 4145 if(TestEqualsForCollator(locName, source, 
target)) { 4146 log_err("Errors for root\n", locName); 4147 } 4148 ucol_close(source); 4149 4150 for(i = 0; i<noOfLoc; i++) { 4151 status = U_ZERO_ERROR; 4152 locName = uloc_getAvailable(i); 4153 /*if(hasCollationElements(locName)) {*/ 4154 log_verbose("Testing equality for locale %s\n", locName); 4155 source = ucol_open(locName, &status); 4156 target = ucol_open(locName, &status); 4157 if (U_FAILURE(status)) { 4158 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status)); 4159 continue; 4160 } 4161 if(TestEqualsForCollator(locName, source, target)) { 4162 log_err("Errors for locale %s\n", locName); 4163 } 4164 ucol_close(source); 4165 /*}*/ 4166 } 4167 } 4168 4169 static void TestJ2726(void) { 4170 UChar a[2] = { 0x61, 0x00 }; /*"a"*/ 4171 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/ 4172 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/ 4173 UErrorCode status = U_ZERO_ERROR; 4174 UCollator *coll = ucol_open("en", &status); 4175 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 4176 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4177 doTest(coll, a, aSpace, UCOL_EQUAL); 4178 doTest(coll, aSpace, a, UCOL_EQUAL); 4179 doTest(coll, a, spaceA, UCOL_EQUAL); 4180 doTest(coll, spaceA, a, UCOL_EQUAL); 4181 doTest(coll, spaceA, aSpace, UCOL_EQUAL); 4182 doTest(coll, aSpace, spaceA, UCOL_EQUAL); 4183 ucol_close(coll); 4184 } 4185 4186 static void NullRule(void) { 4187 UChar r[3] = {0}; 4188 UErrorCode status = U_ZERO_ERROR; 4189 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4190 if(U_SUCCESS(status)) { 4191 log_err("This should have been an error!\n"); 4192 ucol_close(coll); 4193 } else { 4194 status = U_ZERO_ERROR; 4195 } 4196 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4197 if(U_FAILURE(status)) { 4198 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status)); 4199 } else { 4200 
ucol_close(coll); 4201 } 4202 } 4203 4204 /** 4205 * Test for CollationElementIterator previous and next for the whole set of 4206 * unicode characters with normalization on. 4207 */ 4208 static void TestNumericCollation(void) 4209 { 4210 UErrorCode status = U_ZERO_ERROR; 4211 4212 const static char *basicTestStrings[]={ 4213 "hello1", 4214 "hello2", 4215 "hello2002", 4216 "hello2003", 4217 "hello123456", 4218 "hello1234567", 4219 "hello10000000", 4220 "hello100000000", 4221 "hello1000000000", 4222 "hello10000000000", 4223 }; 4224 4225 const static char *preZeroTestStrings[]={ 4226 "avery10000", 4227 "avery010000", 4228 "avery0010000", 4229 "avery00010000", 4230 "avery000010000", 4231 "avery0000010000", 4232 "avery00000010000", 4233 "avery000000010000", 4234 }; 4235 4236 const static char *thirtyTwoBitNumericStrings[]={ 4237 "avery42949672960", 4238 "avery42949672961", 4239 "avery42949672962", 4240 "avery429496729610" 4241 }; 4242 4243 const static char *longNumericStrings[]={ 4244 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings. 4245 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that 4246 are treated as multiple collation elements. 
*/ 4247 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */ 4248 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */ 4249 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */ 4250 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */ 4251 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */ 4252 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */ 4253 
"num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */ 4254 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */ 4255 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */ 4256 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */ 4257 }; 4258 4259 const static char *supplementaryDigits[] = { 4260 "\\uD835\\uDFCE", /* 0 */ 4261 "\\uD835\\uDFCF", /* 1 */ 4262 "\\uD835\\uDFD0", /* 2 */ 4263 "\\uD835\\uDFD1", /* 3 */ 4264 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */ 4265 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */ 4266 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */ 4267 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */ 4268 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */ 4269 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */ 4270 }; 4271 4272 const static char *foreignDigits[] = { 4273 "\\u0661", 4274 "\\u0662", 4275 "\\u0663", 4276 "\\u0661\\u0660", 4277 "\\u0661\\u0662", 4278 "\\u0661\\u0663", 4279 "\\u0662\\u0660", 4280 "\\u0662\\u0662", 4281 
"\\u0662\\u0663", 4282 "\\u0663\\u0660", 4283 "\\u0663\\u0662", 4284 "\\u0663\\u0663" 4285 }; 4286 4287 const static char *evenZeroes[] = { 4288 "2000", 4289 "2001", 4290 "2002", 4291 "2003" 4292 }; 4293 4294 UColAttribute att = UCOL_NUMERIC_COLLATION; 4295 UColAttributeValue val = UCOL_ON; 4296 4297 /* Open our collator. */ 4298 UCollator* coll = ucol_open("root", &status); 4299 if (U_FAILURE(status)){ 4300 log_err_status(status, "ERROR: in using ucol_open() -> %s\n", 4301 myErrorName(status)); 4302 return; 4303 } 4304 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1); 4305 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1); 4306 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1); 4307 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1); 4308 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1); 4309 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1); 4310 4311 /* Setting up our collator to do digits. */ 4312 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 4313 if (U_FAILURE(status)){ 4314 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n", 4315 myErrorName(status)); 4316 return; 4317 } 4318 4319 /* 4320 Testing that prepended zeroes still yield the correct collation behavior. 4321 We expect that every element in our strings array will be equal. 
4322 */ 4323 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL); 4324 4325 ucol_close(coll); 4326 } 4327 4328 static void TestTibetanConformance(void) 4329 { 4330 const char* test[] = { 4331 "\\u0FB2\\u0591\\u0F71\\u0061", 4332 "\\u0FB2\\u0F71\\u0061" 4333 }; 4334 4335 UErrorCode status = U_ZERO_ERROR; 4336 UCollator *coll = ucol_open("", &status); 4337 UChar source[100]; 4338 UChar target[100]; 4339 int result; 4340 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4341 if (U_SUCCESS(status)) { 4342 u_unescape(test[0], source, 100); 4343 u_unescape(test[1], target, 100); 4344 doTest(coll, source, target, UCOL_EQUAL); 4345 result = ucol_strcoll(coll, source, -1, target, -1); 4346 log_verbose("result %d\n", result); 4347 if (UCOL_EQUAL != result) { 4348 log_err("Tibetan comparison error\n"); 4349 } 4350 } 4351 ucol_close(coll); 4352 4353 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); 4354 } 4355 4356 static void TestPinyinProblem(void) { 4357 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" }; 4358 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); 4359 } 4360 4361 #define TST_UCOL_MAX_INPUT 0x220001 4362 #define topByte 0xFF000000; 4363 #define bottomByte 0xFF; 4364 #define fourBytes 0xFFFFFFFF; 4365 4366 4367 static void showImplicit(UChar32 i) { 4368 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) { 4369 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i)); 4370 } 4371 } 4372 4373 static void TestImplicitGeneration(void) { 4374 UErrorCode status = U_ZERO_ERROR; 4375 UChar32 last = 0; 4376 UChar32 current; 4377 UChar32 i = 0, j = 0; 4378 UChar32 roundtrip = 0; 4379 UChar32 lastBottom = 0; 4380 UChar32 currentBottom = 0; 4381 UChar32 lastTop = 0; 4382 UChar32 currentTop = 0; 4383 4384 UCollator *coll = ucol_open("root", &status); 4385 if(U_FAILURE(status)) { 4386 log_err_status(status, "Couldn't open UCA -> 
%s\n", u_errorName(status)); 4387 return; 4388 } 4389 4390 uprv_uca_getRawFromImplicit(0xE20303E7); 4391 4392 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) { 4393 current = uprv_uca_getImplicitFromRaw(i) & fourBytes; 4394 4395 /* check that it round-trips AND that all intervening ones are illegal*/ 4396 roundtrip = uprv_uca_getRawFromImplicit(current); 4397 if (roundtrip != i) { 4398 log_err("No roundtrip %08X\n", i); 4399 } 4400 if (last != 0) { 4401 for (j = last + 1; j < current; ++j) { 4402 roundtrip = uprv_uca_getRawFromImplicit(j); 4403 /* raise an error if it *doesn't* find an error*/ 4404 if (roundtrip != -1) { 4405 log_err("Fails to recognize illegal %08X\n", j); 4406 } 4407 } 4408 } 4409 /* now do other consistency checks*/ 4410 lastBottom = last & bottomByte; 4411 currentBottom = current & bottomByte; 4412 lastTop = last & topByte; 4413 currentTop = current & topByte; 4414 4415 /* print out some values for spot-checking*/ 4416 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) { 4417 showImplicit(i-3); 4418 showImplicit(i-2); 4419 showImplicit(i-1); 4420 showImplicit(i); 4421 showImplicit(i+1); 4422 showImplicit(i+2); 4423 } 4424 last = current; 4425 4426 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) { 4427 log_err("No raw <-> code point roundtrip for 0x%08X\n", i); 4428 } 4429 } 4430 showImplicit(TST_UCOL_MAX_INPUT-2); 4431 showImplicit(TST_UCOL_MAX_INPUT-1); 4432 showImplicit(TST_UCOL_MAX_INPUT); 4433 ucol_close(coll); 4434 } 4435 4436 /** 4437 * Iterate through the given iterator, checking to see that all the strings 4438 * in the expected array are present. 
4439 * @param expected array of strings we expect to see, or NULL 4440 * @param expectedCount number of elements of expected, or 0 4441 */ 4442 static int32_t checkUEnumeration(const char* msg, 4443 UEnumeration* iter, 4444 const char** expected, 4445 int32_t expectedCount) { 4446 UErrorCode ec = U_ZERO_ERROR; 4447 int32_t i = 0, n, j, bit; 4448 int32_t seenMask = 0; 4449 4450 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */ 4451 n = uenum_count(iter, &ec); 4452 if (!assertSuccess("count", &ec)) return -1; 4453 log_verbose("%s = [", msg); 4454 for (;; ++i) { 4455 const char* s = uenum_next(iter, NULL, &ec); 4456 if (!assertSuccess("snext", &ec) || s == NULL) break; 4457 if (i != 0) log_verbose(","); 4458 log_verbose("%s", s); 4459 /* check expected list */ 4460 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4461 if ((seenMask&bit) == 0 && 4462 uprv_strcmp(s, expected[j]) == 0) { 4463 seenMask |= bit; 4464 break; 4465 } 4466 } 4467 } 4468 log_verbose("] (%d)\n", i); 4469 assertTrue("count verified", i==n); 4470 /* did we see all expected strings? */ 4471 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4472 if ((seenMask&bit)!=0) { 4473 log_verbose("Ok: \"%s\" seen\n", expected[j]); 4474 } else { 4475 log_err("FAIL: \"%s\" not seen\n", expected[j]); 4476 } 4477 } 4478 return n; 4479 } 4480 4481 /** 4482 * Test new API added for separate collation tree. 
4483 */ 4484 static void TestSeparateTrees(void) { 4485 UErrorCode ec = U_ZERO_ERROR; 4486 UEnumeration *e = NULL; 4487 int32_t n = -1; 4488 UBool isAvailable; 4489 char loc[256]; 4490 4491 static const char* AVAIL[] = { "en", "de" }; 4492 4493 static const char* KW[] = { "collation" }; 4494 4495 static const char* KWVAL[] = { "phonebook", "stroke" }; 4496 4497 #if !UCONFIG_NO_SERVICE 4498 e = ucol_openAvailableLocales(&ec); 4499 if (e != NULL) { 4500 assertSuccess("ucol_openAvailableLocales", &ec); 4501 assertTrue("ucol_openAvailableLocales!=0", e!=0); 4502 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)); 4503 /* Don't need to check n because we check list */ 4504 uenum_close(e); 4505 } else { 4506 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec)); 4507 } 4508 #endif 4509 4510 e = ucol_getKeywords(&ec); 4511 if (e != NULL) { 4512 assertSuccess("ucol_getKeywords", &ec); 4513 assertTrue("ucol_getKeywords!=0", e!=0); 4514 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW)); 4515 /* Don't need to check n because we check list */ 4516 uenum_close(e); 4517 } else { 4518 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec)); 4519 } 4520 4521 e = ucol_getKeywordValues(KW[0], &ec); 4522 if (e != NULL) { 4523 assertSuccess("ucol_getKeywordValues", &ec); 4524 assertTrue("ucol_getKeywordValues!=0", e!=0); 4525 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL)); 4526 /* Don't need to check n because we check list */ 4527 uenum_close(e); 4528 } else { 4529 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec)); 4530 } 4531 4532 /* Try setting a warning before calling ucol_getKeywordValues */ 4533 ec = U_USING_FALLBACK_WARNING; 4534 e = ucol_getKeywordValues(KW[0], &ec); 4535 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) { 4536 
assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0); 4537 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL)); 4538 /* Don't need to check n because we check list */ 4539 uenum_close(e); 4540 } 4541 4542 /* 4543 U_DRAFT int32_t U_EXPORT2 4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 4545 const char* locale, UBool* isAvailable, 4546 UErrorCode* status); 4547 } 4548 */ 4549 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de", 4550 &isAvailable, &ec); 4551 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4552 assertEquals("getFunctionalEquivalent(de)", "de", loc); 4553 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", 4554 isAvailable == TRUE); 4555 } 4556 4557 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", 4558 &isAvailable, &ec); 4559 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4560 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc); 4561 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE", 4562 isAvailable == TRUE); 4563 } 4564 } 4565 4566 /* supercedes TestJ784 */ 4567 static void TestBeforePinyin(void) { 4568 const static char rules[] = { 4569 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0" 4570 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8" 4571 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC" 4572 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2" 4573 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9" 4574 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC" 4575 }; 4576 4577 const static char *test[] = { 4578 "l\\u0101", 4579 "la", 4580 "l\\u0101n", 4581 "lan ", 4582 "l\\u0113", 4583 "le", 4584 "l\\u0113n", 4585 "len" 4586 }; 4587 4588 const static char 
*test2[] = { 4589 "x\\u0101", 4590 "x\\u0100", 4591 "X\\u0101", 4592 "X\\u0100", 4593 "x\\u00E1", 4594 "x\\u00C1", 4595 "X\\u00E1", 4596 "X\\u00C1", 4597 "x\\u01CE", 4598 "x\\u01CD", 4599 "X\\u01CE", 4600 "X\\u01CD", 4601 "x\\u00E0", 4602 "x\\u00C0", 4603 "X\\u00E0", 4604 "X\\u00C0", 4605 "xa", 4606 "xA", 4607 "Xa", 4608 "XA", 4609 "x\\u0101x", 4610 "x\\u0100x", 4611 "x\\u00E1x", 4612 "x\\u00C1x", 4613 "x\\u01CEx", 4614 "x\\u01CDx", 4615 "x\\u00E0x", 4616 "x\\u00C0x", 4617 "xax", 4618 "xAx" 4619 }; 4620 4621 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4622 genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0])); 4623 genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0])); 4624 genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0])); 4625 } 4626 4627 static void TestBeforeTightening(void) { 4628 static const struct { 4629 const char *rules; 4630 UErrorCode expectedStatus; 4631 } tests[] = { 4632 { "&[before 1]a<x", U_ZERO_ERROR }, 4633 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR }, 4634 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR }, 4635 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR }, 4636 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR }, 4637 { "&[before 2]a<<x",U_ZERO_ERROR }, 4638 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR }, 4639 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR }, 4640 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR }, 4641 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR }, 4642 { "&[before 3]a<<<x",U_ZERO_ERROR }, 4643 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR }, 4644 { "&[before I]a = x",U_INVALID_FORMAT_ERROR } 4645 }; 4646 4647 int32_t i = 0; 4648 4649 UErrorCode status = U_ZERO_ERROR; 4650 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4651 uint32_t rlen = 0; 4652 4653 UCollator *coll = NULL; 4654 4655 4656 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4657 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN); 4658 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4659 if(status != 
tests[i].expectedStatus) { 4660 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n", 4661 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus)); 4662 } 4663 ucol_close(coll); 4664 status = U_ZERO_ERROR; 4665 } 4666 4667 } 4668 4669 #if 0 4670 &m < a 4671 &[before 1] a < x <<< X << q <<< Q < z 4672 assert: m <<< M < x <<< X << q <<< Q < z < a < n 4673 4674 &m < a 4675 &[before 2] a << x <<< X << q <<< Q < z 4676 assert: m <<< M < x <<< X << q <<< Q << a < z < n 4677 4678 &m < a 4679 &[before 3] a <<< x <<< X << q <<< Q < z 4680 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n 4681 4682 4683 &m << a 4684 &[before 1] a < x <<< X << q <<< Q < z 4685 assert: x <<< X << q <<< Q < z < m <<< M << a < n 4686 4687 &m << a 4688 &[before 2] a << x <<< X << q <<< Q < z 4689 assert: m <<< M << x <<< X << q <<< Q << a < z < n 4690 4691 &m << a 4692 &[before 3] a <<< x <<< X << q <<< Q < z 4693 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n 4694 4695 4696 &m <<< a 4697 &[before 1] a < x <<< X << q <<< Q < z 4698 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M 4699 4700 &m <<< a 4701 &[before 2] a << x <<< X << q <<< Q < z 4702 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n 4703 4704 &m <<< a 4705 &[before 3] a <<< x <<< X << q <<< Q < z 4706 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n 4707 4708 4709 &[before 1] s < x <<< X << q <<< Q < z 4710 assert: r <<< R < x <<< X << q <<< Q < z < s < n 4711 4712 &[before 2] s << x <<< X << q <<< Q < z 4713 assert: r <<< R < x <<< X << q <<< Q << s < z < n 4714 4715 &[before 3] s <<< x <<< X << q <<< Q < z 4716 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n 4717 4718 4719 &[before 1] \u24DC < x <<< X << q <<< Q < z 4720 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M 4721 4722 &[before 2] \u24DC << x <<< X << q <<< Q < z 4723 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n 4724 4725 &[before 3] \u24DC <<< x <<< X << q <<< Q < z 4726 
assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n 4727 #endif 4728 4729 4730 #if 0 4731 /* requires features not yet supported */ 4732 static void TestMoreBefore(void) { 4733 static const struct { 4734 const char* rules; 4735 const char* order[16]; 4736 int32_t size; 4737 } tests[] = { 4738 { "&m < a &[before 1] a < x <<< X << q <<< Q < z", 4739 { "m","M","x","X","q","Q","z","a","n" }, 9}, 4740 { "&m < a &[before 2] a << x <<< X << q <<< Q < z", 4741 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4742 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z", 4743 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4744 { "&m << a &[before 1] a < x <<< X << q <<< Q < z", 4745 { "x","X","q","Q","z","m","M","a","n" }, 9}, 4746 { "&m << a &[before 2] a << x <<< X << q <<< Q < z", 4747 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4748 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z", 4749 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4750 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z", 4751 { "x","X","q","Q","z","n","m","a","M" }, 9}, 4752 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z", 4753 { "x","X","q","Q","m","a","M","z","n" }, 9}, 4754 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z", 4755 { "m","x","X","a","M","q","Q","z","n" }, 9}, 4756 { "&[before 1] s < x <<< X << q <<< Q < z", 4757 { "r","R","x","X","q","Q","z","s","n" }, 9}, 4758 { "&[before 2] s << x <<< X << q <<< Q < z", 4759 { "r","R","x","X","q","Q","s","z","n" }, 9}, 4760 { "&[before 3] s <<< x <<< X << q <<< Q < z", 4761 { "r","R","x","X","s","q","Q","z","n" }, 9}, 4762 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z", 4763 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9}, 4764 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z", 4765 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9}, 4766 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z", 4767 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9} 4768 }; 4769 4770 int32_t i = 0; 4771 4772 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 
4773 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size); 4774 } 4775 } 4776 #endif 4777 4778 static void TestTailorNULL( void ) { 4779 const static char* rule = "&a <<< '\\u0000'"; 4780 UErrorCode status = U_ZERO_ERROR; 4781 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4782 uint32_t rlen = 0; 4783 UChar a = 1, null = 0; 4784 UCollationResult res = UCOL_EQUAL; 4785 4786 UCollator *coll = NULL; 4787 4788 4789 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN); 4790 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4791 4792 if(U_FAILURE(status)) { 4793 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status)); 4794 } else { 4795 res = ucol_strcoll(coll, &a, 1, &null, 1); 4796 4797 if(res != UCOL_LESS) { 4798 log_err("NULL was not tailored properly!\n"); 4799 } 4800 } 4801 4802 ucol_close(coll); 4803 } 4804 4805 static void 4806 TestUpperFirstQuaternary(void) 4807 { 4808 const char* tests[] = { "B", "b", "Bb", "bB" }; 4809 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST }; 4810 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST }; 4811 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4812 } 4813 4814 static void 4815 TestJ4960(void) 4816 { 4817 const char* tests[] = { "\\u00e2T", "aT" }; 4818 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL }; 4819 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON }; 4820 const char* tests2[] = { "a", "A" }; 4821 const char* rule = "&[first tertiary ignorable]=A=a"; 4822 UColAttribute att2[] = { UCOL_CASE_LEVEL }; 4823 UColAttributeValue attVals2[] = { UCOL_ON }; 4824 /* Test whether we correctly ignore primary ignorables on case level when */ 4825 /* we have only primary & case level */ 4826 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL); 4827 /* Test whether ICU4J will make 
case level for sortkeys that have primary strength */ 4828 /* and case level */ 4829 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4830 /* Test whether completely ignorable letters have case level info (they shouldn't) */ 4831 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL); 4832 } 4833 4834 static void 4835 TestJ5223(void) 4836 { 4837 static const char *test = "this is a test string"; 4838 UChar ustr[256]; 4839 int32_t ustr_length = u_unescape(test, ustr, 256); 4840 unsigned char sortkey[256]; 4841 int32_t sortkey_length; 4842 UErrorCode status = U_ZERO_ERROR; 4843 static UCollator *coll = NULL; 4844 coll = ucol_open("root", &status); 4845 if(U_FAILURE(status)) { 4846 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 4847 return; 4848 } 4849 ucol_setStrength(coll, UCOL_PRIMARY); 4850 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4851 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4852 if (U_FAILURE(status)) { 4853 log_err("Failed setting atributes\n"); 4854 return; 4855 } 4856 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0); 4857 if (sortkey_length > 256) return; 4858 4859 /* we mark the position where the null byte should be written in advance */ 4860 sortkey[sortkey_length-1] = 0xAA; 4861 4862 /* we set the buffer size one byte higher than needed */ 4863 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 4864 sortkey_length+1); 4865 4866 /* no error occurs (for me) */ 4867 if (sortkey[sortkey_length-1] == 0xAA) { 4868 log_err("Hit bug at first try\n"); 4869 } 4870 4871 /* we mark the position where the null byte should be written again */ 4872 sortkey[sortkey_length-1] = 0xAA; 4873 4874 /* this time we set the buffer size to the exact amount needed */ 4875 sortkey_length = 
ucol_getSortKey(coll, ustr, ustr_length, sortkey, 4876 sortkey_length); 4877 4878 /* now the trailing null byte is not written */ 4879 if (sortkey[sortkey_length-1] == 0xAA) { 4880 log_err("Hit bug at second try\n"); 4881 } 4882 4883 ucol_close(coll); 4884 } 4885 4886 /* Regression test for Thai partial sort key problem */ 4887 static void 4888 TestJ5232(void) 4889 { 4890 const static char *test[] = { 4891 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21", 4892 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21" 4893 }; 4894 4895 genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0])); 4896 } 4897 4898 static void 4899 TestJ5367(void) 4900 { 4901 const static char *test[] = { "a", "y" }; 4902 const char* rules = "&Ny << Y &[first secondary ignorable] <<< a"; 4903 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4904 } 4905 4906 static void 4907 TestVI5913(void) 4908 { 4909 UErrorCode status = U_ZERO_ERROR; 4910 int32_t i, j; 4911 UCollator *coll =NULL; 4912 uint8_t resColl[100], expColl[100]; 4913 int32_t rLen, tLen, ruleLen, sLen, kLen; 4914 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/ 4915 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ 4916 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/ 4917 static const UChar tData[][20]={ 4918 {0x1EAC, 0}, 4919 {0x0041, 0x0323, 0x0302, 0}, 4920 {0x1EA0, 0x0302, 0}, 4921 {0x00C2, 0x0323, 0}, 4922 {0x1ED8, 0}, /* O with dot and circumflex */ 4923 {0x1ECC, 0x0302, 0}, 4924 {0x1EB7, 0}, 4925 {0x1EA1, 0x0306, 0}, 4926 }; 4927 static const UChar tailorData[][20]={ 4928 {0x1FA2, 0}, /* Omega with 3 combining marks */ 4929 {0x03C9, 0x0313, 0x0300, 0x0345, 0}, 4930 {0x1FF3, 0x0313, 0x0300, 0}, 4931 {0x1F60, 0x0300, 0x0345, 0}, 4932 {0x1F62, 0x0345, 0}, 4933 {0x1FA0, 0x0300, 0}, 4934 }; 4935 static const UChar tailorData2[][20]={ 4936 {0x1E63, 0x030C, 0}, /* s with dot below + caron */ 4937 {0x0073, 
0x0323, 0x030C, 0}, 4938 {0x0073, 0x030C, 0x0323, 0}, 4939 }; 4940 static const UChar tailorData3[][20]={ 4941 {0x007a, 0}, /* z */ 4942 {0x0061, 0x0065, 0}, /* a + e */ 4943 {0x0061, 0x00ea, 0}, /* a + e with circumflex */ 4944 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */ 4945 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */ 4946 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */ 4947 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */ 4948 {0x00EA, 0}, /* e with circumflex */ 4949 }; 4950 4951 /* Test Vietnamese sort. */ 4952 coll = ucol_open("vi", &status); 4953 if(U_FAILURE(status)) { 4954 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 4955 return; 4956 } 4957 log_verbose("\n\nVI collation:"); 4958 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) { 4959 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4960 } 4961 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) { 4962 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 4963 } 4964 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) { 4965 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n"); 4966 } 4967 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) { 4968 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 4969 } 4970 4971 for (j=0; j<8; j++) { 4972 tLen = u_strlen(tData[j]); 4973 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 4974 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 4975 for(i = 0; i<rLen; i++) { 4976 log_verbose(" %02X", resColl[i]); 4977 } 4978 } 4979 4980 ucol_close(coll); 4981 4982 /* Test Romanian sort. 
*/ 4983 coll = ucol_open("ro", &status); 4984 log_verbose("\n\nRO collation:"); 4985 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) { 4986 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4987 } 4988 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) { 4989 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 4990 } 4991 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) { 4992 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 4993 } 4994 4995 for (j=4; j<8; j++) { 4996 tLen = u_strlen(tData[j]); 4997 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 4998 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 4999 for(i = 0; i<rLen; i++) { 5000 log_verbose(" %02X", resColl[i]); 5001 } 5002 } 5003 ucol_close(coll); 5004 5005 /* Test the precomposed Greek character with 3 combining marks. */ 5006 log_verbose("\n\nTailoring test: Greek character with 3 combining marks"); 5007 ruleLen = u_strlen(rule); 5008 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5009 if (U_FAILURE(status)) { 5010 log_err("ucol_openRules failed with %s\n", u_errorName(status)); 5011 return; 5012 } 5013 sLen = u_strlen(tailorData[0]); 5014 for (j=1; j<6; j++) { 5015 tLen = u_strlen(tailorData[j]); 5016 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) { 5017 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]); 5018 } 5019 } 5020 /* Test getSortKey. 
*/ 5021 tLen = u_strlen(tailorData[0]); 5022 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100); 5023 for (j=0; j<6; j++) { 5024 tLen = u_strlen(tailorData[j]); 5025 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100); 5026 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5027 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5028 for(i = 0; i<rLen; i++) { 5029 log_err(" %02X", resColl[i]); 5030 } 5031 } 5032 } 5033 ucol_close(coll); 5034 5035 log_verbose("\n\nTailoring test for s with caron:"); 5036 ruleLen = u_strlen(rule2); 5037 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5038 tLen = u_strlen(tailorData2[0]); 5039 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100); 5040 for (j=1; j<3; j++) { 5041 tLen = u_strlen(tailorData2[j]); 5042 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100); 5043 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5044 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5045 for(i = 0; i<rLen; i++) { 5046 log_err(" %02X", resColl[i]); 5047 } 5048 } 5049 } 5050 ucol_close(coll); 5051 5052 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); 5053 ruleLen = u_strlen(rule3); 5054 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5055 tLen = u_strlen(tailorData3[3]); 5056 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); 5057 for (j=4; j<6; j++) { 5058 tLen = u_strlen(tailorData3[j]); 5059 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); 5060 5061 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5062 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5063 for(i = 0; i<rLen; i++) { 5064 log_err(" %02X", resColl[i]); 5065 } 5066 } 5067 5068 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5069 for(i = 
0; i<rLen; i++) { 5070 log_verbose(" %02X", resColl[i]); 5071 } 5072 } 5073 ucol_close(coll); 5074 } 5075 5076 static void 5077 TestTailor6179(void) 5078 { 5079 UErrorCode status = U_ZERO_ERROR; 5080 int32_t i; 5081 UCollator *coll =NULL; 5082 uint8_t resColl[100]; 5083 int32_t rLen, tLen, ruleLen; 5084 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */ 5085 UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79, 5086 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20, 5087 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20, 5088 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0}; 5089 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */ 5090 UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61, 5091 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C, 5092 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E, 5093 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C, 5094 0x3C,0x3C,0x20,0x62,0}; 5095 5096 UChar tData1[][20]={ 5097 {0x61, 0}, 5098 {0x62, 0}, 5099 { 0xFDD0,0x009E, 0} 5100 }; 5101 UChar tData2[][20]={ 5102 {0x61, 0}, 5103 {0x62, 0}, 5104 { 0xFDD0,0x009E, 0} 5105 }; 5106 5107 /* 5108 * These values from FractionalUCA.txt will change, 5109 * and need to be updated here. 
5110 */ 5111 uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0}; 5112 uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0}; 5113 uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0}; 5114 uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0}; 5115 5116 /* Test [Last Primary ignorable] */ 5117 5118 log_verbose("\n\nTailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b "); 5119 ruleLen = u_strlen(rule1); 5120 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5121 if (U_FAILURE(status)) { 5122 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status)); 5123 return; 5124 } 5125 tLen = u_strlen(tData1[0]); 5126 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100); 5127 if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) { 5128 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen); 5129 for(i = 0; i<rLen; i++) { 5130 log_err(" %02X", resColl[i]); 5131 } 5132 } 5133 tLen = u_strlen(tData1[1]); 5134 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100); 5135 if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) { 5136 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen); 5137 for(i = 0; i<rLen; i++) { 5138 log_err(" %02X", resColl[i]); 5139 } 5140 } 5141 ucol_close(coll); 5142 5143 5144 /* Test [Last Secondary ignorable] */ 5145 log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b "); 5146 ruleLen = u_strlen(rule1); 5147 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5148 if (U_FAILURE(status)) { 5149 log_err("Tailoring test: &[last primary ignorable] failed!"); 5150 return; 5151 } 5152 tLen = u_strlen(tData2[0]); 5153 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); 5154 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 5155 for(i = 0; i<rLen; i++) { 5156 log_verbose(" %02X", resColl[i]); 5157 } 5158 if 
(uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) { 5159 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 5160 for(i = 0; i<rLen; i++) { 5161 log_err(" %02X", resColl[i]); 5162 } 5163 } 5164 tLen = u_strlen(tData2[1]); 5165 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); 5166 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 5167 for(i = 0; i<rLen; i++) { 5168 log_verbose(" %02X", resColl[i]); 5169 } 5170 if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) { 5171 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 5172 for(i = 0; i<rLen; i++) { 5173 log_err(" %02X", resColl[i]); 5174 } 5175 } 5176 ucol_close(coll); 5177 } 5178 5179 static void 5180 TestUCAPrecontext(void) 5181 { 5182 UErrorCode status = U_ZERO_ERROR; 5183 int32_t i, j; 5184 UCollator *coll =NULL; 5185 uint8_t resColl[100], prevColl[100]; 5186 int32_t rLen, tLen, ruleLen; 5187 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */ 5188 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0}; 5189 /* & l middle-dot << a a is an expansion. */ 5190 5191 UChar tData1[][20]={ 5192 { 0xb7, 0}, /* standalone middle dot(0xb7) */ 5193 { 0x387, 0}, /* standalone middle dot(0x387) */ 5194 { 0x61, 0}, /* a */ 5195 { 0x6C, 0}, /* l */ 5196 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */ 5197 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */ 5198 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */ 5199 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */ 5200 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */ 5201 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */ 5202 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */ 5203 }; 5204 5205 log_verbose("\n\nEN collation:"); 5206 coll = ucol_open("en", &status); 5207 if (U_FAILURE(status)) { 5208 log_err_status(status, "Tailoring test: &z <<a|- failed! 
-> %s\n", u_errorName(status)); 5209 return; 5210 } 5211 for (j=0; j<11; j++) { 5212 tLen = u_strlen(tData1[j]); 5213 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5214 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5215 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5216 j, tData1[j]); 5217 } 5218 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5219 for(i = 0; i<rLen; i++) { 5220 log_verbose(" %02X", resColl[i]); 5221 } 5222 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5223 } 5224 ucol_close(coll); 5225 5226 5227 log_verbose("\n\nJA collation:"); 5228 coll = ucol_open("ja", &status); 5229 if (U_FAILURE(status)) { 5230 log_err("Tailoring test: &z <<a|- failed!"); 5231 return; 5232 } 5233 for (j=0; j<11; j++) { 5234 tLen = u_strlen(tData1[j]); 5235 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5236 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5237 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5238 j, tData1[j]); 5239 } 5240 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5241 for(i = 0; i<rLen; i++) { 5242 log_verbose(" %02X", resColl[i]); 5243 } 5244 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5245 } 5246 ucol_close(coll); 5247 5248 5249 log_verbose("\n\nTailoring test: & middle dot < a "); 5250 ruleLen = u_strlen(rule1); 5251 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5252 if (U_FAILURE(status)) { 5253 log_err("Tailoring test: & middle dot < a failed!"); 5254 return; 5255 } 5256 for (j=0; j<11; j++) { 5257 tLen = u_strlen(tData1[j]); 5258 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5259 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5260 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5261 j, tData1[j]); 5262 } 5263 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5264 
for(i = 0; i<rLen; i++) { 5265 log_verbose(" %02X", resColl[i]); 5266 } 5267 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5268 } 5269 ucol_close(coll); 5270 5271 5272 log_verbose("\n\nTailoring test: & l middle-dot << a "); 5273 ruleLen = u_strlen(rule2); 5274 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5275 if (U_FAILURE(status)) { 5276 log_err("Tailoring test: & l middle-dot << a failed!"); 5277 return; 5278 } 5279 for (j=0; j<11; j++) { 5280 tLen = u_strlen(tData1[j]); 5281 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5282 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5283 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5284 j, tData1[j]); 5285 } 5286 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) { 5287 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.", 5288 j, tData1[j]); 5289 } 5290 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5291 for(i = 0; i<rLen; i++) { 5292 log_verbose(" %02X", resColl[i]); 5293 } 5294 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5295 } 5296 ucol_close(coll); 5297 } 5298 5299 static void 5300 TestOutOfBuffer5468(void) 5301 { 5302 static const char *test = "\\u4e00"; 5303 UChar ustr[256]; 5304 int32_t ustr_length = u_unescape(test, ustr, 256); 5305 unsigned char shortKeyBuf[1]; 5306 int32_t sortkey_length; 5307 UErrorCode status = U_ZERO_ERROR; 5308 static UCollator *coll = NULL; 5309 5310 coll = ucol_open("root", &status); 5311 if(U_FAILURE(status)) { 5312 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 5313 return; 5314 } 5315 ucol_setStrength(coll, UCOL_PRIMARY); 5316 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 5317 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5318 if (U_FAILURE(status)) { 5319 log_err("Failed setting atributes\n"); 5320 return; 5321 } 5322 5323 sortkey_length = 
ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf)); 5324 if (sortkey_length != 4) { 5325 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length); 5326 } 5327 log_verbose("length of sortKey is %d", sortkey_length); 5328 ucol_close(coll); 5329 } 5330 5331 #define TSKC_DATA_SIZE 5 5332 #define TSKC_BUF_SIZE 50 5333 static void 5334 TestSortKeyConsistency(void) 5335 { 5336 UErrorCode icuRC = U_ZERO_ERROR; 5337 UCollator* ucol; 5338 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD}; 5339 5340 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5341 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5342 int32_t i, j, i2; 5343 5344 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC); 5345 if (U_FAILURE(icuRC)) 5346 { 5347 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC)); 5348 return; 5349 } 5350 5351 for (i = 0; i < TSKC_DATA_SIZE; i++) 5352 { 5353 UCharIterator uiter; 5354 uint32_t state[2] = { 0, 0 }; 5355 int32_t dataLen = i+1; 5356 for (j=0; j<TSKC_BUF_SIZE; j++) 5357 bufFull[i][j] = bufPart[i][j] = 0; 5358 5359 /* Full sort key */ 5360 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE); 5361 5362 /* Partial sort key */ 5363 uiter_setString(&uiter, data, dataLen); 5364 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC); 5365 if (U_FAILURE(icuRC)) 5366 { 5367 log_err("ucol_nextSortKeyPart failed\n"); 5368 ucol_close(ucol); 5369 return; 5370 } 5371 5372 for (i2=0; i2<i; i2++) 5373 { 5374 UBool fullMatch = TRUE; 5375 UBool partMatch = TRUE; 5376 for (j=0; j<TSKC_BUF_SIZE; j++) 5377 { 5378 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]); 5379 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]); 5380 } 5381 if (fullMatch != partMatch) { 5382 log_err(fullMatch ? 
"full key was consistent, but partial key changed\n" 5383 : "partial key was consistent, but full key changed\n"); 5384 ucol_close(ucol); 5385 return; 5386 } 5387 } 5388 } 5389 5390 /*=============================================*/ 5391 ucol_close(ucol); 5392 } 5393 5394 /* ticket: 6101 */ 5395 static void TestCroatianSortKey(void) { 5396 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3"; 5397 UErrorCode status = U_ZERO_ERROR; 5398 UCollator *ucol; 5399 UCharIterator iter; 5400 5401 static const UChar text[] = { 0x0044, 0xD81A }; 5402 5403 size_t length = sizeof(text)/sizeof(*text); 5404 5405 uint8_t textSortKey[32]; 5406 size_t lenSortKey = 32; 5407 size_t actualSortKeyLen; 5408 uint32_t uStateInfo[2] = { 0, 0 }; 5409 5410 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status); 5411 if (U_FAILURE(status)) { 5412 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status)); 5413 return; 5414 } 5415 5416 uiter_setString(&iter, text, length); 5417 5418 actualSortKeyLen = ucol_nextSortKeyPart( 5419 ucol, &iter, (uint32_t*)uStateInfo, 5420 textSortKey, lenSortKey, &status 5421 ); 5422 5423 if (actualSortKeyLen == lenSortKey) { 5424 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n"); 5425 } 5426 5427 ucol_close(ucol); 5428 } 5429 5430 /* ticket: 6140 */ 5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since 5432 * they are both Hiragana and Katakana 5433 */ 5434 #define SORTKEYLEN 50 5435 static void TestHiragana(void) { 5436 UErrorCode status = U_ZERO_ERROR; 5437 UCollator* ucol; 5438 UCollationResult strcollresult; 5439 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */ 5440 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 }; 5441 int32_t data1Len = sizeof(data1)/sizeof(*data1); 5442 int32_t data2Len = sizeof(data2)/sizeof(*data2); 5443 int32_t i, j; 5444 uint8_t sortKey1[SORTKEYLEN]; 5445 uint8_t 
sortKey2[SORTKEYLEN]; 5446 5447 UCharIterator uiter1; 5448 UCharIterator uiter2; 5449 uint32_t state1[2] = { 0, 0 }; 5450 uint32_t state2[2] = { 0, 0 }; 5451 int32_t keySize1; 5452 int32_t keySize2; 5453 5454 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL, 5455 &status); 5456 if (U_FAILURE(status)) { 5457 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status)); 5458 return; 5459 } 5460 5461 /* Start of full sort keys */ 5462 /* Full sort key1 */ 5463 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN); 5464 /* Full sort key2 */ 5465 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN); 5466 if (keySize1 == keySize2) { 5467 for (i = 0; i < keySize1; i++) { 5468 if (sortKey1[i] != sortKey2[i]) { 5469 log_err("Full sort keys are different. Should be equal."); 5470 } 5471 } 5472 } else { 5473 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2); 5474 } 5475 /* End of full sort keys */ 5476 5477 /* Start of partial sort keys */ 5478 /* Partial sort key1 */ 5479 uiter_setString(&uiter1, data1, data1Len); 5480 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status); 5481 /* Partial sort key2 */ 5482 uiter_setString(&uiter2, data2, data2Len); 5483 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status); 5484 if (U_SUCCESS(status) && keySize1 == keySize2) { 5485 for (j = 0; j < keySize1; j++) { 5486 if (sortKey1[j] != sortKey2[j]) { 5487 log_err("Partial sort keys are different. 
Should be equal"); 5488 } 5489 } 5490 } else { 5491 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2); 5492 } 5493 /* End of partial sort keys */ 5494 5495 /* Start of strcoll */ 5496 /* Use ucol_strcoll() to determine ordering */ 5497 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len); 5498 if (strcollresult != UCOL_EQUAL) { 5499 log_err("Result from ucol_strcoll() should be UCOL_EQUAL."); 5500 } 5501 5502 ucol_close(ucol); 5503 } 5504 5505 /* Convenient struct for running collation tests */ 5506 typedef struct { 5507 const UChar source[MAX_TOKEN_LEN]; /* String on left */ 5508 const UChar target[MAX_TOKEN_LEN]; /* String on right */ 5509 UCollationResult result; /* -1, 0 or +1, depending on collation */ 5510 } OneTestCase; 5511 5512 /* 5513 * Utility function to test one collation test case. 5514 * @param testcases Array of test cases. 5515 * @param n_testcases Size of the array testcases. 5516 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats. 5517 * @param n_rules Size of the array str_rules. 
5518 */ 5519 static void doTestOneTestCase(const OneTestCase testcases[], 5520 int n_testcases, 5521 const char* str_rules[], 5522 int n_rules) 5523 { 5524 int rule_no, testcase_no; 5525 UChar rule[500]; 5526 int32_t length = 0; 5527 UErrorCode status = U_ZERO_ERROR; 5528 UParseError parse_error; 5529 UCollator *myCollation; 5530 5531 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 5532 5533 length = u_unescape(str_rules[rule_no], rule, 500); 5534 if (length == 0) { 5535 log_err("ERROR: The rule cannot be unescaped: %s\n"); 5536 return; 5537 } 5538 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 5539 if(U_FAILURE(status)){ 5540 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5541 return; 5542 } 5543 log_verbose("Testing the <<* syntax\n"); 5544 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5545 ucol_setStrength(myCollation, UCOL_TERTIARY); 5546 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { 5547 doTest(myCollation, 5548 testcases[testcase_no].source, 5549 testcases[testcase_no].target, 5550 testcases[testcase_no].result 5551 ); 5552 } 5553 ucol_close(myCollation); 5554 } 5555 } 5556 5557 const static OneTestCase rangeTestcases[] = { 5558 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */ 5559 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */ 5560 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */ 5561 5562 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */ 5563 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */ 5564 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */ 5565 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */ 5566 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */ 5567 5568 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */ 5569 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */ 5570 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */ 5571 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */ 5572 5573 { {0x0061}, 
{0x0031}, UCOL_EQUAL }, /* "a" = "1" */ 5574 { {0x0061}, {0x0032}, UCOL_EQUAL }, /* "a" = "2" */ 5575 { {0x0061}, {0x0033}, UCOL_EQUAL }, /* "a" = "3" */ 5576 { {0x0061}, {0x0066}, UCOL_LESS }, /* "a" < "f" */ 5577 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS }, /* "la" < "123" */ 5578 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL }, /* "aaa" = "123" */ 5579 { {0x0062}, {0x007a}, UCOL_LESS }, /* "b" < "z" */ 5580 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS }, /* "azm" = "2yc" */ 5581 }; 5582 5583 static int nRangeTestcases = LEN(rangeTestcases); 5584 5585 const static OneTestCase rangeTestcasesSupplemental[] = { 5586 { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */ 5587 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */ 5588 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */ 5589 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */ 5590 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ 5591 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10000 < U+10001 */ 5592 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10001 */ 5593 }; 5594 5595 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); 5596 5597 const static OneTestCase rangeTestcasesQwerty[] = { 5598 { {0x0071}, {0x0077}, UCOL_LESS }, /* "q" < "w" */ 5599 { {0x0077}, {0x0065}, UCOL_LESS }, /* "w" < "e" */ 5600 5601 { {0x0079}, {0x0075}, UCOL_LESS }, /* "y" < "u" */ 5602 { {0x0071}, {0x0075}, UCOL_LESS }, /* "q" << "u" */ 5603 5604 { {0x0074}, {0x0069}, UCOL_LESS }, /* "t" << "i" */ 5605 { {0x006f}, {0x0070}, UCOL_LESS }, /* "o" << "p" */ 5606 5607 { {0x0079}, {0x0065}, UCOL_LESS }, /* "y" < "e" */ 5608 { {0x0069}, {0x0075}, UCOL_LESS }, /* "i" < "u" */ 5609 5610 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, 5611 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS }, /* "quest" < "were" */ 5612 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b}, 5613 {0x0071, 0x0075, 
0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */ 5614 }; 5615 5616 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty); 5617 5618 static void TestSameStrengthList(void) 5619 { 5620 const char* strRules[] = { 5621 /* Normal */ 5622 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3", 5623 5624 /* Lists */ 5625 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123", 5626 }; 5627 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5628 } 5629 5630 static void TestSameStrengthListQuoted(void) 5631 { 5632 const char* strRules[] = { 5633 /* Lists with quoted characters */ 5634 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123", 5635 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123", 5636 5637 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033", 5638 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'", 5639 5640 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033", 5641 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'", 5642 }; 5643 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5644 } 5645 5646 static void TestSameStrengthListSupplemental(void) 5647 { 5648 const char* strRules[] = { 5649 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002", 5650 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", 5651 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002", 5652 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", 5653 }; 5654 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 5655 } 5656 5657 static void TestSameStrengthListQwerty(void) 5658 { 5659 const char* strRules[] = { 5660 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 5661 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 5662 "&\\u0071<\\u0077<\\u0065<\\u0072 
&\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", 5663 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064", 5664 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064", 5665 5666 /* Quoted characters also will work if two quoted characters are not consecutive. */ 5667 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064", 5668 5669 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */ 5670 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/ 5671 5672 }; 5673 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 5674 } 5675 5676 static void TestSameStrengthListQuotedQwerty(void) 5677 { 5678 const char* strRules[] = { 5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 5680 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 5681 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */ 5682 5683 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. 
*/ 5684 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */ 5685 }; 5686 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 5687 } 5688 5689 static void TestSameStrengthListRanges(void) 5690 { 5691 const char* strRules[] = { 5692 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", 5693 }; 5694 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5695 } 5696 5697 static void TestSameStrengthListSupplementalRanges(void) 5698 { 5699 const char* strRules[] = { 5700 "&\\ufffe<*\\uffff-\\U00010002", 5701 }; 5702 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 5703 } 5704 5705 static void TestSpecialCharacters(void) 5706 { 5707 const char* strRules[] = { 5708 /* Normal */ 5709 "&';'<'+'<','<'-'<'&'<'*'", 5710 5711 /* List */ 5712 "&';'<*'+,-&*'", 5713 5714 /* Range */ 5715 "&';'<*'+'-'-&*'", 5716 }; 5717 5718 const static OneTestCase specialCharacterStrings[] = { 5719 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */ 5720 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */ 5721 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */ 5722 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */ 5723 }; 5724 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules)); 5725 } 5726 5727 static void TestPrivateUseCharacters(void) 5728 { 5729 const char* strRules[] = { 5730 /* Normal */ 5731 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'", 5732 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d", 5733 }; 5734 5735 const static OneTestCase privateUseCharacterStrings[] = { 5736 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5737 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5738 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5739 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5740 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5741 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5742 }; 5743 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, 
LEN(strRules)); 5744 } 5745 5746 static void TestPrivateUseCharactersInList(void) 5747 { 5748 const char* strRules[] = { 5749 /* List */ 5750 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'", 5751 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */ 5752 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d", 5753 }; 5754 5755 const static OneTestCase privateUseCharacterStrings[] = { 5756 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5757 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5758 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5759 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5760 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5761 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5762 }; 5763 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5764 } 5765 5766 static void TestPrivateUseCharactersInRange(void) 5767 { 5768 const char* strRules[] = { 5769 /* Range */ 5770 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'", 5771 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d", 5772 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */ 5773 }; 5774 5775 const static OneTestCase privateUseCharacterStrings[] = { 5776 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5777 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5778 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5779 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5780 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5781 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5782 }; 5783 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5784 } 5785 5786 static void TestInvalidListsAndRanges(void) 5787 { 5788 const char* invalidRules[] = { 5789 /* Range not in starred expression */ 5790 "&\\ufffe<\\uffff-\\U00010002", 5791 5792 /* Range without start */ 5793 "&a<*-c", 5794 5795 /* Range without end */ 5796 "&a<*b-", 5797 5798 /* More than one hyphen */ 5799 "&a<*b-g-l", 5800 5801 /* Range in the wrong order */ 5802 "&a<*k-b", 5803 5804 }; 5805 5806 UChar rule[500]; 5807 UErrorCode status = U_ZERO_ERROR; 5808 UParseError parse_error; 
5809 int n_rules = LEN(invalidRules); 5810 int rule_no; 5811 int length; 5812 UCollator *myCollation; 5813 5814 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 5815 5816 length = u_unescape(invalidRules[rule_no], rule, 500); 5817 if (length == 0) { 5818 log_err("ERROR: The rule cannot be unescaped: %s\n"); 5819 return; 5820 } 5821 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 5822 if(!U_FAILURE(status)){ 5823 log_err("ERROR: Could not cause a failure as expected: \n"); 5824 } 5825 status = U_ZERO_ERROR; 5826 } 5827 } 5828 5829 /* 5830 * This test ensures that characters placed before a character in a different script have the same lead byte 5831 * in their collation key before and after script reordering. 5832 */ 5833 static void TestBeforeRuleWithScriptReordering(void) 5834 { 5835 UParseError error; 5836 UErrorCode status = U_ZERO_ERROR; 5837 UCollator *myCollation; 5838 char srules[500] = "&[before 1]\\u03b1 < \\u0e01"; 5839 UChar rules[500]; 5840 uint32_t rulesLength = 0; 5841 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 5842 UCollationResult collResult; 5843 5844 uint8_t baseKey[256]; 5845 uint32_t baseKeyLength; 5846 uint8_t beforeKey[256]; 5847 uint32_t beforeKeyLength; 5848 5849 UChar base[] = { 0x03b1 }; /* base */ 5850 int32_t baseLen = sizeof(base)/sizeof(*base); 5851 5852 UChar before[] = { 0x0e01 }; /* ko kai */ 5853 int32_t beforeLen = sizeof(before)/sizeof(*before); 5854 5855 /*UChar *data[] = { before, base }; 5856 genericRulesStarter(srules, data, 2);*/ 5857 5858 log_verbose("Testing the &[before 1] rule with [reorder grek]\n"); 5859 5860 5861 /* build collator */ 5862 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n"); 5863 5864 rulesLength = u_unescape(srules, rules, LEN(rules)); 5865 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status); 5866 if(U_FAILURE(status)) { 5867 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", 
myErrorName(status)); 5868 return; 5869 } 5870 5871 /* check collation results - before rule applied but not script reordering */ 5872 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 5873 if (collResult != UCOL_GREATER) { 5874 log_err("Collation result not correct before script reordering = %d\n", collResult); 5875 } 5876 5877 /* check the lead byte of the collation keys before script reordering */ 5878 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 5879 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 5880 if (baseKey[0] != beforeKey[0]) { 5881 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 5882 } 5883 5884 /* reorder the scripts */ 5885 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status); 5886 if(U_FAILURE(status)) { 5887 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 5888 return; 5889 } 5890 5891 /* check collation results - before rule applied and after script reordering */ 5892 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 5893 if (collResult != UCOL_GREATER) { 5894 log_err("Collation result not correct after script reordering = %d\n", collResult); 5895 } 5896 5897 /* check the lead byte of the collation keys after script reordering */ 5898 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 5899 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 5900 if (baseKey[0] != beforeKey[0]) { 5901 log_err("Different lead byte for sort keys using before fule and after script reordering. 
base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 5902 } 5903 5904 ucol_close(myCollation); 5905 } 5906 5907 /* 5908 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering. 5909 */ 5910 static void TestNonLeadBytesDuringCollationReordering(void) 5911 { 5912 UErrorCode status = U_ZERO_ERROR; 5913 UCollator *myCollation; 5914 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 5915 UCollationResult collResult; 5916 5917 uint8_t baseKey[256]; 5918 uint32_t baseKeyLength; 5919 uint8_t reorderKey[256]; 5920 uint32_t reorderKeyLength; 5921 5922 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 }; 5923 5924 int i; 5925 5926 5927 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 5928 5929 /* build collator tertiary */ 5930 myCollation = ucol_open("", &status); 5931 ucol_setStrength(myCollation, UCOL_TERTIARY); 5932 if(U_FAILURE(status)) { 5933 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5934 return; 5935 } 5936 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 5937 5938 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5939 if(U_FAILURE(status)) { 5940 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5941 return; 5942 } 5943 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 5944 5945 if (baseKeyLength != reorderKeyLength) { 5946 log_err("Key lengths not the same during reordering.\n", collResult); 5947 return; 5948 } 5949 5950 for (i = 1; i < baseKeyLength; i++) { 5951 if (baseKey[i] != reorderKey[i]) { 5952 log_err("Collation key bytes not the same at position %d.\n", i); 5953 return; 5954 } 5955 } 5956 ucol_close(myCollation); 5957 5958 /* build collator quaternary */ 5959 myCollation = ucol_open("", &status); 5960 ucol_setStrength(myCollation, UCOL_QUATERNARY); 
5961 if(U_FAILURE(status)) { 5962 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5963 return; 5964 } 5965 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 5966 5967 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5968 if(U_FAILURE(status)) { 5969 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5970 return; 5971 } 5972 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 5973 5974 if (baseKeyLength != reorderKeyLength) { 5975 log_err("Key lengths not the same during reordering.\n", collResult); 5976 return; 5977 } 5978 5979 for (i = 1; i < baseKeyLength; i++) { 5980 if (baseKey[i] != reorderKey[i]) { 5981 log_err("Collation key bytes not the same at position %d.\n", i); 5982 return; 5983 } 5984 } 5985 ucol_close(myCollation); 5986 } 5987 5988 /* 5989 * Test reordering API. 5990 */ 5991 static void TestReorderingAPI(void) 5992 { 5993 UErrorCode status = U_ZERO_ERROR; 5994 UCollator *myCollation; 5995 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 5996 UCollationResult collResult; 5997 int32_t retrievedReorderCodesLength; 5998 UChar greekString[] = { 0x03b1 }; 5999 UChar punctuationString[] = { 0x203e }; 6000 6001 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6002 6003 /* build collator tertiary */ 6004 myCollation = ucol_open("", &status); 6005 ucol_setStrength(myCollation, UCOL_TERTIARY); 6006 if(U_FAILURE(status)) { 6007 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6008 return; 6009 } 6010 6011 /* set the reorderding */ 6012 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6013 if (U_FAILURE(status)) { 6014 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6015 return; 6016 } 6017 6018 
retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6019 if (status != U_BUFFER_OVERFLOW_ERROR) { 6020 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 6021 return; 6022 } 6023 status = U_ZERO_ERROR; 6024 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6025 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6026 return; 6027 } 6028 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6029 if (collResult != UCOL_LESS) { 6030 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 6031 return; 6032 } 6033 6034 /* clear the reordering */ 6035 ucol_setReorderCodes(myCollation, NULL, 0, &status); 6036 if (U_FAILURE(status)) { 6037 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status)); 6038 return; 6039 } 6040 6041 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6042 if (retrievedReorderCodesLength != 0) { 6043 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 6044 return; 6045 } 6046 6047 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6048 if (collResult != UCOL_GREATER) { 6049 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 6050 return; 6051 } 6052 6053 ucol_close(myCollation); 6054 } 6055 6056 /* 6057 * Utility function to test one collation reordering test case. 6058 * @param testcases Array of test cases. 6059 * @param n_testcases Size of the array testcases. 6060 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats. 6061 * @param n_rules Size of the array str_rules. 
6062 */ 6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen) 6064 { 6065 int testCaseNum; 6066 UErrorCode status = U_ZERO_ERROR; 6067 UCollator *myCollation; 6068 6069 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) { 6070 myCollation = ucol_open("", &status); 6071 if (U_FAILURE(status)) { 6072 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6073 return; 6074 } 6075 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status); 6076 if(U_FAILURE(status)) { 6077 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 6078 return; 6079 } 6080 6081 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) { 6082 doTest(myCollation, 6083 testCases[testCaseNum].source, 6084 testCases[testCaseNum].target, 6085 testCases[testCaseNum].result 6086 ); 6087 } 6088 ucol_close(myCollation); 6089 } 6090 } 6091 6092 static void TestGreekFirstReorder(void) 6093 { 6094 const char* strRules[] = { 6095 "[reorder Grek]" 6096 }; 6097 6098 const int32_t apiRules[] = { 6099 USCRIPT_GREEK 6100 }; 6101 6102 const static OneTestCase privateUseCharacterStrings[] = { 6103 { {0x0391}, {0x0391}, UCOL_EQUAL }, 6104 { {0x0041}, {0x0391}, UCOL_GREATER }, 6105 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER }, 6106 { {0x0060}, {0x0391}, UCOL_LESS }, 6107 { {0x0391}, {0xe2dc}, UCOL_LESS }, 6108 { {0x0391}, {0x0060}, UCOL_GREATER }, 6109 }; 6110 6111 /* Test rules creation */ 6112 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6113 6114 /* Test collation reordering API */ 6115 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6116 } 6117 6118 static void TestGreekLastReorder(void) 6119 { 6120 const char* strRules[] = { 6121 "[reorder Zzzz Grek]" 6122 }; 6123 6124 
const int32_t apiRules[] = { 6125 USCRIPT_UNKNOWN, USCRIPT_GREEK 6126 }; 6127 6128 const static OneTestCase privateUseCharacterStrings[] = { 6129 { {0x0391}, {0x0391}, UCOL_EQUAL }, 6130 { {0x0041}, {0x0391}, UCOL_LESS }, 6131 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS }, 6132 { {0x0060}, {0x0391}, UCOL_LESS }, 6133 { {0x0391}, {0xe2dc}, UCOL_GREATER }, 6134 }; 6135 6136 /* Test rules creation */ 6137 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6138 6139 /* Test collation reordering API */ 6140 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6141 } 6142 6143 static void TestNonScriptReorder(void) 6144 { 6145 const char* strRules[] = { 6146 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]" 6147 }; 6148 6149 const int32_t apiRules[] = { 6150 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, 6151 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN, 6152 UCOL_REORDER_CODE_CURRENCY 6153 }; 6154 6155 const static OneTestCase privateUseCharacterStrings[] = { 6156 { {0x0391}, {0x0041}, UCOL_LESS }, 6157 { {0x0041}, {0x0391}, UCOL_GREATER }, 6158 { {0x0060}, {0x0041}, UCOL_LESS }, 6159 { {0x0060}, {0x0391}, UCOL_GREATER }, 6160 { {0x0024}, {0x0041}, UCOL_GREATER }, 6161 }; 6162 6163 /* Test rules creation */ 6164 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6165 6166 /* Test collation reordering API */ 6167 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6168 } 6169 6170 static void TestHaniReorder(void) 6171 { 6172 const char* strRules[] = { 6173 "[reorder Hani]" 6174 }; 6175 const int32_t apiRules[] = { 6176 USCRIPT_HAN 6177 }; 6178 6179 const static OneTestCase privateUseCharacterStrings[] = { 6180 { {0x4e00}, {0x0041}, UCOL_LESS }, 6181 { {0x4e00}, 
{0x0060}, UCOL_GREATER }, 6182 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 6183 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 6184 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 6185 { {0xfa27}, {0x0041}, UCOL_LESS }, 6186 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 6187 }; 6188 6189 /* Test rules creation */ 6190 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6191 6192 /* Test collation reordering API */ 6193 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6194 } 6195 6196 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b) 6197 { 6198 for (; *a == *b; ++a, ++b) { 6199 if (*a == 0) { 6200 return 0; 6201 } 6202 } 6203 return (*a < *b ? -1 : 1); 6204 } 6205 6206 static void TestImport(void) 6207 { 6208 UCollator* vicoll; 6209 UCollator* escoll; 6210 UCollator* viescoll; 6211 UCollator* importviescoll; 6212 UParseError error; 6213 UErrorCode status = U_ZERO_ERROR; 6214 UChar* virules; 6215 int32_t viruleslength; 6216 UChar* esrules; 6217 int32_t esruleslength; 6218 UChar* viesrules; 6219 int32_t viesruleslength; 6220 char srules[500] = "[import vi][import es]"; 6221 UChar rules[500]; 6222 uint32_t length = 0; 6223 int32_t itemCount; 6224 int32_t i, k; 6225 UChar32 start; 6226 UChar32 end; 6227 UChar str[500]; 6228 int32_t strLength; 6229 6230 uint8_t sk1[500]; 6231 uint8_t sk2[500]; 6232 6233 UBool b; 6234 USet* tailoredSet; 6235 USet* importTailoredSet; 6236 6237 6238 vicoll = ucol_open("vi", &status); 6239 if(U_FAILURE(status)){ 6240 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status)); 6241 return; 6242 } 6243 6244 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); 6245 escoll = ucol_open("es", &status); 6246 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); 6247 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*)); 6248 viesrules[0] = 0; 6249 
u_strcat(viesrules, virules); 6250 u_strcat(viesrules, esrules); 6251 viesruleslength = viruleslength + esruleslength; 6252 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 6253 6254 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 6255 length = u_unescape(srules, rules, 500); 6256 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status); 6257 if(U_FAILURE(status)){ 6258 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6259 return; 6260 } 6261 6262 tailoredSet = ucol_getTailoredSet(viescoll, &status); 6263 importTailoredSet = ucol_getTailoredSet(importviescoll, &status); 6264 6265 if(!uset_equals(tailoredSet, importTailoredSet)){ 6266 log_err("Tailored sets not equal"); 6267 } 6268 6269 uset_close(importTailoredSet); 6270 6271 itemCount = uset_getItemCount(tailoredSet); 6272 6273 for( i = 0; i < itemCount; i++){ 6274 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 6275 if(strLength < 2){ 6276 for (; start <= end; start++){ 6277 k = 0; 6278 U16_APPEND(str, k, 500, start, b); 6279 ucol_getSortKey(viescoll, str, 1, sk1, 500); 6280 ucol_getSortKey(importviescoll, str, 1, sk2, 500); 6281 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6282 log_err("Sort key for %s not equal\n", str); 6283 break; 6284 } 6285 } 6286 }else{ 6287 ucol_getSortKey(viescoll, str, strLength, sk1, 500); 6288 ucol_getSortKey(importviescoll, str, strLength, sk2, 500); 6289 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6290 log_err("ZZSort key for %s not equal\n", str); 6291 break; 6292 } 6293 6294 } 6295 } 6296 6297 uset_close(tailoredSet); 6298 6299 uprv_free(viesrules); 6300 6301 ucol_close(vicoll); 6302 ucol_close(escoll); 6303 ucol_close(viescoll); 6304 ucol_close(importviescoll); 6305 } 6306 6307 static void TestImportWithType(void) 6308 { 6309 UCollator* vicoll; 6310 UCollator* decoll; 6311 UCollator* videcoll; 6312 
UCollator* importvidecoll; 6313 UParseError error; 6314 UErrorCode status = U_ZERO_ERROR; 6315 const UChar* virules; 6316 int32_t viruleslength; 6317 const UChar* derules; 6318 int32_t deruleslength; 6319 UChar* viderules; 6320 int32_t videruleslength; 6321 const char srules[500] = "[import vi][import de-u-co-phonebk]"; 6322 UChar rules[500]; 6323 uint32_t length = 0; 6324 int32_t itemCount; 6325 int32_t i, k; 6326 UChar32 start; 6327 UChar32 end; 6328 UChar str[500]; 6329 int32_t strLength; 6330 6331 uint8_t sk1[500]; 6332 uint8_t sk2[500]; 6333 6334 USet* tailoredSet; 6335 USet* importTailoredSet; 6336 6337 vicoll = ucol_open("vi", &status); 6338 if(U_FAILURE(status)){ 6339 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6340 return; 6341 } 6342 virules = ucol_getRules(vicoll, &viruleslength); 6343 /* decoll = ucol_open("de@collation=phonebook", &status); */ 6344 decoll = ucol_open("de-u-co-phonebk", &status); 6345 if(U_FAILURE(status)){ 6346 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6347 return; 6348 } 6349 6350 6351 derules = ucol_getRules(decoll, &deruleslength); 6352 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*)); 6353 viderules[0] = 0; 6354 u_strcat(viderules, virules); 6355 u_strcat(viderules, derules); 6356 videruleslength = viruleslength + deruleslength; 6357 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 6358 6359 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 6360 length = u_unescape(srules, rules, 500); 6361 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status); 6362 if(U_FAILURE(status)){ 6363 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6364 return; 6365 } 6366 6367 tailoredSet = ucol_getTailoredSet(videcoll, &status); 6368 importTailoredSet = 
ucol_getTailoredSet(importvidecoll, &status); 6369 6370 if(!uset_equals(tailoredSet, importTailoredSet)){ 6371 log_err("Tailored sets not equal"); 6372 } 6373 6374 uset_close(importTailoredSet); 6375 6376 itemCount = uset_getItemCount(tailoredSet); 6377 6378 for( i = 0; i < itemCount; i++){ 6379 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 6380 if(strLength < 2){ 6381 for (; start <= end; start++){ 6382 k = 0; 6383 U16_APPEND_UNSAFE(str, k, start); 6384 ucol_getSortKey(videcoll, str, 1, sk1, 500); 6385 ucol_getSortKey(importvidecoll, str, 1, sk2, 500); 6386 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6387 log_err("Sort key for %s not equal\n", str); 6388 break; 6389 } 6390 } 6391 }else{ 6392 ucol_getSortKey(videcoll, str, strLength, sk1, 500); 6393 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500); 6394 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6395 log_err("Sort key for %s not equal\n", str); 6396 break; 6397 } 6398 6399 } 6400 } 6401 6402 uset_close(tailoredSet); 6403 6404 uprv_free(viderules); 6405 6406 ucol_close(videcoll); 6407 ucol_close(importvidecoll); 6408 ucol_close(vicoll); 6409 ucol_close(decoll); 6410 6411 } 6412 6413 6414 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) 6415 6416 void addMiscCollTest(TestNode** root) 6417 { 6418 TEST(TestRuleOptions); 6419 TEST(TestBeforePrefixFailure); 6420 TEST(TestContractionClosure); 6421 TEST(TestPrefixCompose); 6422 TEST(TestStrCollIdenticalPrefix); 6423 TEST(TestPrefix); 6424 TEST(TestNewJapanese); 6425 /*TEST(TestLimitations);*/ 6426 TEST(TestNonChars); 6427 TEST(TestExtremeCompression); 6428 TEST(TestSurrogates); 6429 TEST(TestVariableTopSetting); 6430 TEST(TestBocsuCoverage); 6431 TEST(TestCyrillicTailoring); 6432 TEST(TestCase); 6433 TEST(IncompleteCntTest); 6434 TEST(BlackBirdTest); 6435 TEST(FunkyATest); 6436 TEST(BillFairmanTest); 6437 TEST(RamsRulesTest); 6438 TEST(IsTailoredTest); 6439 TEST(TestCollations); 6440 TEST(TestChMove); 6441 
TEST(TestImplicitTailoring); 6442 TEST(TestFCDProblem); 6443 TEST(TestEmptyRule); 6444 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */ 6445 TEST(TestJ815); 6446 /*TEST(TestJ831);*/ /* we changed lv locale */ 6447 TEST(TestBefore); 6448 TEST(TestRedundantRules); 6449 TEST(TestExpansionSyntax); 6450 TEST(TestHangulTailoring); 6451 TEST(TestUCARules); 6452 TEST(TestIncrementalNormalize); 6453 TEST(TestComposeDecompose); 6454 TEST(TestCompressOverlap); 6455 TEST(TestContraction); 6456 TEST(TestExpansion); 6457 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */ 6458 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */ 6459 TEST(TestOptimize); 6460 TEST(TestSuppressContractions); 6461 TEST(Alexis2); 6462 TEST(TestHebrewUCA); 6463 TEST(TestPartialSortKeyTermination); 6464 TEST(TestSettings); 6465 TEST(TestEquals); 6466 TEST(TestJ2726); 6467 TEST(NullRule); 6468 TEST(TestNumericCollation); 6469 TEST(TestTibetanConformance); 6470 TEST(TestPinyinProblem); 6471 TEST(TestImplicitGeneration); 6472 TEST(TestSeparateTrees); 6473 TEST(TestBeforePinyin); 6474 TEST(TestBeforeTightening); 6475 /*TEST(TestMoreBefore);*/ 6476 TEST(TestTailorNULL); 6477 TEST(TestUpperFirstQuaternary); 6478 TEST(TestJ4960); 6479 TEST(TestJ5223); 6480 TEST(TestJ5232); 6481 TEST(TestJ5367); 6482 TEST(TestHiragana); 6483 TEST(TestSortKeyConsistency); 6484 TEST(TestVI5913); /* VI, RO tailored rules */ 6485 TEST(TestCroatianSortKey); 6486 TEST(TestTailor6179); 6487 TEST(TestUCAPrecontext); 6488 TEST(TestOutOfBuffer5468); 6489 TEST(TestSameStrengthList); 6490 6491 TEST(TestSameStrengthListQuoted); 6492 TEST(TestSameStrengthListSupplemental); 6493 TEST(TestSameStrengthListQwerty); 6494 TEST(TestSameStrengthListQuotedQwerty); 6495 TEST(TestSameStrengthListRanges); 6496 TEST(TestSameStrengthListSupplementalRanges); 6497 TEST(TestSpecialCharacters); 6498 TEST(TestPrivateUseCharacters); 6499 
TEST(TestPrivateUseCharactersInList); 6500 TEST(TestPrivateUseCharactersInRange); 6501 TEST(TestInvalidListsAndRanges); 6502 TEST(TestImport); 6503 TEST(TestImportWithType); 6504 6505 TEST(TestBeforeRuleWithScriptReordering); 6506 TEST(TestNonLeadBytesDuringCollationReordering); 6507 TEST(TestReorderingAPI); 6508 TEST(TestGreekFirstReorder); 6509 TEST(TestGreekLastReorder); 6510 TEST(TestNonScriptReorder); 6511 TEST(TestHaniReorder); 6512 } 6513 6514 #endif /* #if !UCONFIG_NO_COLLATION */ 6515