1 2 /******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 2001-2012, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ********************************************************************/ 7 /******************************************************************************* 8 * 9 * File cmsccoll.C 10 * 11 *******************************************************************************/ 12 /** 13 * These are the tests specific to ICU 1.8 and above, that I didn't know where 14 * to fit. 15 */ 16 17 #include <stdio.h> 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_COLLATION 22 23 #include "unicode/ucol.h" 24 #include "unicode/ucoleitr.h" 25 #include "unicode/uloc.h" 26 #include "cintltst.h" 27 #include "ccolltst.h" 28 #include "callcoll.h" 29 #include "unicode/ustring.h" 30 #include "string.h" 31 #include "ucol_imp.h" 32 #include "ucol_tok.h" 33 #include "cmemory.h" 34 #include "cstring.h" 35 #include "uassert.h" 36 #include "unicode/parseerr.h" 37 #include "unicode/ucnv.h" 38 #include "unicode/ures.h" 39 #include "unicode/uscript.h" 40 #include "unicode/utf16.h" 41 #include "uparse.h" 42 #include "putilimp.h" 43 44 45 #define LEN(a) (sizeof(a)/sizeof(a[0])) 46 47 #define MAX_TOKEN_LEN 16 48 49 typedef UCollationResult tst_strcoll(void *collator, const int object, 50 const UChar *source, const int sLen, 51 const UChar *target, const int tLen); 52 53 54 55 const static char cnt1[][10] = { 56 57 "AA", 58 "AC", 59 "AZ", 60 "AQ", 61 "AB", 62 "ABZ", 63 "ABQ", 64 "Z", 65 "ABC", 66 "Q", 67 "B" 68 }; 69 70 const static char cnt2[][10] = { 71 "DA", 72 "DAD", 73 "DAZ", 74 "MAR", 75 "Z", 76 "DAVIS", 77 "MARK", 78 "DAV", 79 "DAVI" 80 }; 81 82 static void IncompleteCntTest(void) 83 { 84 UErrorCode status = U_ZERO_ERROR; 85 UChar temp[90]; 86 UChar t1[90]; 87 UChar t2[90]; 88 89 UCollator *coll = NULL; 90 uint32_t i = 0, j = 0; 91 uint32_t size = 0; 92 93 u_uastrcpy(temp, " & Z < ABC < Q < B"); 
94 95 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status); 96 97 if(U_SUCCESS(status)) { 98 size = sizeof(cnt1)/sizeof(cnt1[0]); 99 for(i = 0; i < size-1; i++) { 100 for(j = i+1; j < size; j++) { 101 UCollationElements *iter; 102 u_uastrcpy(t1, cnt1[i]); 103 u_uastrcpy(t2, cnt1[j]); 104 doTest(coll, t1, t2, UCOL_LESS); 105 /* synwee : added collation element iterator test */ 106 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 107 if (U_FAILURE(status)) { 108 log_err("Creation of iterator failed\n"); 109 break; 110 } 111 backAndForth(iter); 112 ucol_closeElements(iter); 113 } 114 } 115 } 116 117 ucol_close(coll); 118 119 120 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV"); 121 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 122 123 if(U_SUCCESS(status)) { 124 size = sizeof(cnt2)/sizeof(cnt2[0]); 125 for(i = 0; i < size-1; i++) { 126 for(j = i+1; j < size; j++) { 127 UCollationElements *iter; 128 u_uastrcpy(t1, cnt2[i]); 129 u_uastrcpy(t2, cnt2[j]); 130 doTest(coll, t1, t2, UCOL_LESS); 131 132 /* synwee : added collation element iterator test */ 133 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); 134 if (U_FAILURE(status)) { 135 log_err("Creation of iterator failed\n"); 136 break; 137 } 138 backAndForth(iter); 139 ucol_closeElements(iter); 140 } 141 } 142 } 143 144 ucol_close(coll); 145 146 147 } 148 149 const static char shifted[][20] = { 150 "black bird", 151 "black-bird", 152 "blackbird", 153 "black Bird", 154 "black-Bird", 155 "blackBird", 156 "black birds", 157 "black-birds", 158 "blackbirds" 159 }; 160 161 const static UCollationResult shiftedTert[] = { 162 UCOL_EQUAL, 163 UCOL_EQUAL, 164 UCOL_EQUAL, 165 UCOL_LESS, 166 UCOL_EQUAL, 167 UCOL_EQUAL, 168 UCOL_LESS, 169 UCOL_EQUAL, 170 UCOL_EQUAL 171 }; 172 173 const static char nonignorable[][20] = { 174 "black bird", 175 "black Bird", 176 "black birds", 177 "black-bird", 178 "black-Bird", 179 "black-birds", 180 
"blackbird", 181 "blackBird", 182 "blackbirds" 183 }; 184 185 static void BlackBirdTest(void) { 186 UErrorCode status = U_ZERO_ERROR; 187 UChar t1[90]; 188 UChar t2[90]; 189 190 uint32_t i = 0, j = 0; 191 uint32_t size = 0; 192 UCollator *coll = ucol_open("en_US", &status); 193 194 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 195 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); 196 197 if(U_SUCCESS(status)) { 198 size = sizeof(nonignorable)/sizeof(nonignorable[0]); 199 for(i = 0; i < size-1; i++) { 200 for(j = i+1; j < size; j++) { 201 u_uastrcpy(t1, nonignorable[i]); 202 u_uastrcpy(t2, nonignorable[j]); 203 doTest(coll, t1, t2, UCOL_LESS); 204 } 205 } 206 } 207 208 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 209 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 210 211 if(U_SUCCESS(status)) { 212 size = sizeof(shifted)/sizeof(shifted[0]); 213 for(i = 0; i < size-1; i++) { 214 for(j = i+1; j < size; j++) { 215 u_uastrcpy(t1, shifted[i]); 216 u_uastrcpy(t2, shifted[j]); 217 doTest(coll, t1, t2, UCOL_LESS); 218 } 219 } 220 } 221 222 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); 223 if(U_SUCCESS(status)) { 224 size = sizeof(shifted)/sizeof(shifted[0]); 225 for(i = 1; i < size; i++) { 226 u_uastrcpy(t1, shifted[i-1]); 227 u_uastrcpy(t2, shifted[i]); 228 doTest(coll, t1, t2, shiftedTert[i]); 229 } 230 } 231 232 ucol_close(coll); 233 } 234 235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = { 236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000}, 237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000}, 238 {0x0041/*'A'*/, 0x0300, 0x0000}, 239 {0x00C0, 0x0301, 0x0000}, 240 /* this would work with forced normalization */ 241 {0x00C0, 0x0316, 0x0000} 242 }; 243 244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = { 245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}, 247 {0x00C0, 0}, 248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, 249 
/* this would work with forced normalization */ 250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000} 251 }; 252 253 const static UCollationResult results[] = { 254 UCOL_GREATER, 255 UCOL_EQUAL, 256 UCOL_EQUAL, 257 UCOL_GREATER, 258 UCOL_EQUAL 259 }; 260 261 static void FunkyATest(void) 262 { 263 264 int32_t i; 265 UErrorCode status = U_ZERO_ERROR; 266 UCollator *myCollation; 267 myCollation = ucol_open("en_US", &status); 268 if(U_FAILURE(status)){ 269 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 270 return; 271 } 272 log_verbose("Testing some A letters, for some reason\n"); 273 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 274 ucol_setStrength(myCollation, UCOL_TERTIARY); 275 for (i = 0; i < 4 ; i++) 276 { 277 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); 278 } 279 ucol_close(myCollation); 280 } 281 282 UColAttributeValue caseFirst[] = { 283 UCOL_OFF, 284 UCOL_LOWER_FIRST, 285 UCOL_UPPER_FIRST 286 }; 287 288 289 UColAttributeValue alternateHandling[] = { 290 UCOL_NON_IGNORABLE, 291 UCOL_SHIFTED 292 }; 293 294 UColAttributeValue caseLevel[] = { 295 UCOL_OFF, 296 UCOL_ON 297 }; 298 299 UColAttributeValue strengths[] = { 300 UCOL_PRIMARY, 301 UCOL_SECONDARY, 302 UCOL_TERTIARY, 303 UCOL_QUATERNARY, 304 UCOL_IDENTICAL 305 }; 306 307 #if 0 308 static const char * strengthsC[] = { 309 "UCOL_PRIMARY", 310 "UCOL_SECONDARY", 311 "UCOL_TERTIARY", 312 "UCOL_QUATERNARY", 313 "UCOL_IDENTICAL" 314 }; 315 316 static const char * caseFirstC[] = { 317 "UCOL_OFF", 318 "UCOL_LOWER_FIRST", 319 "UCOL_UPPER_FIRST" 320 }; 321 322 323 static const char * alternateHandlingC[] = { 324 "UCOL_NON_IGNORABLE", 325 "UCOL_SHIFTED" 326 }; 327 328 static const char * caseLevelC[] = { 329 "UCOL_OFF", 330 "UCOL_ON" 331 }; 332 333 /* not used currently - does not test only prints */ 334 static void PrintMarkDavis(void) 335 { 336 UErrorCode status = U_ZERO_ERROR; 337 UChar m[256]; 338 uint8_t sortkey[256]; 
339 UCollator *coll = ucol_open("en_US", &status); 340 uint32_t h,i,j,k, sortkeysize; 341 uint32_t sizem = 0; 342 char buffer[512]; 343 uint32_t len = 512; 344 345 log_verbose("PrintMarkDavis"); 346 347 u_uastrcpy(m, "Mark Davis"); 348 sizem = u_strlen(m); 349 350 351 m[1] = 0xe4; 352 353 for(i = 0; i<sizem; i++) { 354 fprintf(stderr, "\\u%04X ", m[i]); 355 } 356 fprintf(stderr, "\n"); 357 358 for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) { 359 ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status); 360 fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]); 361 362 for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) { 363 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status); 364 fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]); 365 366 for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) { 367 ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status); 368 fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]); 369 370 for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) { 371 ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status); 372 sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256); 373 fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]); 374 fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len)); 375 } 376 377 } 378 379 } 380 381 } 382 } 383 #endif 384 385 static void BillFairmanTest(void) { 386 /* 387 ** check for actual locale via ICU resource bundles 388 ** 389 ** lp points to the original locale ("fr_FR_....") 390 */ 391 392 UResourceBundle *lr,*cr; 393 UErrorCode lec = U_ZERO_ERROR; 394 const char *lp = "fr_FR_you_ll_never_find_this_locale"; 395 396 log_verbose("BillFairmanTest\n"); 397 398 lr = ures_open(NULL,lp,&lec); 399 if (lr) { 400 cr = ures_getByKey(lr,"collations",0,&lec); 401 if (cr) { 402 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec); 403 if (lp) { 404 if (U_SUCCESS(lec)) { 405 if(strcmp(lp, "fr") != 0) { 
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp); 407 } 408 } 409 } 410 ures_close(cr); 411 } 412 ures_close(lr); 413 } 414 } 415 416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){ 417 UChar source[256] = { '\0'}; 418 UChar target[256] = { '\0'}; 419 UChar preP = 0x31a3; 420 UChar preQ = 0x310d; 421 /* 422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491; 423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413; 424 */ 425 /*log_verbose("Testing primary\n");*/ 426 427 doTest(col, p, q, UCOL_LESS); 428 /* 429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q)); 430 431 if(result!=UCOL_LESS){ 432 aescstrdup(p,utfSource,256); 433 aescstrdup(q,utfTarget,256); 434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 435 } 436 */ 437 source[0] = preP; 438 u_strcpy(source+1,p); 439 target[0] = preQ; 440 u_strcpy(target+1,q); 441 doTest(col, source, target, UCOL_LESS); 442 /* 443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget); 444 */ 445 } 446 447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){ 448 UChar source[256] = { '\0'}; 449 UChar target[256] = { '\0'}; 450 451 /*log_verbose("Testing secondary\n");*/ 452 453 doTest(col, p, q, UCOL_LESS); 454 /* 455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget); 456 */ 457 source[0] = 0x0053; 458 u_strcpy(source+1,p); 459 target[0]= 0x0073; 460 u_strcpy(target+1,q); 461 462 doTest(col, source, target, UCOL_LESS); 463 /* 464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget); 465 */ 466 467 468 u_strcpy(source,p); 469 source[u_strlen(p)] = 0x62; 470 source[u_strlen(p)+1] = 0; 471 472 473 u_strcpy(target,q); 474 target[u_strlen(q)] = 0x61; 475 target[u_strlen(q)+1] = 0; 476 477 doTest(col, source, target, UCOL_GREATER); 478 479 /* 480 fprintf(file,"secondary is swamped by 1 failed source: %s target: 
%s \n",utfSource,utfTarget); 481 */ 482 } 483 484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){ 485 UChar source[256] = { '\0'}; 486 UChar target[256] = { '\0'}; 487 488 /*log_verbose("Testing tertiary\n");*/ 489 490 doTest(col, p, q, UCOL_LESS); 491 /* 492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget); 493 */ 494 source[0] = 0x0020; 495 u_strcpy(source+1,p); 496 target[0]= 0x002D; 497 u_strcpy(target+1,q); 498 499 doTest(col, source, target, UCOL_LESS); 500 /* 501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget); 502 */ 503 504 u_strcpy(source,p); 505 source[u_strlen(p)] = 0xE0; 506 source[u_strlen(p)+1] = 0; 507 508 u_strcpy(target,q); 509 target[u_strlen(q)] = 0x61; 510 target[u_strlen(q)+1] = 0; 511 512 doTest(col, source, target, UCOL_GREATER); 513 514 /* 515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget); 516 */ 517 } 518 519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){ 520 /* 521 UChar source[256] = { '\0'}; 522 UChar target[256] = { '\0'}; 523 */ 524 525 doTest(col, p, q, UCOL_EQUAL); 526 /* 527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget); 528 */ 529 } 530 531 static void testCollator(UCollator *coll, UErrorCode *status) { 532 const UChar *rules = NULL, *current = NULL; 533 int32_t ruleLen = 0; 534 uint32_t strength = 0; 535 uint32_t chOffset = 0; uint32_t chLen = 0; 536 uint32_t exOffset = 0; uint32_t exLen = 0; 537 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 538 uint32_t firstEx = 0; 539 /* uint32_t rExpsLen = 0; */ 540 uint32_t firstLen = 0; 541 UBool varT = FALSE; UBool top_ = TRUE; 542 uint16_t specs = 0; 543 UBool startOfRules = TRUE; 544 UBool lastReset = FALSE; 545 UBool before = FALSE; 546 uint32_t beforeStrength = 0; 547 UColTokenParser src; 548 UColOptionSet opts; 549 550 UChar first[256]; 551 UChar second[256]; 552 UChar tempB[256]; 
553 uint32_t tempLen; 554 UChar *rulesCopy = NULL; 555 UParseError parseError; 556 557 uprv_memset(&src, 0, sizeof(UColTokenParser)); 558 559 src.opts = &opts; 560 561 rules = ucol_getRules(coll, &ruleLen); 562 if(U_SUCCESS(*status) && ruleLen > 0) { 563 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 564 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 565 src.current = src.source = rulesCopy; 566 src.end = rulesCopy+ruleLen; 567 src.extraCurrent = src.end; 568 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 569 *first = *second = 0; 570 571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 573 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) { 574 strength = src.parsedToken.strength; 575 chOffset = src.parsedToken.charsOffset; 576 chLen = src.parsedToken.charsLen; 577 exOffset = src.parsedToken.extensionOffset; 578 exLen = src.parsedToken.extensionLen; 579 prefixOffset = src.parsedToken.prefixOffset; 580 prefixLen = src.parsedToken.prefixLen; 581 specs = src.parsedToken.flags; 582 583 startOfRules = FALSE; 584 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 585 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 586 if(top_) { /* if reset is on top, the sequence is broken. 
We should have an empty string */ 587 second[0] = 0; 588 } else { 589 u_strncpy(second,src.source+chOffset, chLen); 590 second[chLen] = 0; 591 592 if(exLen > 0 && firstEx == 0) { 593 u_strncat(first, src.source+exOffset, exLen); 594 first[firstLen+exLen] = 0; 595 } 596 597 if(lastReset == TRUE && prefixLen != 0) { 598 u_strncpy(first+prefixLen, first, firstLen); 599 u_strncpy(first, src.source+prefixOffset, prefixLen); 600 first[firstLen+prefixLen] = 0; 601 firstLen = firstLen+prefixLen; 602 } 603 604 if(before == TRUE) { /* swap first and second */ 605 u_strcpy(tempB, first); 606 u_strcpy(first, second); 607 u_strcpy(second, tempB); 608 609 tempLen = firstLen; 610 firstLen = chLen; 611 chLen = tempLen; 612 613 tempLen = firstEx; 614 firstEx = exLen; 615 exLen = tempLen; 616 if(beforeStrength < strength) { 617 strength = beforeStrength; 618 } 619 } 620 } 621 lastReset = FALSE; 622 623 switch(strength){ 624 case UCOL_IDENTICAL: 625 testEquality(coll,first,second); 626 break; 627 case UCOL_PRIMARY: 628 testPrimary(coll,first,second); 629 break; 630 case UCOL_SECONDARY: 631 testSecondary(coll,first,second); 632 break; 633 case UCOL_TERTIARY: 634 testTertiary(coll,first,second); 635 break; 636 case UCOL_TOK_RESET: 637 lastReset = TRUE; 638 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 639 if(before) { 640 beforeStrength = (specs & UCOL_TOK_BEFORE)-1; 641 } 642 break; 643 default: 644 break; 645 } 646 647 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */ 648 before = FALSE; 649 } else { 650 firstLen = chLen; 651 firstEx = exLen; 652 u_strcpy(first, second); 653 } 654 } 655 uprv_free(src.source); 656 } 657 } 658 659 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 660 UCollator *UCA = (UCollator *)collator; 661 return ucol_strcoll(UCA, source, sLen, target, tLen); 662 } 663 664 /* 665 static UCollationResult winTest(void *collator, const 
int object, const UChar *source, const int sLen, const UChar *target, const int tLen) { 666 #if U_PLATFORM_HAS_WIN32_API 667 LCID lcid = (LCID)collator; 668 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen); 669 #else 670 return 0; 671 #endif 672 } 673 */ 674 675 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts, 676 UChar s1, UChar s2, 677 const UChar *s, const uint32_t sLen, 678 const UChar *t, const uint32_t tLen) { 679 UChar source[256] = {0}; 680 UChar target[256] = {0}; 681 682 source[0] = s1; 683 u_strcpy(source+1, s); 684 target[0] = s2; 685 u_strcpy(target+1, t); 686 687 return func(collator, opts, source, sLen+1, target, tLen+1); 688 } 689 690 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts, 691 UChar s1, UChar s2, 692 const UChar *s, const uint32_t sLen, 693 const UChar *t, const uint32_t tLen) { 694 UChar source[256] = {0}; 695 UChar target[256] = {0}; 696 697 u_strcpy(source, s); 698 source[sLen] = s1; 699 u_strcpy(target, t); 700 target[tLen] = s2; 701 702 return func(collator, opts, source, sLen+1, target, tLen+1); 703 } 704 705 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts, 706 const UChar *s, const uint32_t sLen, 707 const UChar *t, const uint32_t tLen, 708 UCollationResult result) { 709 /*UChar fPrimary = 0x6d;*/ 710 /*UChar sPrimary = 0x6e;*/ 711 UChar fSecondary = 0x310d; 712 UChar sSecondary = 0x31a3; 713 UChar fTertiary = 0x310f; 714 UChar sTertiary = 0x31b7; 715 716 UCollationResult oposite; 717 if(result == UCOL_EQUAL) { 718 return UCOL_IDENTICAL; 719 } else if(result == UCOL_GREATER) { 720 oposite = UCOL_LESS; 721 } else { 722 oposite = UCOL_GREATER; 723 } 724 725 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) { 726 return UCOL_PRIMARY; 727 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) && 728 (swampEarlier(func, collator, opts, 0x310f, 
sTertiary, s, sLen, t, tLen) == result)) { 729 return UCOL_SECONDARY; 730 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) && 731 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) { 732 return UCOL_TERTIARY; 733 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) && 734 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) { 735 return UCOL_QUATERNARY; 736 } else { 737 return UCOL_IDENTICAL; 738 } 739 } 740 741 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) { 742 uint32_t i = 0; 743 744 if(res == UCOL_EQUAL || strength == 0xdeadbeef) { 745 buffer[0] = '='; 746 buffer[1] = '='; 747 buffer[2] = '\0'; 748 } else if(res == UCOL_GREATER) { 749 for(i = 0; i<strength+1; i++) { 750 buffer[i] = '>'; 751 } 752 buffer[strength+1] = '\0'; 753 } else { 754 for(i = 0; i<strength+1; i++) { 755 buffer[i] = '<'; 756 } 757 buffer[strength+1] = '\0'; 758 } 759 760 return buffer; 761 } 762 763 764 765 static void logFailure (const char *platform, const char *test, 766 const UChar *source, const uint32_t sLen, 767 const UChar *target, const uint32_t tLen, 768 UCollationResult realRes, uint32_t realStrength, 769 UCollationResult expRes, uint32_t expStrength, UBool error) { 770 771 uint32_t i = 0; 772 773 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256]; 774 static int32_t maxOutputLength = 0; 775 int32_t outputLength; 776 777 *sEsc = *tEsc = *s = *t = 0; 778 if(error == TRUE) { 779 log_err("Difference between expected and generated order. 
Run test with -v for more info\n"); 780 } else if(getTestOption(VERBOSITY_OPTION) == 0) { 781 return; 782 } 783 for(i = 0; i<sLen; i++) { 784 sprintf(b, "%04X", source[i]); 785 strcat(sEsc, "\\u"); 786 strcat(sEsc, b); 787 strcat(s, b); 788 strcat(s, " "); 789 if(source[i] < 0x80) { 790 sprintf(b, "(%c)", source[i]); 791 strcat(sEsc, b); 792 } 793 } 794 for(i = 0; i<tLen; i++) { 795 sprintf(b, "%04X", target[i]); 796 strcat(tEsc, "\\u"); 797 strcat(tEsc, b); 798 strcat(t, b); 799 strcat(t, " "); 800 if(target[i] < 0x80) { 801 sprintf(b, "(%c)", target[i]); 802 strcat(tEsc, b); 803 } 804 } 805 /* 806 strcpy(output, "[[ "); 807 strcat(output, sEsc); 808 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 809 strcat(output, tEsc); 810 811 strcat(output, " : "); 812 813 strcat(output, sEsc); 814 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 815 strcat(output, tEsc); 816 strcat(output, " ]] "); 817 818 log_verbose("%s", output); 819 */ 820 821 822 strcpy(output, "DIFF: "); 823 824 strcat(output, s); 825 strcat(output, " : "); 826 strcat(output, t); 827 828 strcat(output, test); 829 strcat(output, ": "); 830 831 strcat(output, sEsc); 832 strcat(output, getRelationSymbol(expRes, expStrength, relation)); 833 strcat(output, tEsc); 834 835 strcat(output, " "); 836 837 strcat(output, platform); 838 strcat(output, ": "); 839 840 strcat(output, sEsc); 841 strcat(output, getRelationSymbol(realRes, realStrength, relation)); 842 strcat(output, tEsc); 843 844 outputLength = (int32_t)strlen(output); 845 if(outputLength > maxOutputLength) { 846 maxOutputLength = outputLength; 847 U_ASSERT(outputLength < sizeof(output)); 848 } 849 850 log_verbose("%s\n", output); 851 852 } 853 854 /* 855 static void printOutRules(const UChar *rules) { 856 uint32_t len = u_strlen(rules); 857 uint32_t i = 0; 858 char toPrint; 859 uint32_t line = 0; 860 861 fprintf(stdout, "Rules:"); 862 863 for(i = 0; i<len; i++) { 864 if(rules[i]<0x7f && rules[i]>=0x20) { 865 toPrint 
= (char)rules[i]; 866 if(toPrint == '&') { 867 line = 1; 868 fprintf(stdout, "\n&"); 869 } else if(toPrint == ';') { 870 fprintf(stdout, "<<"); 871 line+=2; 872 } else if(toPrint == ',') { 873 fprintf(stdout, "<<<"); 874 line+=3; 875 } else { 876 fprintf(stdout, "%c", toPrint); 877 line++; 878 } 879 } else if(rules[i]<0x3400 || rules[i]>=0xa000) { 880 fprintf(stdout, "\\u%04X", rules[i]); 881 line+=6; 882 } 883 if(line>72) { 884 fprintf(stdout, "\n"); 885 line = 0; 886 } 887 } 888 889 log_verbose("\n"); 890 891 } 892 */ 893 894 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) { 895 uint32_t diffs = 0; 896 UCollationResult realResult; 897 uint32_t realStrength; 898 899 uint32_t sLen = u_strlen(first); 900 uint32_t tLen = u_strlen(second); 901 902 realResult = func(collator, opts, first, sLen, second, tLen); 903 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult); 904 905 if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) { 906 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error); 907 diffs++; 908 } else if(realResult != UCOL_LESS || realStrength != strength) { 909 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error); 910 diffs++; 911 } 912 return diffs; 913 } 914 915 916 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) { 917 const UChar *rules = NULL, *current = NULL; 918 int32_t ruleLen = 0; 919 uint32_t strength = 0; 920 uint32_t chOffset = 0; uint32_t chLen = 0; 921 uint32_t exOffset = 0; uint32_t exLen = 0; 922 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 923 /* uint32_t rExpsLen = 0; */ 924 uint32_t firstLen = 0, secondLen = 0; 925 UBool varT = FALSE; UBool top_ = TRUE; 926 uint16_t specs = 0; 927 UBool startOfRules = 
TRUE; 928 UColTokenParser src; 929 UColOptionSet opts; 930 931 UChar first[256]; 932 UChar second[256]; 933 UChar *rulesCopy = NULL; 934 935 uint32_t UCAdiff = 0; 936 uint32_t Windiff = 1; 937 UParseError parseError; 938 939 uprv_memset(&src, 0, sizeof(UColTokenParser)); 940 src.opts = &opts; 941 942 rules = ucol_getRules(coll, &ruleLen); 943 944 /*printOutRules(rules);*/ 945 946 if(U_SUCCESS(*status) && ruleLen > 0) { 947 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 948 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 949 src.current = src.source = rulesCopy; 950 src.end = rulesCopy+ruleLen; 951 src.extraCurrent = src.end; 952 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 953 *first = *second = 0; 954 955 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 956 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 957 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 958 strength = src.parsedToken.strength; 959 chOffset = src.parsedToken.charsOffset; 960 chLen = src.parsedToken.charsLen; 961 exOffset = src.parsedToken.extensionOffset; 962 exLen = src.parsedToken.extensionLen; 963 prefixOffset = src.parsedToken.prefixOffset; 964 prefixLen = src.parsedToken.prefixLen; 965 specs = src.parsedToken.flags; 966 967 startOfRules = FALSE; 968 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 969 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 970 971 u_strncpy(second,src.source+chOffset, chLen); 972 second[chLen] = 0; 973 secondLen = chLen; 974 975 if(exLen > 0) { 976 u_strncat(first, src.source+exOffset, exLen); 977 first[firstLen+exLen] = 0; 978 firstLen += exLen; 979 } 980 981 if(strength != UCOL_TOK_RESET) { 982 if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) { 983 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error); 
984 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/ 985 } 986 } 987 988 989 firstLen = chLen; 990 u_strcpy(first, second); 991 992 } 993 if(UCAdiff != 0 && Windiff != 0) { 994 log_verbose("\n"); 995 } 996 if(UCAdiff == 0) { 997 log_verbose("No immediate difference with %s!\n", refName); 998 } 999 if(Windiff == 0) { 1000 log_verbose("No immediate difference with Win32!\n"); 1001 } 1002 uprv_free(src.source); 1003 } 1004 } 1005 1006 /* 1007 * Takes two CEs (lead and continuation) and 1008 * compares them as CEs should be compared: 1009 * primary vs. primary, secondary vs. secondary 1010 * tertiary vs. tertiary 1011 */ 1012 static int32_t compareCEs(uint32_t s1, uint32_t s2, 1013 uint32_t t1, uint32_t t2) { 1014 uint32_t s = 0, t = 0; 1015 if(s1 == t1 && s2 == t2) { 1016 return 0; 1017 } 1018 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 1019 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); 1020 if(s < t) { 1021 return -1; 1022 } else if(s > t) { 1023 return 1; 1024 } else { 1025 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; 1026 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; 1027 if(s < t) { 1028 return -1; 1029 } else if(s > t) { 1030 return 1; 1031 } else { 1032 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); 1033 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); 1034 if(s < t) { 1035 return -1; 1036 } else { 1037 return 1; 1038 } 1039 } 1040 } 1041 } 1042 1043 typedef struct { 1044 uint32_t startCE; 1045 uint32_t startContCE; 1046 uint32_t limitCE; 1047 uint32_t limitContCE; 1048 } indirectBoundaries; 1049 1050 /* these values are used for finding CE values for indirect positioning. */ 1051 /* Indirect positioning is a mechanism for allowing resets on symbolic */ 1052 /* values. It only works for resets and you cannot tailor indirect names */ 1053 /* An indirect name can define either an anchor point or a range. 
An */ 1054 /* anchor point behaves in exactly the same way as a code point in reset */ 1055 /* would, except that it cannot be tailored. A range (we currently only */ 1056 /* know for the [top] range will explicitly set the upper bound for */ 1057 /* generated CEs, thus allowing for better control over how many CEs can */ 1058 /* be squeezed between in the range without performance penalty. */ 1059 /* In that respect, we use [top] for tailoring of locales that use CJK */ 1060 /* characters. Other indirect values are currently a pure convenience, */ 1061 /* they can be used to assure that the CEs will be always positioned in */ 1062 /* the same place relative to a point with known properties (e.g. first */ 1063 /* primary ignorable). */ 1064 static indirectBoundaries ucolIndirectBoundaries[15]; 1065 static UBool indirectBoundariesSet = FALSE; 1066 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) { 1067 /* Set values for the top - TODO: once we have values for all the indirects, we are going */ 1068 /* to initalize here. 
*/ 1069 ucolIndirectBoundaries[indexR].startCE = start[0]; 1070 ucolIndirectBoundaries[indexR].startContCE = start[1]; 1071 if(end) { 1072 ucolIndirectBoundaries[indexR].limitCE = end[0]; 1073 ucolIndirectBoundaries[indexR].limitContCE = end[1]; 1074 } else { 1075 ucolIndirectBoundaries[indexR].limitCE = 0; 1076 ucolIndirectBoundaries[indexR].limitContCE = 0; 1077 } 1078 } 1079 1080 static void testCEs(UCollator *coll, UErrorCode *status) { 1081 const UChar *rules = NULL, *current = NULL; 1082 int32_t ruleLen = 0; 1083 1084 uint32_t strength = 0; 1085 uint32_t maxStrength = UCOL_IDENTICAL; 1086 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE; 1087 uint32_t lastCE; 1088 uint32_t lastContCE; 1089 1090 int32_t result = 0; 1091 uint32_t chOffset = 0; uint32_t chLen = 0; 1092 uint32_t exOffset = 0; uint32_t exLen = 0; 1093 uint32_t prefixOffset = 0; uint32_t prefixLen = 0; 1094 uint32_t oldOffset = 0; 1095 1096 /* uint32_t rExpsLen = 0; */ 1097 /* uint32_t firstLen = 0; */ 1098 uint16_t specs = 0; 1099 UBool varT = FALSE; UBool top_ = TRUE; 1100 UBool startOfRules = TRUE; 1101 UBool before = FALSE; 1102 UColTokenParser src; 1103 UColOptionSet opts; 1104 UParseError parseError; 1105 UChar *rulesCopy = NULL; 1106 collIterate *c = uprv_new_collIterate(status); 1107 UCAConstants *consts = NULL; 1108 uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */ 1109 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT; 1110 const char *colLoc; 1111 UCollator *UCA = ucol_open("root", status); 1112 1113 if (U_FAILURE(*status)) { 1114 log_err("Could not open root collator %s\n", u_errorName(*status)); 1115 uprv_delete_collIterate(c); 1116 return; 1117 } 1118 1119 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status); 1120 if (U_FAILURE(*status)) { 1121 log_err("Could not get collator name: %s\n", u_errorName(*status)); 1122 ucol_close(UCA); 1123 uprv_delete_collIterate(c); 1124 return; 1125 } 1126 1127 uprv_memset(&src, 0, sizeof(UColTokenParser)); 1128 1129 consts 
= (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts); 1130 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0]; 1131 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */ 1132 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0]; 1133 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1]; 1134 1135 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND; 1136 1137 src.opts = &opts; 1138 1139 rules = ucol_getRules(coll, &ruleLen); 1140 1141 src.invUCA = ucol_initInverseUCA(status); 1142 1143 if(indirectBoundariesSet == FALSE) { 1144 /* UCOL_RESET_TOP_VALUE */ 1145 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1146 /* UCOL_FIRST_PRIMARY_IGNORABLE */ 1147 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0); 1148 /* UCOL_LAST_PRIMARY_IGNORABLE */ 1149 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0); 1150 /* UCOL_FIRST_SECONDARY_IGNORABLE */ 1151 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0); 1152 /* UCOL_LAST_SECONDARY_IGNORABLE */ 1153 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0); 1154 /* UCOL_FIRST_TERTIARY_IGNORABLE */ 1155 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0); 1156 /* UCOL_LAST_TERTIARY_IGNORABLE */ 1157 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0); 1158 /* UCOL_FIRST_VARIABLE */ 1159 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0); 1160 /* UCOL_LAST_VARIABLE */ 1161 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0); 1162 /* UCOL_FIRST_NON_VARIABLE */ 1163 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0); 1164 /* UCOL_LAST_NON_VARIABLE */ 1165 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT); 1166 /* UCOL_FIRST_IMPLICIT */ 1167 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0); 1168 /* UCOL_LAST_IMPLICIT */ 1169 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, 
consts->UCA_FIRST_TRAILING); 1170 /* UCOL_FIRST_TRAILING */ 1171 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0); 1172 /* UCOL_LAST_TRAILING */ 1173 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0); 1174 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24); 1175 indirectBoundariesSet = TRUE; 1176 } 1177 1178 1179 if(U_SUCCESS(*status) && ruleLen > 0) { 1180 rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 1181 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar)); 1182 src.current = src.source = rulesCopy; 1183 src.end = rulesCopy+ruleLen; 1184 src.extraCurrent = src.end; 1185 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 1186 1187 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 1188 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 1189 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) { 1190 strength = src.parsedToken.strength; 1191 chOffset = src.parsedToken.charsOffset; 1192 chLen = src.parsedToken.charsLen; 1193 exOffset = src.parsedToken.extensionOffset; 1194 exLen = src.parsedToken.extensionLen; 1195 prefixOffset = src.parsedToken.prefixOffset; 1196 prefixLen = src.parsedToken.prefixLen; 1197 specs = src.parsedToken.flags; 1198 1199 startOfRules = FALSE; 1200 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0); 1201 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0); 1202 1203 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status); 1204 1205 currCE = ucol_getNextCE(coll, c, status); 1206 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) { 1207 log_verbose("Thai prevowel detected. 
Will pick next CE\n"); 1208 currCE = ucol_getNextCE(coll, c, status); 1209 } 1210 1211 currContCE = ucol_getNextCE(coll, c, status); 1212 if(!isContinuation(currContCE)) { 1213 currContCE = 0; 1214 } 1215 1216 /* we need to repack CEs here */ 1217 1218 if(strength == UCOL_TOK_RESET) { 1219 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0); 1220 if(top_ == TRUE) { 1221 int32_t tokenIndex = src.parsedToken.indirectIndex; 1222 1223 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE; 1224 nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE; 1225 } else { 1226 nextCE = baseCE = currCE; 1227 nextContCE = baseContCE = currContCE; 1228 } 1229 maxStrength = UCOL_IDENTICAL; 1230 } else { 1231 if(strength < maxStrength) { 1232 maxStrength = strength; 1233 if(baseCE == UCOL_RESET_TOP_VALUE) { 1234 log_verbose("Resetting to [top]\n"); 1235 nextCE = UCOL_NEXT_TOP_VALUE; 1236 nextContCE = UCOL_NEXT_TOP_CONT; 1237 } else { 1238 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength); 1239 } 1240 if(result < 0) { 1241 if(ucol_isTailored(coll, *(src.source+oldOffset), status)) { 1242 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset)); 1243 return; 1244 } else { 1245 log_err("%s: couldn't find the CE\n", colLoc); 1246 return; 1247 } 1248 } 1249 } 1250 1251 currCE &= 0xFFFFFF3F; 1252 currContCE &= 0xFFFFFFBF; 1253 1254 if(maxStrength == UCOL_IDENTICAL) { 1255 if(baseCE != currCE || baseContCE != currContCE) { 1256 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1257 } 1258 } else { 1259 if(strength == UCOL_IDENTICAL) { 1260 if(lastCE != currCE || lastContCE != currContCE) { 1261 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc); 1262 } 1263 } else { 1264 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) { 1265 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= 
nextContCE)) {*/ 1266 log_err("%s: current CE is not less than base CE\n", colLoc); 1267 } 1268 if(!before) { 1269 if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) { 1270 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1271 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1272 } 1273 } else { 1274 before = FALSE; 1275 if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) { 1276 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/ 1277 log_err("%s: sequence of generated CEs is broken\n", colLoc); 1278 } 1279 } 1280 } 1281 } 1282 1283 } 1284 1285 oldOffset = chOffset; 1286 lastCE = currCE & 0xFFFFFF3F; 1287 lastContCE = currContCE & 0xFFFFFFBF; 1288 } 1289 uprv_free(src.source); 1290 } 1291 ucol_close(UCA); 1292 uprv_delete_collIterate(c); 1293 } 1294 1295 #if 0 1296 /* these locales are now picked from index RB */ 1297 static const char* localesToTest[] = { 1298 "ar", "bg", "ca", "cs", "da", 1299 "el", "en_BE", "en_US_POSIX", 1300 "es", "et", "fi", "fr", "hi", 1301 "hr", "hu", "is", "iw", "ja", 1302 "ko", "lt", "lv", "mk", "mt", 1303 "nb", "nn", "nn_NO", "pl", "ro", 1304 "ru", "sh", "sk", "sl", "sq", 1305 "sr", "sv", "th", "tr", "uk", 1306 "vi", "zh", "zh_TW" 1307 }; 1308 #endif 1309 1310 static const char* rulesToTest[] = { 1311 /* Funky fa rule */ 1312 "&\\u0622 < \\u0627 << \\u0671 < \\u0621", 1313 /*"& Z < p, P",*/ 1314 /* Cui Mins rules */ 1315 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/ 1316 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1317 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/ 1318 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/ 1319 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/ 1320 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", 
/*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/ 1321 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/ 1322 }; 1323 1324 1325 static void TestCollations(void) { 1326 int32_t noOfLoc = uloc_countAvailable(); 1327 int32_t i = 0, j = 0; 1328 1329 UErrorCode status = U_ZERO_ERROR; 1330 char cName[256]; 1331 UChar name[256]; 1332 int32_t nameSize; 1333 1334 1335 const char *locName = NULL; 1336 UCollator *coll = NULL; 1337 UCollator *UCA = ucol_open("", &status); 1338 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status); 1339 if (U_FAILURE(status)) { 1340 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status)); 1341 return; 1342 } 1343 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 1344 1345 for(i = 0; i<noOfLoc; i++) { 1346 status = U_ZERO_ERROR; 1347 locName = uloc_getAvailable(i); 1348 if(uprv_strcmp("ja", locName) == 0) { 1349 log_verbose("Don't know how to test prefixes\n"); 1350 continue; 1351 } 1352 if(hasCollationElements(locName)) { 1353 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status); 1354 for(j = 0; j<nameSize; j++) { 1355 cName[j] = (char)name[j]; 1356 } 1357 cName[nameSize] = 0; 1358 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1359 coll = ucol_open(locName, &status); 1360 if(U_SUCCESS(status)) { 1361 testAgainstUCA(coll, UCA, "UCA", FALSE, &status); 1362 ucol_close(coll); 1363 } else { 1364 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status)); 1365 status = U_ZERO_ERROR; 1366 } 1367 } 1368 } 1369 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status); 1370 ucol_close(UCA); 1371 } 1372 1373 static void RamsRulesTest(void) { 1374 UErrorCode status = U_ZERO_ERROR; 1375 int32_t i = 0; 1376 UCollator *coll = NULL; 1377 UChar rule[2048]; 1378 uint32_t ruleLen; 1379 int32_t noOfLoc = uloc_countAvailable(); 1380 const char *locName = NULL; 
1381 1382 log_verbose("RamsRulesTest\n"); 1383 1384 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) { 1385 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */ 1386 return; 1387 } 1388 1389 for(i = 0; i<noOfLoc; i++) { 1390 locName = uloc_getAvailable(i); 1391 if(hasCollationElements(locName)) { 1392 if (uprv_strcmp("ja", locName)==0) { 1393 log_verbose("Don't know how to test Japanese because of prefixes\n"); 1394 continue; 1395 } 1396 if (uprv_strcmp("de__PHONEBOOK", locName)==0) { 1397 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n"); 1398 continue; 1399 } 1400 if (uprv_strcmp("bn", locName)==0 || 1401 uprv_strcmp("en_US_POSIX", locName)==0 || 1402 uprv_strcmp("km", locName)==0 || 1403 uprv_strcmp("km_KH", locName)==0 || 1404 uprv_strcmp("my", locName)==0 || 1405 uprv_strcmp("si", locName)==0 || 1406 uprv_strcmp("si_LK", locName)==0 || 1407 uprv_strcmp("th", locName)==0 || 1408 uprv_strcmp("th_TH", locName)==0 || 1409 uprv_strcmp("zh", locName)==0 || 1410 uprv_strcmp("zh_Hant", locName)==0 1411 ) { 1412 log_verbose("Don't know how to test %s. 
" 1413 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName); 1414 continue; 1415 } 1416 log_verbose("Testing locale %s\n", locName); 1417 status = U_ZERO_ERROR; 1418 coll = ucol_open(locName, &status); 1419 if(U_SUCCESS(status)) { 1420 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) { 1421 if(coll->image->jamoSpecial == TRUE) { 1422 log_err("%s has special JAMOs\n", locName); 1423 } 1424 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status); 1425 testCollator(coll, &status); 1426 testCEs(coll, &status); 1427 } else { 1428 log_verbose("Skipping %s: %s\n", locName, u_errorName(status)); 1429 } 1430 ucol_close(coll); 1431 } else { 1432 log_err("Could not open %s: %s\n", locName, u_errorName(status)); 1433 } 1434 } 1435 } 1436 1437 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) { 1438 log_verbose("Testing rule: %s\n", rulesToTest[i]); 1439 ruleLen = u_unescape(rulesToTest[i], rule, 2048); 1440 status = U_ZERO_ERROR; 1441 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1442 if(U_SUCCESS(status)) { 1443 testCollator(coll, &status); 1444 testCEs(coll, &status); 1445 ucol_close(coll); 1446 } else { 1447 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]); 1448 } 1449 } 1450 1451 } 1452 1453 static void IsTailoredTest(void) { 1454 UErrorCode status = U_ZERO_ERROR; 1455 uint32_t i = 0; 1456 UCollator *coll = NULL; 1457 UChar rule[2048]; 1458 UChar tailored[2048]; 1459 UChar notTailored[2048]; 1460 uint32_t ruleLen, tailoredLen, notTailoredLen; 1461 1462 log_verbose("IsTailoredTest\n"); 1463 1464 u_uastrcpy(rule, "&Z < A, B, C;c < d"); 1465 ruleLen = u_strlen(rule); 1466 1467 u_uastrcpy(tailored, "ABCcd"); 1468 tailoredLen = u_strlen(tailored); 1469 1470 u_uastrcpy(notTailored, "ZabD"); 1471 notTailoredLen = u_strlen(notTailored); 1472 1473 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1474 
if(U_SUCCESS(status)) { 1475 for(i = 0; i<tailoredLen; i++) { 1476 if(!ucol_isTailored(coll, tailored[i], &status)) { 1477 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]); 1478 } 1479 } 1480 for(i = 0; i<notTailoredLen; i++) { 1481 if(ucol_isTailored(coll, notTailored[i], &status)) { 1482 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]); 1483 } 1484 } 1485 ucol_close(coll); 1486 } 1487 else { 1488 log_err_status(status, "Can't tailor rules\n"); 1489 } 1490 /* Code coverage */ 1491 status = U_ZERO_ERROR; 1492 coll = ucol_open("ja", &status); 1493 if(!ucol_isTailored(coll, 0x4E9C, &status)) { 1494 log_err_status(status, "0x4E9C should be tailored - it is reported as not\n"); 1495 } 1496 ucol_close(coll); 1497 } 1498 1499 1500 const static char chTest[][20] = { 1501 "c", 1502 "C", 1503 "ca", "cb", "cx", "cy", "CZ", 1504 "c\\u030C", "C\\u030C", 1505 "h", 1506 "H", 1507 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY", 1508 "ch", "cH", "Ch", "CH", 1509 "cha", "charly", "che", "chh", "chch", "chr", 1510 "i", "I", "iarly", 1511 "r", "R", 1512 "r\\u030C", "R\\u030C", 1513 "s", 1514 "S", 1515 "s\\u030C", "S\\u030C", 1516 "z", "Z", 1517 "z\\u030C", "Z\\u030C" 1518 }; 1519 1520 static void TestChMove(void) { 1521 UChar t1[256] = {0}; 1522 UChar t2[256] = {0}; 1523 1524 uint32_t i = 0, j = 0; 1525 uint32_t size = 0; 1526 UErrorCode status = U_ZERO_ERROR; 1527 1528 UCollator *coll = ucol_open("cs", &status); 1529 1530 if(U_SUCCESS(status)) { 1531 size = sizeof(chTest)/sizeof(chTest[0]); 1532 for(i = 0; i < size-1; i++) { 1533 for(j = i+1; j < size; j++) { 1534 u_unescape(chTest[i], t1, 256); 1535 u_unescape(chTest[j], t2, 256); 1536 doTest(coll, t1, t2, UCOL_LESS); 1537 } 1538 } 1539 } 1540 else { 1541 log_data_err("Can't open collator"); 1542 } 1543 ucol_close(coll); 1544 } 1545 1546 1547 1548 1549 const static char impTest[][20] = { 1550 "\\u4e00", 1551 "a", 1552 "A", 1553 "b", 1554 "B", 1555 
"\\u4e01" 1556 }; 1557 1558 1559 static void TestImplicitTailoring(void) { 1560 static const struct { 1561 const char *rules; 1562 const char *data[10]; 1563 const uint32_t len; 1564 } tests[] = { 1565 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 }, 1566 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 }, 1567 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3}, 1568 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3} 1569 }; 1570 1571 int32_t i = 0; 1572 1573 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 1574 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 1575 } 1576 1577 /* 1578 UChar t1[256] = {0}; 1579 UChar t2[256] = {0}; 1580 1581 const char *rule = "&\\u4e00 < a <<< A < b <<< B"; 1582 1583 uint32_t i = 0, j = 0; 1584 uint32_t size = 0; 1585 uint32_t ruleLen = 0; 1586 UErrorCode status = U_ZERO_ERROR; 1587 UCollator *coll = NULL; 1588 ruleLen = u_unescape(rule, t1, 256); 1589 1590 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1591 1592 if(U_SUCCESS(status)) { 1593 size = sizeof(impTest)/sizeof(impTest[0]); 1594 for(i = 0; i < size-1; i++) { 1595 for(j = i+1; j < size; j++) { 1596 u_unescape(impTest[i], t1, 256); 1597 u_unescape(impTest[j], t2, 256); 1598 doTest(coll, t1, t2, UCOL_LESS); 1599 } 1600 } 1601 } 1602 else { 1603 log_err("Can't open collator"); 1604 } 1605 ucol_close(coll); 1606 */ 1607 } 1608 1609 static void TestFCDProblem(void) { 1610 UChar t1[256] = {0}; 1611 UChar t2[256] = {0}; 1612 1613 const char *s1 = "\\u0430\\u0306\\u0325"; 1614 const char *s2 = "\\u04D1\\u0325"; 1615 1616 UErrorCode status = U_ZERO_ERROR; 1617 UCollator *coll = ucol_open("", &status); 1618 u_unescape(s1, t1, 256); 1619 u_unescape(s2, t2, 256); 1620 1621 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); 1622 doTest(coll, t1, t2, UCOL_EQUAL); 1623 1624 
ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 1625 doTest(coll, t1, t2, UCOL_EQUAL); 1626 1627 ucol_close(coll); 1628 } 1629 1630 /* 1631 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC 1632 We're only using NFC/NFD in this test. 1633 */ 1634 #define NORM_BUFFER_TEST_LEN 18 1635 typedef struct { 1636 UChar32 u; 1637 UChar NFC[NORM_BUFFER_TEST_LEN]; 1638 UChar NFD[NORM_BUFFER_TEST_LEN]; 1639 } tester; 1640 1641 static void TestComposeDecompose(void) { 1642 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */ 1643 static const UChar UNICODESET_STR[] = { 1644 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61, 1645 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72, 1646 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0 1647 }; 1648 int32_t noOfLoc; 1649 int32_t i = 0, j = 0; 1650 1651 UErrorCode status = U_ZERO_ERROR; 1652 const char *locName = NULL; 1653 uint32_t nfcSize; 1654 uint32_t nfdSize; 1655 tester **t; 1656 uint32_t noCases = 0; 1657 UCollator *coll = NULL; 1658 UChar32 u = 0; 1659 UChar comp[NORM_BUFFER_TEST_LEN]; 1660 uint32_t len = 0; 1661 UCollationElements *iter; 1662 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status); 1663 int32_t charsToTestSize; 1664 1665 noOfLoc = uloc_countAvailable(); 1666 1667 coll = ucol_open("", &status); 1668 if (U_FAILURE(status)) { 1669 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status)); 1670 return; 1671 } 1672 charsToTestSize = uset_size(charsToTest); 1673 if (charsToTestSize <= 0) { 1674 log_err("Set was zero. 
Missing data?\n"); 1675 return; 1676 } 1677 t = (tester **)malloc(charsToTestSize * sizeof(tester *)); 1678 t[0] = (tester *)malloc(sizeof(tester)); 1679 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize); 1680 1681 for(u = 0; u < charsToTestSize; u++) { 1682 UChar32 ch = uset_charAt(charsToTest, u); 1683 len = 0; 1684 U16_APPEND_UNSAFE(comp, len, ch); 1685 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1686 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status); 1687 1688 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0) 1689 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) { 1690 t[noCases]->u = ch; 1691 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) { 1692 u_strncpy(t[noCases]->NFC, comp, len); 1693 t[noCases]->NFC[len] = 0; 1694 } 1695 noCases++; 1696 t[noCases] = (tester *)malloc(sizeof(tester)); 1697 uprv_memset(t[noCases], 0, sizeof(tester)); 1698 } 1699 } 1700 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize); 1701 uset_close(charsToTest); 1702 charsToTest = NULL; 1703 1704 for(u=0; u<(UChar32)noCases; u++) { 1705 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1706 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u); 1707 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1708 } 1709 } 1710 /* 1711 for(u = 0; u < charsToTestSize; u++) { 1712 if(!(u&0xFFFF)) { 1713 log_verbose("%08X ", u); 1714 } 1715 uprv_memset(t[noCases], 0, sizeof(tester)); 1716 t[noCases]->u = u; 1717 len = 0; 1718 U16_APPEND_UNSAFE(comp, len, u); 1719 comp[len] = 0; 1720 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status); 1721 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, 
&status); 1722 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL); 1723 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL); 1724 } 1725 */ 1726 1727 ucol_close(coll); 1728 1729 log_verbose("Testing locales, number of cases = %i\n", noCases); 1730 for(i = 0; i<noOfLoc; i++) { 1731 status = U_ZERO_ERROR; 1732 locName = uloc_getAvailable(i); 1733 if(hasCollationElements(locName)) { 1734 char cName[256]; 1735 UChar name[256]; 1736 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status); 1737 1738 for(j = 0; j<nameSize; j++) { 1739 cName[j] = (char)name[j]; 1740 } 1741 cName[nameSize] = 0; 1742 log_verbose("\nTesting locale %s (%s)\n", locName, cName); 1743 1744 coll = ucol_open(locName, &status); 1745 ucol_setStrength(coll, UCOL_IDENTICAL); 1746 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1747 1748 for(u=0; u<(UChar32)noCases; u++) { 1749 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { 1750 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName); 1751 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); 1752 log_verbose("Testing NFC\n"); 1753 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status); 1754 backAndForth(iter); 1755 log_verbose("Testing NFD\n"); 1756 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status); 1757 backAndForth(iter); 1758 } 1759 } 1760 ucol_closeElements(iter); 1761 ucol_close(coll); 1762 } 1763 } 1764 for(u = 0; u <= (UChar32)noCases; u++) { 1765 free(t[u]); 1766 } 1767 free(t); 1768 } 1769 1770 static void TestEmptyRule(void) { 1771 UErrorCode status = U_ZERO_ERROR; 1772 UChar rulez[] = { 0 }; 1773 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status); 1774 1775 ucol_close(coll); 1776 } 1777 1778 static void TestUCARules(void) { 1779 UErrorCode status = U_ZERO_ERROR; 1780 UChar b[256]; 1781 UChar *rules = b; 1782 uint32_t ruleLen = 0; 1783 UCollator *UCAfromRules = NULL; 1784 UCollator *coll = ucol_open("", &status); 
1785 if(status == U_FILE_ACCESS_ERROR) { 1786 log_data_err("Is your data around?\n"); 1787 return; 1788 } else if(U_FAILURE(status)) { 1789 log_err("Error opening collator\n"); 1790 return; 1791 } 1792 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256); 1793 1794 log_verbose("TestUCARules\n"); 1795 if(ruleLen > 256) { 1796 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar)); 1797 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen); 1798 } 1799 log_verbose("Rules length is %d\n", ruleLen); 1800 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 1801 if(U_SUCCESS(status)) { 1802 ucol_close(UCAfromRules); 1803 } else { 1804 log_verbose("Unable to create a collator from UCARules!\n"); 1805 } 1806 /* 1807 u_unescape(blah, b, 256); 1808 ucol_getSortKey(coll, b, 1, res, 256); 1809 */ 1810 ucol_close(coll); 1811 if(rules != b) { 1812 free(rules); 1813 } 1814 } 1815 1816 1817 /* Pinyin tonal order */ 1818 /* 1819 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0) 1820 (w/macron)< (w/acute)< (w/caron)< (w/grave) 1821 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8) 1822 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec) 1823 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2) 1824 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9) 1825 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) < 1826 .. (\u00fc) 1827 1828 However, in testing we got the following order: 1829 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101) 1830 (w/acute)< (w/grave)< (w/caron)< (w/macron) 1831 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) < 1832 .. (\u0113) 1833 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b) 1834 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d) 1835 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) < 1836 .. (\u01d8) 1837 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. 
(\u016b) 1838 */ 1839 1840 static void TestBefore(void) { 1841 const static char *data[] = { 1842 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A", 1843 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E", 1844 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I", 1845 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O", 1846 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U", 1847 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc" 1848 }; 1849 genericRulesStarter( 1850 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0" 1851 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8" 1852 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec" 1853 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2" 1854 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9" 1855 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc", 1856 data, sizeof(data)/sizeof(data[0])); 1857 } 1858 1859 #if 0 1860 /* superceded by TestBeforePinyin */ 1861 static void TestJ784(void) { 1862 const static char *data[] = { 1863 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", 1864 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", 1865 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", 1866 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", 1867 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", 1868 "\\u00fc", 1869 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc" 1870 }; 1871 genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0])); 1872 } 1873 #endif 1874 1875 #if 0 1876 /* superceded by the changes to the lv locale */ 1877 static void TestJ831(void) { 1878 const static char *data[] = { 1879 "I", 1880 "i", 1881 "Y", 1882 "y" 1883 }; 1884 genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0])); 1885 } 1886 #endif 1887 1888 static void TestJ815(void) { 1889 const static char *data[] = { 1890 "aa", 1891 "Aa", 1892 "ab", 1893 "Ab", 1894 "ad", 1895 "Ad", 1896 "ae", 1897 "Ae", 1898 "\\u00e6", 1899 "\\u00c6", 1900 "af", 1901 "Af", 1902 "b", 1903 "B" 1904 }; 1905 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); 1906 genericRulesStarter("[backwards 
2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0])); 1907 } 1908 1909 1910 /* 1911 "& a < b < c < d& r < c", "& a < b < d& r < c", 1912 "& a < b < c < d& c < m", "& a < b < c < m < d", 1913 "& a < b < c < d& a < m", "& a < m < b < c < d", 1914 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d", 1915 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d", 1916 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e", 1917 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e", 1918 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e", 1919 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g", 1920 */ 1921 static void TestRedundantRules(void) { 1922 int32_t i; 1923 1924 static const struct { 1925 const char *rules; 1926 const char *expectedRules; 1927 const char *testdata[8]; 1928 uint32_t testdatalen; 1929 } tests[] = { 1930 /* this test conflicts with positioning of CODAN placeholder */ 1931 /*{ 1932 "& a <<< b <<< c << d <<< e& [before 1] e <<< x", 1933 "&\\u2089<<<x", 1934 {"\\u2089", "x"}, 2 1935 }, */ 1936 /* this test conflicts with the [before x] syntax tightening */ 1937 /*{ 1938 "& b <<< c <<< d << e <<< f& [before 1] f <<< x", 1939 "&\\u0252<<<x", 1940 {"\\u0252", "x"}, 2 1941 }, */ 1942 /* this test conflicts with the [before x] syntax tightening */ 1943 /*{ 1944 "& a < b <<< c << d <<< e& [before 1] e <<< x", 1945 "& a <<< x < b <<< c << d <<< e", 1946 {"a", "x", "b", "c", "d", "e"}, 6 1947 }, */ 1948 { 1949 "& a < b < c < d& [before 1] c < m", 1950 "& a < b < m < c < d", 1951 {"a", "b", "m", "c", "d"}, 5 1952 }, 1953 { 1954 "& a < b <<< c << d <<< e& [before 3] e <<< x", 1955 "& a < b <<< c << d <<< x <<< e", 1956 {"a", "b", "c", "d", "x", "e"}, 6 1957 }, 1958 /* this test conflicts with the [before x] syntax tightening */ 1959 /* { 1960 "& a < b <<< c << d <<< e& [before 2] e <<< x", 1961 "& a < b <<< c <<< x << 
d <<< e", 1962 {"a", "b", "c", "x", "d", "e"},, 6 1963 }, */ 1964 { 1965 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", 1966 "& a < b <<< c << d <<< e <<< f < x < g", 1967 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8 1968 }, 1969 { 1970 "& a <<< b << c < d& a < m", 1971 "& a <<< b << c < m < d", 1972 {"a", "b", "c", "m", "d"}, 5 1973 }, 1974 { 1975 "&a<b<<b\\u0301 &z<b", 1976 "&a<b\\u0301 &z<b", 1977 {"a", "b\\u0301", "z", "b"}, 4 1978 }, 1979 { 1980 "&z<m<<<q<<<m", 1981 "&z<q<<<m", 1982 {"z", "q", "m"},3 1983 }, 1984 { 1985 "&z<<<m<q<<<m", 1986 "&z<q<<<m", 1987 {"z", "q", "m"}, 3 1988 }, 1989 { 1990 "& a < b < c < d& r < c", 1991 "& a < b < d& r < c", 1992 {"a", "b", "d"}, 3 1993 }, 1994 { 1995 "& a < b < c < d& r < c", 1996 "& a < b < d& r < c", 1997 {"r", "c"}, 2 1998 }, 1999 { 2000 "& a < b < c < d& c < m", 2001 "& a < b < c < m < d", 2002 {"a", "b", "c", "m", "d"}, 5 2003 }, 2004 { 2005 "& a < b < c < d& a < m", 2006 "& a < m < b < c < d", 2007 {"a", "m", "b", "c", "d"}, 5 2008 } 2009 }; 2010 2011 2012 UCollator *credundant = NULL; 2013 UCollator *cresulting = NULL; 2014 UErrorCode status = U_ZERO_ERROR; 2015 UChar rlz[2048] = { 0 }; 2016 uint32_t rlen = 0; 2017 2018 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) { 2019 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules); 2020 rlen = u_unescape(tests[i].rules, rlz, 2048); 2021 2022 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2023 if(status == U_FILE_ACCESS_ERROR) { 2024 log_data_err("Is your data around?\n"); 2025 return; 2026 } else if(U_FAILURE(status)) { 2027 log_err("Error opening collator\n"); 2028 return; 2029 } 2030 2031 rlen = u_unescape(tests[i].expectedRules, rlz, 2048); 2032 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2033 2034 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status); 2035 2036 ucol_close(credundant); 2037 ucol_close(cresulting); 2038 2039 
log_verbose("testing using data\n"); 2040 2041 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen); 2042 } 2043 2044 } 2045 2046 static void TestExpansionSyntax(void) { 2047 int32_t i; 2048 2049 const static char *rules[] = { 2050 "&AE <<< a << b <<< c &d <<< f", 2051 "&AE <<< a <<< b << c << d < e < f <<< g", 2052 "&AE <<< B <<< C / D <<< F" 2053 }; 2054 2055 const static char *expectedRules[] = { 2056 "&A <<< a / E << b / E <<< c /E &d <<< f", 2057 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g", 2058 "&A <<< B / E <<< C / ED <<< F / E" 2059 }; 2060 2061 const static char *testdata[][8] = { 2062 {"AE", "a", "b", "c"}, 2063 {"AE", "a", "b", "c", "d", "e", "f", "g"}, 2064 {"AE", "B", "C"} /* / ED <<< F / E"},*/ 2065 }; 2066 2067 const static uint32_t testdatalen[] = { 2068 4, 2069 8, 2070 3 2071 }; 2072 2073 2074 2075 UCollator *credundant = NULL; 2076 UCollator *cresulting = NULL; 2077 UErrorCode status = U_ZERO_ERROR; 2078 UChar rlz[2048] = { 0 }; 2079 uint32_t rlen = 0; 2080 2081 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) { 2082 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]); 2083 rlen = u_unescape(rules[i], rlz, 2048); 2084 2085 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 2086 if(status == U_FILE_ACCESS_ERROR) { 2087 log_data_err("Is your data around?\n"); 2088 return; 2089 } else if(U_FAILURE(status)) { 2090 log_err("Error opening collator\n"); 2091 return; 2092 } 2093 rlen = u_unescape(expectedRules[i], rlz, 2048); 2094 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status); 2095 2096 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */ 2097 /* as a hard error test, but only in information mode */ 2098 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status); 2099 2100 ucol_close(credundant); 2101 ucol_close(cresulting); 2102 2103 log_verbose("testing using data\n"); 2104 2105 
genericRulesStarter(rules[i], testdata[i], testdatalen[i]); 2106 } 2107 } 2108 2109 static void TestCase(void) 2110 { 2111 const static UChar gRules[MAX_TOKEN_LEN] = 2112 /*" & 0 < 1,\u2461<a,A"*/ 2113 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 }; 2114 2115 const static UChar testCase[][MAX_TOKEN_LEN] = 2116 { 2117 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, 2118 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, 2119 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000}, 2120 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000} 2121 }; 2122 2123 const static UCollationResult caseTestResults[][9] = 2124 { 2125 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2126 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }, 2127 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, 2128 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER } 2129 }; 2130 2131 const static UColAttributeValue caseTestAttributes[][2] = 2132 { 2133 { UCOL_LOWER_FIRST, UCOL_OFF}, 2134 { UCOL_UPPER_FIRST, UCOL_OFF}, 2135 { UCOL_LOWER_FIRST, UCOL_ON}, 2136 { UCOL_UPPER_FIRST, UCOL_ON} 2137 }; 2138 int32_t i,j,k; 2139 UErrorCode status = U_ZERO_ERROR; 2140 UCollationElements *iter; 2141 UCollator *myCollation; 2142 myCollation = ucol_open("en_US", &status); 2143 2144 if(U_FAILURE(status)){ 2145 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2146 return; 2147 } 2148 log_verbose("Testing different case settings\n"); 2149 ucol_setStrength(myCollation, UCOL_TERTIARY); 2150 2151 for(k = 0; k<4; k++) { 2152 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2153 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2154 
log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]); 2155 for (i = 0; i < 3 ; i++) { 2156 for(j = i+1; j<4; j++) { 2157 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2158 } 2159 } 2160 } 2161 ucol_close(myCollation); 2162 2163 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status); 2164 if(U_FAILURE(status)){ 2165 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 2166 return; 2167 } 2168 log_verbose("Testing different case settings with custom rules\n"); 2169 ucol_setStrength(myCollation, UCOL_TERTIARY); 2170 2171 for(k = 0; k<4; k++) { 2172 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status); 2173 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status); 2174 for (i = 0; i < 3 ; i++) { 2175 for(j = i+1; j<4; j++) { 2176 log_verbose("k:%d, i:%d, j:%d\n", k, i, j); 2177 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]); 2178 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status); 2179 backAndForth(iter); 2180 ucol_closeElements(iter); 2181 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status); 2182 backAndForth(iter); 2183 ucol_closeElements(iter); 2184 } 2185 } 2186 } 2187 ucol_close(myCollation); 2188 { 2189 const static char *lowerFirst[] = { 2190 "h", 2191 "H", 2192 "ch", 2193 "Ch", 2194 "CH", 2195 "cha", 2196 "chA", 2197 "Cha", 2198 "ChA", 2199 "CHa", 2200 "CHA", 2201 "i", 2202 "I" 2203 }; 2204 2205 const static char *upperFirst[] = { 2206 "H", 2207 "h", 2208 "CH", 2209 "Ch", 2210 "ch", 2211 "CHA", 2212 "CHa", 2213 "ChA", 2214 "Cha", 2215 "chA", 2216 "cha", 2217 "I", 2218 "i" 2219 }; 2220 log_verbose("mixed case test\n"); 2221 log_verbose("lower first, case level off\n"); 2222 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, 
sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2223 log_verbose("upper first, case level off\n"); 2224 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2225 log_verbose("lower first, case level on\n"); 2226 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); 2227 log_verbose("upper first, case level on\n"); 2228 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); 2229 } 2230 2231 } 2232 2233 static void TestIncrementalNormalize(void) { 2234 2235 /*UChar baseA =0x61;*/ 2236 UChar baseA =0x41; 2237 /* UChar baseB = 0x42;*/ 2238 static const UChar ccMix[] = {0x316, 0x321, 0x300}; 2239 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/ 2240 /* 2241 0x316 is combining grave accent below, cc=220 2242 0x321 is combining palatalized hook below, cc=202 2243 0x300 is combining grave accent, cc=230 2244 */ 2245 2246 #define MAXSLEN 2000 2247 /*int maxSLen = 64000;*/ 2248 int sLen; 2249 int i; 2250 2251 UCollator *coll; 2252 UErrorCode status = U_ZERO_ERROR; 2253 UCollationResult result; 2254 2255 int32_t myQ = getTestOption(QUICK_OPTION); 2256 2257 if(getTestOption(QUICK_OPTION) < 0) { 2258 setTestOption(QUICK_OPTION, 1); 2259 } 2260 2261 { 2262 /* Test 1. 
Run very long unnormalized strings, to force overflow of*/ 2263 /* most buffers along the way.*/ 2264 UChar strA[MAXSLEN+1]; 2265 UChar strB[MAXSLEN+1]; 2266 2267 coll = ucol_open("en_US", &status); 2268 if(status == U_FILE_ACCESS_ERROR) { 2269 log_data_err("Is your data around?\n"); 2270 return; 2271 } else if(U_FAILURE(status)) { 2272 log_err("Error opening collator\n"); 2273 return; 2274 } 2275 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 2276 2277 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/ 2278 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/ 2279 /*for (sLen = 1000; sLen<1001; sLen++) {*/ 2280 for (sLen = 500; sLen<501; sLen++) { 2281 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/ 2282 strA[0] = baseA; 2283 strB[0] = baseA; 2284 for (i=1; i<=sLen-1; i++) { 2285 strA[i] = ccMix[i % 3]; 2286 strB[sLen-i] = ccMix[i % 3]; 2287 } 2288 strA[sLen] = 0; 2289 strB[sLen] = 0; 2290 2291 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default strength, which runs*/ 2292 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/ 2293 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/ 2294 doTest(coll, strA, strB, UCOL_EQUAL); 2295 } 2296 } 2297 2298 setTestOption(QUICK_OPTION, myQ); 2299 2300 2301 /* Test 2: Non-normal sequence in a string that extends to the last character*/ 2302 /* of the string. Checks a couple of edge cases.*/ 2303 2304 { 2305 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0}; 2306 static const UChar strB[] = {0x41, 0xc0, 0x316, 0}; 2307 ucol_setStrength(coll, UCOL_TERTIARY); 2308 doTest(coll, strA, strB, UCOL_EQUAL); 2309 } 2310 2311 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/ 2312 2313 { 2314 /* New UCA 3.1.1. 
2315 * test below used a code point from Desseret, which sorts differently 2316 * than d800 dc00 2317 */ 2318 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/ 2319 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0}; 2320 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0}; 2321 ucol_setStrength(coll, UCOL_TERTIARY); 2322 doTest(coll, strA, strB, UCOL_GREATER); 2323 } 2324 2325 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/ 2326 2327 { 2328 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00}; 2329 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00}; 2330 char sortKeyA[50]; 2331 char sortKeyAz[50]; 2332 char sortKeyB[50]; 2333 char sortKeyBz[50]; 2334 int r; 2335 2336 /* there used to be -3 here. Hmmmm.... */ 2337 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/ 2338 result = ucol_strcoll(coll, strA, 3, strB, 3); 2339 if (result != UCOL_GREATER) { 2340 log_err("ERROR 1 in test 4\n"); 2341 } 2342 result = ucol_strcoll(coll, strA, -1, strB, -1); 2343 if (result != UCOL_EQUAL) { 2344 log_err("ERROR 2 in test 4\n"); 2345 } 2346 2347 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2348 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2349 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2350 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2351 2352 r = strcmp(sortKeyA, sortKeyAz); 2353 if (r <= 0) { 2354 log_err("Error 3 in test 4\n"); 2355 } 2356 r = strcmp(sortKeyA, sortKeyB); 2357 if (r <= 0) { 2358 log_err("Error 4 in test 4\n"); 2359 } 2360 r = strcmp(sortKeyAz, sortKeyBz); 2361 if (r != 0) { 2362 log_err("Error 5 in test 4\n"); 2363 } 2364 2365 ucol_setStrength(coll, UCOL_IDENTICAL); 2366 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2367 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2368 ucol_getSortKey(coll, strB, 3, (uint8_t 
*)sortKeyB, sizeof(sortKeyB)); 2369 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2370 2371 r = strcmp(sortKeyA, sortKeyAz); 2372 if (r <= 0) { 2373 log_err("Error 6 in test 4\n"); 2374 } 2375 r = strcmp(sortKeyA, sortKeyB); 2376 if (r <= 0) { 2377 log_err("Error 7 in test 4\n"); 2378 } 2379 r = strcmp(sortKeyAz, sortKeyBz); 2380 if (r != 0) { 2381 log_err("Error 8 in test 4\n"); 2382 } 2383 ucol_setStrength(coll, UCOL_TERTIARY); 2384 } 2385 2386 2387 /* Test 5: Null characters in non-normal source strings.*/ 2388 2389 { 2390 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00}; 2391 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00}; 2392 char sortKeyA[50]; 2393 char sortKeyAz[50]; 2394 char sortKeyB[50]; 2395 char sortKeyBz[50]; 2396 int r; 2397 2398 result = ucol_strcoll(coll, strA, 6, strB, 6); 2399 if (result != UCOL_GREATER) { 2400 log_err("ERROR 1 in test 5\n"); 2401 } 2402 result = ucol_strcoll(coll, strA, -1, strB, -1); 2403 if (result != UCOL_EQUAL) { 2404 log_err("ERROR 2 in test 5\n"); 2405 } 2406 2407 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2408 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2409 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2410 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2411 2412 r = strcmp(sortKeyA, sortKeyAz); 2413 if (r <= 0) { 2414 log_err("Error 3 in test 5\n"); 2415 } 2416 r = strcmp(sortKeyA, sortKeyB); 2417 if (r <= 0) { 2418 log_err("Error 4 in test 5\n"); 2419 } 2420 r = strcmp(sortKeyAz, sortKeyBz); 2421 if (r != 0) { 2422 log_err("Error 5 in test 5\n"); 2423 } 2424 2425 ucol_setStrength(coll, UCOL_IDENTICAL); 2426 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); 2427 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)); 2428 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); 2429 
ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)); 2430 2431 r = strcmp(sortKeyA, sortKeyAz); 2432 if (r <= 0) { 2433 log_err("Error 6 in test 5\n"); 2434 } 2435 r = strcmp(sortKeyA, sortKeyB); 2436 if (r <= 0) { 2437 log_err("Error 7 in test 5\n"); 2438 } 2439 r = strcmp(sortKeyAz, sortKeyBz); 2440 if (r != 0) { 2441 log_err("Error 8 in test 5\n"); 2442 } 2443 ucol_setStrength(coll, UCOL_TERTIARY); 2444 } 2445 2446 2447 /* Test 6: Null character as base of a non-normal combining sequence.*/ 2448 2449 { 2450 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00}; 2451 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00}; 2452 2453 result = ucol_strcoll(coll, strA, 5, strB, 5); 2454 if (result != UCOL_LESS) { 2455 log_err("Error 1 in test 6\n"); 2456 } 2457 result = ucol_strcoll(coll, strA, -1, strB, -1); 2458 if (result != UCOL_EQUAL) { 2459 log_err("Error 2 in test 6\n"); 2460 } 2461 } 2462 2463 ucol_close(coll); 2464 } 2465 2466 2467 2468 #if 0 2469 static void TestGetCaseBit(void) { 2470 static const char *caseBitData[] = { 2471 "a", "A", "ch", "Ch", "CH", 2472 "\\uFF9E", "\\u0009" 2473 }; 2474 2475 static const uint8_t results[] = { 2476 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE, 2477 UCOL_UPPER_CASE, UCOL_LOWER_CASE 2478 }; 2479 2480 uint32_t i, blen = 0; 2481 UChar b[256] = {0}; 2482 UErrorCode status = U_ZERO_ERROR; 2483 UCollator *UCA = ucol_open("", &status); 2484 uint8_t res = 0; 2485 2486 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) { 2487 blen = u_unescape(caseBitData[i], b, 256); 2488 res = ucol_uprv_getCaseBits(UCA, b, blen, &status); 2489 if(results[i] != res) { 2490 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]); 2491 } 2492 } 2493 } 2494 #endif 2495 2496 static void TestHangulTailoring(void) { 2497 static const char *koreanData[] = { 2498 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", 
"\\u53ef", "\\u5475", 2499 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef", 2500 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888", 2501 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5", 2502 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E", 2503 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C" 2504 }; 2505 2506 const char *rules = 2507 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 " 2508 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef " 2509 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 " 2510 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 " 2511 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E " 2512 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C"; 2513 2514 2515 UErrorCode status = U_ZERO_ERROR; 2516 UChar rlz[2048] = { 0 }; 2517 uint32_t rlen = u_unescape(rules, rlz, 2048); 2518 2519 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 2520 if(status == U_FILE_ACCESS_ERROR) { 2521 log_data_err("Is your data around?\n"); 2522 return; 2523 } else if(U_FAILURE(status)) { 2524 log_err("Error opening collator\n"); 2525 return; 2526 } 2527 2528 log_verbose("Using start of korean rules\n"); 2529 2530 if(U_SUCCESS(status)) { 2531 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2532 } else { 2533 log_err("Unable to open collator with rules %s\n", rules); 2534 } 2535 2536 log_verbose("Setting jamoSpecial to TRUE and testing once more\n"); 2537 ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home */ 2538 genericOrderingTest(coll, koreanData, 
sizeof(koreanData)/sizeof(koreanData[0])); 2539 2540 ucol_close(coll); 2541 2542 log_verbose("Using ko__LOTUS locale\n"); 2543 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0])); 2544 } 2545 2546 static void TestCompressOverlap(void) { 2547 UChar secstr[150]; 2548 UChar tertstr[150]; 2549 UErrorCode status = U_ZERO_ERROR; 2550 UCollator *coll; 2551 char result[200]; 2552 uint32_t resultlen; 2553 int count = 0; 2554 char *tempptr; 2555 2556 coll = ucol_open("", &status); 2557 2558 if (U_FAILURE(status)) { 2559 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status)); 2560 return; 2561 } 2562 while (count < 149) { 2563 secstr[count] = 0x0020; /* [06, 05, 05] */ 2564 tertstr[count] = 0x0020; 2565 count ++; 2566 } 2567 2568 /* top down compression ----------------------------------- */ 2569 secstr[count] = 0x0332; /* [, 87, 05] */ 2570 tertstr[count] = 0x3000; /* [06, 05, 07] */ 2571 2572 /* no compression secstr should have 150 secondary bytes, tertstr should 2573 have 150 tertiary bytes. 
2574 with correct overlapping compression, secstr should have 4 secondary 2575 bytes, tertstr should have > 2 tertiary bytes */ 2576 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2577 tempptr = uprv_strchr(result, 1) + 1; 2578 while (*(tempptr + 1) != 1) { 2579 /* the last secondary collation element is not checked since it is not 2580 part of the compression */ 2581 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) { 2582 log_err("Secondary compression overlapped\n"); 2583 } 2584 tempptr ++; 2585 } 2586 2587 /* tertiary top/bottom/common for en_US is similar to the secondary 2588 top/bottom/common */ 2589 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2590 tempptr = uprv_strrchr(result, 1) + 1; 2591 while (*(tempptr + 1) != 0) { 2592 /* the last secondary collation element is not checked since it is not 2593 part of the compression */ 2594 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) { 2595 log_err("Tertiary compression overlapped\n"); 2596 } 2597 tempptr ++; 2598 } 2599 2600 /* bottom up compression ------------------------------------- */ 2601 secstr[count] = 0; 2602 tertstr[count] = 0; 2603 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250); 2604 tempptr = uprv_strchr(result, 1) + 1; 2605 while (*(tempptr + 1) != 1) { 2606 /* the last secondary collation element is not checked since it is not 2607 part of the compression */ 2608 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) { 2609 log_err("Secondary compression overlapped\n"); 2610 } 2611 tempptr ++; 2612 } 2613 2614 /* tertiary top/bottom/common for en_US is similar to the secondary 2615 top/bottom/common */ 2616 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250); 2617 tempptr = uprv_strrchr(result, 1) + 1; 2618 while (*(tempptr + 1) != 0) { 2619 /* the last secondary collation element is not checked since it is not 2620 part of the compression */ 2621 if (*tempptr > coll->tertiaryBottom + 
coll->tertiaryBottomCount) { 2622 log_err("Tertiary compression overlapped\n"); 2623 } 2624 tempptr ++; 2625 } 2626 2627 ucol_close(coll); 2628 } 2629 2630 static void TestCyrillicTailoring(void) { 2631 static const char *test[] = { 2632 "\\u0410b", 2633 "\\u0410\\u0306a", 2634 "\\u04d0A" 2635 }; 2636 2637 /* Russian overrides contractions, so this test is not valid anymore */ 2638 /*genericLocaleStarter("ru", test, 3);*/ 2639 2640 genericLocaleStarter("root", test, 3); 2641 genericRulesStarter("&\\u0410 = \\u0410", test, 3); 2642 genericRulesStarter("&Z < \\u0410", test, 3); 2643 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3); 2644 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3); 2645 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); 2646 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); 2647 } 2648 2649 static void TestSuppressContractions(void) { 2650 2651 static const char *testNoCont2[] = { 2652 "\\u0410\\u0302a", 2653 "\\u0410\\u0306b", 2654 "\\u0410c" 2655 }; 2656 static const char *testNoCont[] = { 2657 "a\\u0410", 2658 "A\\u0410\\u0306", 2659 "\\uFF21\\u0410\\u0302" 2660 }; 2661 2662 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3); 2663 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3); 2664 } 2665 2666 static void TestContraction(void) { 2667 const static char *testrules[] = { 2668 "&A = AB / B", 2669 "&A = A\\u0306/\\u0306", 2670 "&c = ch / h" 2671 }; 2672 const static UChar testdata[][2] = { 2673 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, 2674 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, 2675 {0x0063 /* 'c' */, 0x0068 /* 'h' */} 2676 }; 2677 const static UChar testdata2[][2] = { 2678 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, 2679 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, 2680 {0x0063 /* 'c' */, 0x006C /* 'l' */} 2681 }; 2682 const static char *testrules3[] = { 2683 "&z < xyz &xyzw << B", 2684 "&z < xyz &xyz << B / w", 2685 "&z < ch &achm << B", 2686 
"&z < ch &a << B / chm", 2687 "&\\ud800\\udc00w << B", 2688 "&\\ud800\\udc00 << B / w", 2689 "&a\\ud800\\udc00m << B", 2690 "&a << B / \\ud800\\udc00m", 2691 }; 2692 2693 UErrorCode status = U_ZERO_ERROR; 2694 UCollator *coll; 2695 UChar rule[256] = {0}; 2696 uint32_t rlen = 0; 2697 int i; 2698 2699 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2700 UCollationElements *iter1; 2701 int j = 0; 2702 log_verbose("Rule %s for testing\n", testrules[i]); 2703 rlen = u_unescape(testrules[i], rule, 32); 2704 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2705 if (U_FAILURE(status)) { 2706 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2707 return; 2708 } 2709 iter1 = ucol_openElements(coll, testdata[i], 2, &status); 2710 if (U_FAILURE(status)) { 2711 log_err("Collation iterator creation failed\n"); 2712 return; 2713 } 2714 while (j < 2) { 2715 UCollationElements *iter2 = ucol_openElements(coll, 2716 &(testdata[i][j]), 2717 1, &status); 2718 uint32_t ce; 2719 if (U_FAILURE(status)) { 2720 log_err("Collation iterator creation failed\n"); 2721 return; 2722 } 2723 ce = ucol_next(iter2, &status); 2724 while (ce != UCOL_NULLORDER) { 2725 if ((uint32_t)ucol_next(iter1, &status) != ce) { 2726 log_err("Collation elements in contraction split does not match\n"); 2727 return; 2728 } 2729 ce = ucol_next(iter2, &status); 2730 } 2731 j ++; 2732 ucol_closeElements(iter2); 2733 } 2734 if (ucol_next(iter1, &status) != UCOL_NULLORDER) { 2735 log_err("Collation elements not exhausted\n"); 2736 return; 2737 } 2738 ucol_closeElements(iter1); 2739 ucol_close(coll); 2740 } 2741 2742 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256); 2743 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2744 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) { 2745 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2746 testdata2[0][0], testdata2[0][1], 
testdata2[1][0], 2747 testdata2[1][1]); 2748 return; 2749 } 2750 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { 2751 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", 2752 testdata2[1][0], testdata2[1][1], testdata2[2][0], 2753 testdata2[2][1]); 2754 return; 2755 } 2756 ucol_close(coll); 2757 2758 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { 2759 UCollator *coll1, 2760 *coll2; 2761 UCollationElements *iter1, 2762 *iter2; 2763 UChar ch = 0x0042 /* 'B' */; 2764 uint32_t ce; 2765 rlen = u_unescape(testrules3[i], rule, 32); 2766 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2767 rlen = u_unescape(testrules3[i + 1], rule, 32); 2768 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2769 if (U_FAILURE(status)) { 2770 log_err("Collator creation failed %s\n", testrules[i]); 2771 return; 2772 } 2773 iter1 = ucol_openElements(coll1, &ch, 1, &status); 2774 iter2 = ucol_openElements(coll2, &ch, 1, &status); 2775 if (U_FAILURE(status)) { 2776 log_err("Collation iterator creation failed\n"); 2777 return; 2778 } 2779 ce = ucol_next(iter1, &status); 2780 if (U_FAILURE(status)) { 2781 log_err("Retrieving ces failed\n"); 2782 return; 2783 } 2784 while (ce != UCOL_NULLORDER) { 2785 if (ce != (uint32_t)ucol_next(iter2, &status)) { 2786 log_err("CEs does not match\n"); 2787 return; 2788 } 2789 ce = ucol_next(iter1, &status); 2790 if (U_FAILURE(status)) { 2791 log_err("Retrieving ces failed\n"); 2792 return; 2793 } 2794 } 2795 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { 2796 log_err("CEs not exhausted\n"); 2797 return; 2798 } 2799 ucol_closeElements(iter1); 2800 ucol_closeElements(iter2); 2801 ucol_close(coll1); 2802 ucol_close(coll2); 2803 } 2804 } 2805 2806 static void TestExpansion(void) { 2807 const static char *testrules[] = { 2808 "&J << K / B & K << M", 2809 "&J << K / B << M" 2810 }; 2811 const static UChar testdata[][3] = { 2812 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, 
2813 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, 2814 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, 2815 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, 2816 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, 2817 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} 2818 }; 2819 2820 UErrorCode status = U_ZERO_ERROR; 2821 UCollator *coll; 2822 UChar rule[256] = {0}; 2823 uint32_t rlen = 0; 2824 int i; 2825 2826 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { 2827 int j = 0; 2828 log_verbose("Rule %s for testing\n", testrules[i]); 2829 rlen = u_unescape(testrules[i], rule, 32); 2830 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); 2831 if (U_FAILURE(status)) { 2832 log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status)); 2833 return; 2834 } 2835 2836 for (j = 0; j < 5; j ++) { 2837 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS); 2838 } 2839 ucol_close(coll); 2840 } 2841 } 2842 2843 #if 0 2844 /* this test tests the current limitations of the engine */ 2845 /* it always fail, so it is disabled by default */ 2846 static void TestLimitations(void) { 2847 /* recursive expansions */ 2848 { 2849 static const char *rule = "&a=b/c&d=c/e"; 2850 static const char *tlimit01[] = {"add","b","adf"}; 2851 static const char *tlimit02[] = {"aa","b","af"}; 2852 log_verbose("recursive expansions\n"); 2853 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 2854 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 2855 } 2856 /* contractions spanning expansions */ 2857 { 2858 static const char *rule = "&a<<<c/e&g<<<eh"; 2859 static const char *tlimit01[] = {"ad","c","af","f","ch","h"}; 2860 static const char *tlimit02[] = {"ad","c","ch","af","f","h"}; 2861 log_verbose("contractions spanning expansions\n"); 2862 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0])); 2863 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0])); 2864 } 2865 /* normalization: nulls in 
contractions */ 2866 { 2867 static const char *rule = "&a<<<\\u0000\\u0302"; 2868 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 2869 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 2870 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 2871 static const UColAttributeValue valOn[] = { UCOL_ON }; 2872 static const UColAttributeValue valOff[] = { UCOL_OFF }; 2873 2874 log_verbose("NULL in contractions\n"); 2875 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 2876 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 2877 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 2878 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 2879 2880 } 2881 /* normalization: contractions spanning normalization */ 2882 { 2883 static const char *rule = "&a<<<\\u0000\\u0302"; 2884 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"}; 2885 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"}; 2886 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE }; 2887 static const UColAttributeValue valOn[] = { UCOL_ON }; 2888 static const UColAttributeValue valOff[] = { UCOL_OFF }; 2889 2890 log_verbose("contractions spanning normalization\n"); 2891 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1); 2892 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1); 2893 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1); 2894 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1); 2895 2896 } 2897 /* variable top: */ 2898 { 2899 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/ 2900 static const char *rule = "&\\u2010<x<[variable top]=z"; 2901 /*static const char *rule3 = "&' '<x<[variable top]=z";*/ 2902 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" }; 2903 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"}; 2904 static const char 
*tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" }; 2905 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }; 2906 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY }; 2907 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY }; 2908 2909 log_verbose("variable top\n"); 2910 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2911 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2912 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2913 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0])); 2914 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0])); 2915 2916 } 2917 /* case level */ 2918 { 2919 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH"; 2920 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"}; 2921 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"}; 2922 static const UColAttribute att[] = { UCOL_CASE_FIRST}; 2923 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST}; 2924 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/ 2925 log_verbose("case level\n"); 2926 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2927 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0])); 2928 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 2929 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/ 
2930 } 2931 2932 } 2933 #endif 2934 2935 static void TestBocsuCoverage(void) { 2936 UErrorCode status = U_ZERO_ERROR; 2937 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041"; 2938 UChar test[256] = {0}; 2939 uint32_t tlen = u_unescape(testString, test, 32); 2940 uint8_t key[256] = {0}; 2941 uint32_t klen = 0; 2942 2943 UCollator *coll = ucol_open("", &status); 2944 if(U_SUCCESS(status)) { 2945 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); 2946 2947 klen = ucol_getSortKey(coll, test, tlen, key, 256); 2948 2949 ucol_close(coll); 2950 } else { 2951 log_data_err("Couldn't open UCA\n"); 2952 } 2953 } 2954 2955 static void TestVariableTopSetting(void) { 2956 UErrorCode status = U_ZERO_ERROR; 2957 const UChar *current = NULL; 2958 uint32_t varTopOriginal = 0, varTop1, varTop2; 2959 UCollator *coll = ucol_open("", &status); 2960 if(U_SUCCESS(status)) { 2961 2962 uint32_t strength = 0; 2963 uint16_t specs = 0; 2964 uint32_t chOffset = 0; 2965 uint32_t chLen = 0; 2966 uint32_t exOffset = 0; 2967 uint32_t exLen = 0; 2968 uint32_t oldChOffset = 0; 2969 uint32_t oldChLen = 0; 2970 uint32_t oldExOffset = 0; 2971 uint32_t oldExLen = 0; 2972 uint32_t prefixOffset = 0; 2973 uint32_t prefixLen = 0; 2974 2975 UBool startOfRules = TRUE; 2976 UColTokenParser src; 2977 UColOptionSet opts; 2978 2979 UChar *rulesCopy = NULL; 2980 uint32_t rulesLen; 2981 2982 UCollationResult result; 2983 2984 UChar first[256] = { 0 }; 2985 UChar second[256] = { 0 }; 2986 UParseError parseError; 2987 int32_t myQ = getTestOption(QUICK_OPTION); 2988 2989 uprv_memset(&src, 0, sizeof(UColTokenParser)); 2990 2991 src.opts = &opts; 2992 2993 if(getTestOption(QUICK_OPTION) <= 0) { 2994 setTestOption(QUICK_OPTION, 1); 2995 } 2996 2997 /* this test will fail when normalization is turned on */ 2998 /* therefore we always turn off exhaustive mode for it */ 2999 { /* QUICK > 0*/ 3000 log_verbose("Slide variable top over UCARules\n"); 3001 rulesLen = 
ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0); 3002 rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar)); 3003 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE); 3004 3005 if(U_SUCCESS(status) && rulesLen > 0) { 3006 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 3007 src.current = src.source = rulesCopy; 3008 src.end = rulesCopy+rulesLen; 3009 src.extraCurrent = src.end; 3010 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE; 3011 3012 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to 3013 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */ 3014 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) { 3015 strength = src.parsedToken.strength; 3016 chOffset = src.parsedToken.charsOffset; 3017 chLen = src.parsedToken.charsLen; 3018 exOffset = src.parsedToken.extensionOffset; 3019 exLen = src.parsedToken.extensionLen; 3020 prefixOffset = src.parsedToken.prefixOffset; 3021 prefixLen = src.parsedToken.prefixLen; 3022 specs = src.parsedToken.flags; 3023 3024 startOfRules = FALSE; 3025 { 3026 log_verbose("%04X %d ", *(src.source+chOffset), chLen); 3027 } 3028 if(strength == UCOL_PRIMARY) { 3029 status = U_ZERO_ERROR; 3030 varTopOriginal = ucol_getVariableTop(coll, &status); 3031 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status); 3032 if(U_FAILURE(status)) { 3033 char buffer[256]; 3034 char *buf = buffer; 3035 uint32_t i = 0, j; 3036 uint32_t CE = UCOL_NO_MORE_CES; 3037 3038 /* before we start screaming, let's see if there is a problem with the rules */ 3039 UErrorCode collIterateStatus = U_ZERO_ERROR; 3040 collIterate *s = uprv_new_collIterate(&collIterateStatus); 3041 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus); 3042 3043 CE = ucol_getNextCE(coll, s, 
&status); 3044 3045 for(i = 0; i < oldChLen; i++) { 3046 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i)); 3047 buf += j; 3048 } 3049 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3050 log_verbose("= Expected failure for %s =", buffer); 3051 } else { 3052 if(uprv_collIterateAtEnd(s)) { 3053 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n", 3054 oldChOffset, u_errorName(status), buffer); 3055 } else { 3056 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n", 3057 buffer); 3058 } 3059 } 3060 uprv_delete_collIterate(s); 3061 } 3062 varTop2 = ucol_getVariableTop(coll, &status); 3063 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) { 3064 log_err("cannot retrieve set varTop value!\n"); 3065 continue; 3066 } 3067 3068 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) { 3069 3070 u_strncpy(first, src.source+oldChOffset, oldChLen); 3071 u_strncpy(first+oldChLen, src.source+chOffset, chLen); 3072 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen); 3073 first[2*oldChLen+chLen] = 0; 3074 3075 if(oldExLen == 0) { 3076 u_strncpy(second, src.source+chOffset, chLen); 3077 second[chLen] = 0; 3078 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */ 3079 u_strncpy(second, src.source+oldExOffset, oldExLen); 3080 u_strncpy(second+oldChLen, src.source+chOffset, chLen); 3081 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen); 3082 second[2*oldExLen+chLen] = 0; 3083 } 3084 result = ucol_strcoll(coll, first, -1, second, -1); 3085 if(result == UCOL_EQUAL) { 3086 doTest(coll, first, second, UCOL_EQUAL); 3087 } else { 3088 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset)); 3089 } 3090 } 3091 } 3092 if(strength != UCOL_TOK_RESET) { 3093 oldChOffset = chOffset; 3094 oldChLen = chLen; 3095 oldExOffset = exOffset; 3096 oldExLen = exLen; 3097 } 3098 } 
3099 status = U_ZERO_ERROR; 3100 } 3101 else { 3102 log_err("Unexpected failure getting rules %s\n", u_errorName(status)); 3103 return; 3104 } 3105 if (U_FAILURE(status)) { 3106 log_err("Error parsing rules %s\n", u_errorName(status)); 3107 return; 3108 } 3109 status = U_ZERO_ERROR; 3110 } 3111 3112 setTestOption(QUICK_OPTION, myQ); 3113 3114 log_verbose("Testing setting variable top to contractions\n"); 3115 { 3116 UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos); 3117 int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth; 3118 while(*conts != 0) { 3119 /* 3120 * A continuation is NUL-terminated and NUL-padded 3121 * except if it has the maximum length. 3122 */ 3123 int32_t contractionLength = maxUCAContractionLength; 3124 while(contractionLength > 0 && conts[contractionLength - 1] == 0) { 3125 --contractionLength; 3126 } 3127 if(*(conts+1)==0) { /* pre-context */ 3128 varTop1 = ucol_setVariableTop(coll, conts, 1, &status); 3129 } else { 3130 varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status); 3131 } 3132 if(U_FAILURE(status)) { 3133 if(status == U_PRIMARY_TOO_LONG_ERROR) { 3134 /* ucol_setVariableTop() is documented to not accept 3-byte primaries, 3135 * therefore it is not an error when it complains about them. 
*/ 3136 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n", 3137 *conts, *(conts+1), *(conts+2)); 3138 } else { 3139 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n", 3140 *conts, *(conts+1), *(conts+2), u_errorName(status)); 3141 } 3142 status = U_ZERO_ERROR; 3143 } 3144 conts+=maxUCAContractionLength; 3145 } 3146 3147 status = U_ZERO_ERROR; 3148 3149 first[0] = 0x0040; 3150 first[1] = 0x0050; 3151 first[2] = 0x0000; 3152 3153 ucol_setVariableTop(coll, first, -1, &status); 3154 3155 if(U_SUCCESS(status)) { 3156 log_err("Invalid contraction succeded in setting variable top!\n"); 3157 } 3158 3159 } 3160 3161 log_verbose("Test restoring variable top\n"); 3162 3163 status = U_ZERO_ERROR; 3164 ucol_restoreVariableTop(coll, varTopOriginal, &status); 3165 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { 3166 log_err("Couldn't restore old variable top\n"); 3167 } 3168 3169 log_verbose("Testing calling with error set\n"); 3170 3171 status = U_INTERNAL_PROGRAM_ERROR; 3172 varTop1 = ucol_setVariableTop(coll, first, 1, &status); 3173 varTop2 = ucol_getVariableTop(coll, &status); 3174 ucol_restoreVariableTop(coll, varTop2, &status); 3175 varTop1 = ucol_setVariableTop(NULL, first, 1, &status); 3176 varTop2 = ucol_getVariableTop(NULL, &status); 3177 ucol_restoreVariableTop(NULL, varTop2, &status); 3178 if(status != U_INTERNAL_PROGRAM_ERROR) { 3179 log_err("Bad reaction to passed error!\n"); 3180 } 3181 uprv_free(src.source); 3182 ucol_close(coll); 3183 } else { 3184 log_data_err("Couldn't open UCA collator\n"); 3185 } 3186 3187 } 3188 3189 static void TestNonChars(void) { 3190 static const char *test[] = { 3191 "\\u0000", /* ignorable */ 3192 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */ 3193 "\\uFDD0", "\\uFDEF", 3194 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */ 3195 "\\U0002FFFE", "\\U0002FFFF", /* not like 
ignorable. */ 3196 "\\U0003FFFE", "\\U0003FFFF", 3197 "\\U0004FFFE", "\\U0004FFFF", 3198 "\\U0005FFFE", "\\U0005FFFF", 3199 "\\U0006FFFE", "\\U0006FFFF", 3200 "\\U0007FFFE", "\\U0007FFFF", 3201 "\\U0008FFFE", "\\U0008FFFF", 3202 "\\U0009FFFE", "\\U0009FFFF", 3203 "\\U000AFFFE", "\\U000AFFFF", 3204 "\\U000BFFFE", "\\U000BFFFF", 3205 "\\U000CFFFE", "\\U000CFFFF", 3206 "\\U000DFFFE", "\\U000DFFFF", 3207 "\\U000EFFFE", "\\U000EFFFF", 3208 "\\U000FFFFE", "\\U000FFFFF", 3209 "\\U0010FFFE", "\\U0010FFFF", 3210 "\\uFFFF" /* special character with maximum primary weight */ 3211 }; 3212 UErrorCode status = U_ZERO_ERROR; 3213 UCollator *coll = ucol_open("en_US", &status); 3214 3215 log_verbose("Test non characters\n"); 3216 3217 if(U_SUCCESS(status)) { 3218 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS); 3219 } else { 3220 log_err_status(status, "Unable to open collator\n"); 3221 } 3222 3223 ucol_close(coll); 3224 } 3225 3226 static void TestExtremeCompression(void) { 3227 static char *test[4]; 3228 int32_t j = 0, i = 0; 3229 3230 for(i = 0; i<4; i++) { 3231 test[i] = (char *)malloc(2048*sizeof(char)); 3232 } 3233 3234 for(j = 20; j < 500; j++) { 3235 for(i = 0; i<4; i++) { 3236 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3237 test[i][j-1] = (char)('a'+i); 3238 test[i][j] = 0; 3239 } 3240 genericLocaleStarter("en_US", (const char **)test, 4); 3241 } 3242 3243 3244 for(i = 0; i<4; i++) { 3245 free(test[i]); 3246 } 3247 } 3248 3249 #if 0 3250 static void TestExtremeCompression(void) { 3251 static char *test[4]; 3252 int32_t j = 0, i = 0; 3253 UErrorCode status = U_ZERO_ERROR; 3254 UCollator *coll = ucol_open("en_US", status); 3255 for(i = 0; i<4; i++) { 3256 test[i] = (char *)malloc(2048*sizeof(char)); 3257 } 3258 for(j = 10; j < 2048; j++) { 3259 for(i = 0; i<4; i++) { 3260 uprv_memset(test[i], 'a', (j-2)*sizeof(char)); 3261 test[i][j-1] = (char)('a'+i); 3262 test[i][j] = 0; 3263 } 3264 } 3265 genericLocaleStarter("en_US", (const char **)test, 4); 3266 3267 for(j 
= 10; j < 2048; j++) { 3268 for(i = 0; i<1; i++) { 3269 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); 3270 test[i][j] = 0; 3271 } 3272 } 3273 for(i = 0; i<4; i++) { 3274 free(test[i]); 3275 } 3276 } 3277 #endif 3278 3279 static void TestSurrogates(void) { 3280 static const char *test[] = { 3281 "z","\\ud900\\udc25", "\\ud805\\udc50", 3282 "\\ud800\\udc00y", "\\ud800\\udc00r", 3283 "\\ud800\\udc00f", "\\ud800\\udc00", 3284 "\\ud800\\udc00c", "\\ud800\\udc00b", 3285 "\\ud800\\udc00fa", "\\ud800\\udc00fb", 3286 "\\ud800\\udc00a", 3287 "c", "b" 3288 }; 3289 3290 static const char *rule = 3291 "&z < \\ud900\\udc25 < \\ud805\\udc50" 3292 "< \\ud800\\udc00y < \\ud800\\udc00r" 3293 "< \\ud800\\udc00f << \\ud800\\udc00" 3294 "< \\ud800\\udc00fa << \\ud800\\udc00fb" 3295 "< \\ud800\\udc00a < c < b" ; 3296 3297 genericRulesStarter(rule, test, 14); 3298 } 3299 3300 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */ 3301 static void TestPrefix(void) { 3302 uint32_t i; 3303 3304 static const struct { 3305 const char *rules; 3306 const char *data[50]; 3307 const uint32_t len; 3308 } tests[] = { 3309 { "&z <<< z|a", 3310 {"zz", "za"}, 2 }, 3311 3312 { "&z <<< z| a", 3313 {"zz", "za"}, 2 }, 3314 { "[strength I]" 3315 "&a=\\ud900\\udc25" 3316 "&z<<<\\ud900\\udc25|a", 3317 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 }, 3318 }; 3319 3320 3321 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3322 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3323 } 3324 } 3325 3326 /* This test uses data suplied by Masashiko Maedera to test the implementation */ 3327 /* JIS X 4061 collation order implementation */ 3328 static void TestNewJapanese(void) { 3329 3330 static const char * const test1[] = { 3331 "\\u30b7\\u30e3\\u30fc\\u30ec", 3332 "\\u30b7\\u30e3\\u30a4", 3333 "\\u30b7\\u30e4\\u30a3", 3334 "\\u30b7\\u30e3\\u30ec", 3335 "\\u3061\\u3087\\u3053", 3336 "\\u3061\\u3088\\u3053", 3337 
"\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8", 3338 "\\u3066\\u30fc\\u305f", 3339 "\\u30c6\\u30fc\\u30bf", 3340 "\\u30c6\\u30a7\\u30bf", 3341 "\\u3066\\u3048\\u305f", 3342 "\\u3067\\u30fc\\u305f", 3343 "\\u30c7\\u30fc\\u30bf", 3344 "\\u30c7\\u30a7\\u30bf", 3345 "\\u3067\\u3048\\u305f", 3346 "\\u3066\\u30fc\\u305f\\u30fc", 3347 "\\u30c6\\u30fc\\u30bf\\u30a1", 3348 "\\u30c6\\u30a7\\u30bf\\u30fc", 3349 "\\u3066\\u3047\\u305f\\u3041", 3350 "\\u3066\\u3048\\u305f\\u30fc", 3351 "\\u3067\\u30fc\\u305f\\u30fc", 3352 "\\u30c7\\u30fc\\u30bf\\u30a1", 3353 "\\u3067\\u30a7\\u305f\\u30a1", 3354 "\\u30c7\\u3047\\u30bf\\u3041", 3355 "\\u30c7\\u30a8\\u30bf\\u30a2", 3356 "\\u3072\\u3086", 3357 "\\u3073\\u3085\\u3042", 3358 "\\u3074\\u3085\\u3042", 3359 "\\u3073\\u3085\\u3042\\u30fc", 3360 "\\u30d3\\u30e5\\u30a2\\u30fc", 3361 "\\u3074\\u3085\\u3042\\u30fc", 3362 "\\u30d4\\u30e5\\u30a2\\u30fc", 3363 "\\u30d2\\u30e5\\u30a6", 3364 "\\u30d2\\u30e6\\u30a6", 3365 "\\u30d4\\u30e5\\u30a6\\u30a2", 3366 "\\u3073\\u3085\\u30fc\\u3042\\u30fc", 3367 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc", 3368 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc", 3369 "\\u3072\\u3085\\u3093", 3370 "\\u3074\\u3085\\u3093", 3371 "\\u3075\\u30fc\\u308a", 3372 "\\u30d5\\u30fc\\u30ea", 3373 "\\u3075\\u3045\\u308a", 3374 "\\u3075\\u30a5\\u308a", 3375 "\\u3075\\u30a5\\u30ea", 3376 "\\u30d5\\u30a6\\u30ea", 3377 "\\u3076\\u30fc\\u308a", 3378 "\\u30d6\\u30fc\\u30ea", 3379 "\\u3076\\u3045\\u308a", 3380 "\\u30d6\\u30a5\\u308a", 3381 "\\u3077\\u3046\\u308a", 3382 "\\u30d7\\u30a6\\u30ea", 3383 "\\u3075\\u30fc\\u308a\\u30fc", 3384 "\\u30d5\\u30a5\\u30ea\\u30fc", 3385 "\\u3075\\u30a5\\u308a\\u30a3", 3386 "\\u30d5\\u3045\\u308a\\u3043", 3387 "\\u30d5\\u30a6\\u30ea\\u30fc", 3388 "\\u3075\\u3046\\u308a\\u3043", 3389 "\\u30d6\\u30a6\\u30ea\\u30a4", 3390 "\\u3077\\u30fc\\u308a\\u30fc", 3391 "\\u3077\\u30a5\\u308a\\u30a4", 3392 "\\u3077\\u3046\\u308a\\u30fc", 3393 "\\u30d7\\u30a6\\u30ea\\u30a4", 3394 "\\u30d5\\u30fd", 3395 "\\u3075\\u309e", 3396 
"\\u3076\\u309d", 3397 "\\u3076\\u3075", 3398 "\\u3076\\u30d5", 3399 "\\u30d6\\u3075", 3400 "\\u30d6\\u30d5", 3401 "\\u3076\\u309e", 3402 "\\u3076\\u3077", 3403 "\\u30d6\\u3077", 3404 "\\u3077\\u309d", 3405 "\\u30d7\\u30fd", 3406 "\\u3077\\u3075", 3407 }; 3408 3409 static const char *test2[] = { 3410 "\\u306f\\u309d", /* H\\u309d */ 3411 "\\u30cf\\u30fd", /* K\\u30fd */ 3412 "\\u306f\\u306f", /* HH */ 3413 "\\u306f\\u30cf", /* HK */ 3414 "\\u30cf\\u30cf", /* KK */ 3415 "\\u306f\\u309e", /* H\\u309e */ 3416 "\\u30cf\\u30fe", /* K\\u30fe */ 3417 "\\u306f\\u3070", /* HH\\u309b */ 3418 "\\u30cf\\u30d0", /* KK\\u309b */ 3419 "\\u306f\\u3071", /* HH\\u309c */ 3420 "\\u30cf\\u3071", /* KH\\u309c */ 3421 "\\u30cf\\u30d1", /* KK\\u309c */ 3422 "\\u3070\\u309d", /* H\\u309b\\u309d */ 3423 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */ 3424 "\\u3070\\u306f", /* H\\u309bH */ 3425 "\\u30d0\\u30cf", /* K\\u309bK */ 3426 "\\u3070\\u309e", /* H\\u309b\\u309e */ 3427 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */ 3428 "\\u3070\\u3070", /* H\\u309bH\\u309b */ 3429 "\\u30d0\\u3070", /* K\\u309bH\\u309b */ 3430 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */ 3431 "\\u3070\\u3071", /* H\\u309bH\\u309c */ 3432 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */ 3433 "\\u3071\\u309d", /* H\\u309c\\u309d */ 3434 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */ 3435 "\\u3071\\u306f", /* H\\u309cH */ 3436 "\\u30d1\\u30cf", /* K\\u309cK */ 3437 "\\u3071\\u3070", /* H\\u309cH\\u309b */ 3438 "\\u3071\\u30d0", /* H\\u309cK\\u309b */ 3439 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */ 3440 "\\u3071\\u3071", /* H\\u309cH\\u309c */ 3441 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */ 3442 }; 3443 /* 3444 static const char *test3[] = { 3445 "\\u221er\\u221e", 3446 "\\u221eR#", 3447 "\\u221et\\u221e", 3448 "#r\\u221e", 3449 "#R#", 3450 "#t%", 3451 "#T%", 3452 "8t\\u221e", 3453 "8T\\u221e", 3454 "8t#", 3455 "8T#", 3456 "8t%", 3457 "8T%", 3458 "8t8", 3459 "8T8", 3460 "\\u03c9r\\u221e", 3461 "\\u03a9R%", 3462 "rr\\u221e", 3463 "rR\\u221e", 3464 
"Rr\\u221e", 3465 "RR\\u221e", 3466 "RT%", 3467 "rt8", 3468 "tr\\u221e", 3469 "tr8", 3470 "TR8", 3471 "tt8", 3472 "\\u30b7\\u30e3\\u30fc\\u30ec", 3473 }; 3474 */ 3475 static const UColAttribute att[] = { UCOL_STRENGTH }; 3476 static const UColAttributeValue val[] = { UCOL_QUATERNARY }; 3477 3478 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING}; 3479 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED }; 3480 3481 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1); 3482 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1); 3483 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/ 3484 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2); 3485 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2); 3486 } 3487 3488 static void TestStrCollIdenticalPrefix(void) { 3489 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71"; 3490 const char* test[] = { 3491 "ab\\ud9b0\\udc70", 3492 "ab\\ud9b0\\udc71" 3493 }; 3494 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL); 3495 } 3496 /* Contractions should have all their canonically equivalent */ 3497 /* strings included */ 3498 static void TestContractionClosure(void) { 3499 static const struct { 3500 const char *rules; 3501 const char *data[10]; 3502 const uint32_t len; 3503 } tests[] = { 3504 { "&b=\\u00e4\\u00e4", 3505 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5}, 3506 { "&b=\\u00C5", 3507 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4}, 3508 }; 3509 uint32_t i; 3510 3511 3512 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3513 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL); 3514 } 3515 } 3516 3517 /* This tests also fails*/ 3518 static void 
TestBeforePrefixFailure(void) { 3519 static const struct { 3520 const char *rules; 3521 const char *data[10]; 3522 const uint32_t len; 3523 } tests[] = { 3524 { "&g <<< a" 3525 "&[before 3]\\uff41 <<< x", 3526 {"x", "\\uff41"}, 2 }, 3527 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3528 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3529 "&[before 3]\\u30a7<<<\\u30a9", 3530 {"\\u30a9", "\\u30a7"}, 2 }, 3531 { "&[before 3]\\u30a7<<<\\u30a9" 3532 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3533 "&\\u30A8=\\u30A8=\\u3048=\\uff74", 3534 {"\\u30a9", "\\u30a7"}, 2 }, 3535 }; 3536 uint32_t i; 3537 3538 3539 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3540 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3541 } 3542 3543 #if 0 3544 const char* rule1 = 3545 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3546 "&\\u30A8=\\u30A8=\\u3048=\\uff74" 3547 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"; 3548 const char* rule2 = 3549 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc" 3550 "&\\u30A7=\\u30A7=\\u3047=\\uff6a" 3551 "&\\u30A8=\\u30A8=\\u3048=\\uff74"; 3552 const char* test[] = { 3553 "\\u30c6\\u30fc\\u30bf", 3554 "\\u30c6\\u30a7\\u30bf", 3555 }; 3556 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0])); 3557 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0])); 3558 /* this piece of code should be in some sort of verbose mode */ 3559 /* it gets the collation elements for elements and prints them */ 3560 /* This is useful when trying to see whether the problem is */ 3561 { 3562 UErrorCode status = U_ZERO_ERROR; 3563 uint32_t i = 0; 3564 UCollationElements *it = NULL; 3565 uint32_t CE; 3566 UChar string[256]; 3567 uint32_t uStringLen; 3568 UCollator *coll = NULL; 3569 3570 uStringLen = u_unescape(rule1, string, 256); 3571 3572 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3573 3574 /*coll = ucol_open("ja_JP_JIS", &status);*/ 3575 it = ucol_openElements(coll, string, 0, &status); 3576 3577 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) { 3578 
log_verbose("%s\n", test[i]); 3579 uStringLen = u_unescape(test[i], string, 256); 3580 ucol_setText(it, string, uStringLen, &status); 3581 3582 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) { 3583 log_verbose("%08X\n", CE); 3584 } 3585 log_verbose("\n"); 3586 3587 } 3588 3589 ucol_closeElements(it); 3590 ucol_close(coll); 3591 } 3592 #endif 3593 } 3594 3595 static void TestPrefixCompose(void) { 3596 const char* rule1 = 3597 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc"; 3598 /* 3599 const char* test[] = { 3600 "\\u30c6\\u30fc\\u30bf", 3601 "\\u30c6\\u30a7\\u30bf", 3602 }; 3603 */ 3604 { 3605 UErrorCode status = U_ZERO_ERROR; 3606 /*uint32_t i = 0;*/ 3607 /*UCollationElements *it = NULL;*/ 3608 /* uint32_t CE;*/ 3609 UChar string[256]; 3610 uint32_t uStringLen; 3611 UCollator *coll = NULL; 3612 3613 uStringLen = u_unescape(rule1, string, 256); 3614 3615 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 3616 ucol_close(coll); 3617 } 3618 3619 3620 } 3621 3622 /* 3623 [last variable] last variable value 3624 [last primary ignorable] largest CE for primary ignorable 3625 [last secondary ignorable] largest CE for secondary ignorable 3626 [last tertiary ignorable] largest CE for tertiary ignorable 3627 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8) 3628 */ 3629 3630 static void TestRuleOptions(void) { 3631 /* values here are hardcoded and are correct for the current UCA 3632 * when the UCA changes, one might be forced to change these 3633 * values. 3634 */ 3635 3636 /* 3637 * These strings contain the last character before [variable top] 3638 * and the first and second characters (by primary weights) after it. 3639 * See FractionalUCA.txt. 
For example: 3640 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR 3641 [variable top = 0C FE] 3642 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT 3643 and 3644 00B4; [0D 0C, 05, 05] 3645 * 3646 * Note: Starting with UCA 6.0, the [variable top] collation element 3647 * is not the weight of any character or string, 3648 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable]. 3649 */ 3650 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F" 3651 #define FIRST_REGULAR_CHAR_STRING "\\u0060" 3652 #define SECOND_REGULAR_CHAR_STRING "\\u00B4" 3653 3654 /* 3655 * This string has to match the character that has the [last regular] weight 3656 * which changes with each UCA version. 3657 * See the bottom of FractionalUCA.txt which says something like 3658 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032 3659 * 3660 * Note: Starting with UCA 6.0, the [last regular] collation element 3661 * is not the weight of any character or string, 3662 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular]. 
3663 */ 3664 #define LAST_REGULAR_CHAR_STRING "\\U0001342E" 3665 3666 static const struct { 3667 const char *rules; 3668 const char *data[10]; 3669 const uint32_t len; 3670 } tests[] = { 3671 /* - all befores here amount to zero */ 3672 { "&[before 3][first tertiary ignorable]<<<a", 3673 { "\\u0000", "a"}, 2 3674 }, /* you cannot go before first tertiary ignorable */ 3675 3676 { "&[before 3][last tertiary ignorable]<<<a", 3677 { "\\u0000", "a"}, 2 3678 }, /* you cannot go before last tertiary ignorable */ 3679 3680 { "&[before 3][first secondary ignorable]<<<a", 3681 { "\\u0000", "a"}, 2 3682 }, /* you cannot go before first secondary ignorable */ 3683 3684 { "&[before 3][last secondary ignorable]<<<a", 3685 { "\\u0000", "a"}, 2 3686 }, /* you cannot go before first secondary ignorable */ 3687 3688 /* 'normal' befores */ 3689 3690 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a", 3691 { "c", "b", "\\u0332", "a" }, 4 3692 }, 3693 3694 /* we don't have a code point that corresponds to 3695 * the last primary ignorable 3696 */ 3697 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a", 3698 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5 3699 }, 3700 3701 { "&[before 3][first variable]<<<c<<<b &[first variable]<a", 3702 { "c", "b", "\\u0009", "a", "\\u000a" }, 5 3703 }, 3704 3705 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ", 3706 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5 3707 }, 3708 3709 { "&[first regular]<a" 3710 "&[before 1][first regular]<b", 3711 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4 3712 }, 3713 3714 { "&[before 1][last regular]<b" 3715 "&[last regular]<a", 3716 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4 3717 }, 3718 3719 { "&[before 1][first implicit]<b" 3720 "&[first implicit]<a", 3721 { "b", "\\u4e00", "a", "\\u4e01"}, 4 3722 }, 3723 3724 { "&[before 1][last implicit]<b" 3725 
"&[last implicit]<a", 3726 { "b", "\\U0010FFFD", "a" }, 3 3727 }, 3728 3729 { "&[last variable]<z" 3730 "&[last primary ignorable]<x" 3731 "&[last secondary ignorable]<<y" 3732 "&[last tertiary ignorable]<<<w" 3733 "&[top]<u", 3734 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7 3735 } 3736 3737 }; 3738 uint32_t i; 3739 3740 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3741 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3742 } 3743 } 3744 3745 3746 static void TestOptimize(void) { 3747 /* this is not really a test - just trying out 3748 * whether copying of UCA contents will fail 3749 * Cannot really test, since the functionality 3750 * remains the same. 3751 */ 3752 static const struct { 3753 const char *rules; 3754 const char *data[10]; 3755 const uint32_t len; 3756 } tests[] = { 3757 /* - all befores here amount to zero */ 3758 { "[optimize [\\uAC00-\\uD7FF]]", 3759 { "a", "b"}, 2} 3760 }; 3761 uint32_t i; 3762 3763 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { 3764 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); 3765 } 3766 } 3767 3768 /* 3769 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator. 3770 weiv ucol_strcollIter? 3771 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021 3772 weiv these are the input strings? 3773 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2 3774 weiv will check - could be a problem with utf-8 iterator 3775 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2 3776 weiv hmmm 3777 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate 3778 weiv that doesn't sound right 3779 cycheng (at) ca.ibm.c... 
we got the same inconsistent results on AIX and Win2000 3780 weiv so you have two strings, you convert them to utf-8 and to utf-16BE 3781 cycheng (at) ca.ibm.c... yes 3782 weiv and then do the comparison 3783 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be 3784 weiv utf-16 strings look like a little endian ones in the example you sent me 3785 weiv It could be a bug - let me try to test it out 3786 cycheng (at) ca.ibm.c... ok 3787 cycheng (at) ca.ibm.c... we can wait till the conf. call 3788 cycheng (at) ca.ibm.c... next weke 3789 weiv that would be great 3790 weiv hmmm 3791 weiv I might be wrong 3792 weiv let me play with it some more 3793 cycheng (at) ca.ibm.c... ok 3794 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be 3795 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2 3796 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be 3797 weiv ok 3798 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data 3799 weiv thanks 3800 cycheng (at) ca.ibm.c... 
the 4 strings we sent are just samples 3801 */ 3802 #if 0 3803 static void Alexis(void) { 3804 UErrorCode status = U_ZERO_ERROR; 3805 UCollator *coll = ucol_open("", &status); 3806 3807 3808 const char utf16be[2][4] = { 3809 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 }, 3810 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 } 3811 }; 3812 3813 const char utf8[2][4] = { 3814 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 }, 3815 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 }, 3816 }; 3817 3818 UCharIterator iterU161, iterU162; 3819 UCharIterator iterU81, iterU82; 3820 3821 UCollationResult resU16, resU8; 3822 3823 uiter_setUTF16BE(&iterU161, utf16be[0], 4); 3824 uiter_setUTF16BE(&iterU162, utf16be[1], 4); 3825 3826 uiter_setUTF8(&iterU81, utf8[0], 4); 3827 uiter_setUTF8(&iterU82, utf8[1], 4); 3828 3829 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3830 3831 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status); 3832 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status); 3833 3834 3835 if(resU16 != resU8) { 3836 log_err("different results\n"); 3837 } 3838 3839 ucol_close(coll); 3840 } 3841 #endif 3842 3843 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256 3844 static void Alexis2(void) { 3845 UErrorCode status = U_ZERO_ERROR; 3846 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3847 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3848 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE]; 3849 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0; 3850 3851 UConverter *conv = NULL; 3852 3853 UCharIterator U16BEItS, U16BEItT; 3854 UCharIterator U8ItS, U8ItT; 3855 3856 UCollationResult resU16, resU16BE, resU8; 3857 3858 static const char* const pairs[][2] = { 3859 { "\\ud800\\u0021", "\\uFFFC\\u0062"}, 3860 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" }, 3861 { "\\u0E40\\u0021", 
"\\u00A1\\u0021"}, 3862 { "\\u0E40\\u0021", "\\uFE57\\u0062"}, 3863 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"}, 3864 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"}, 3865 { "\\u0020", "\\u0020\\u0000"} 3866 /* 3867 5F20 (my result here) 3868 5F204E008E3F 3869 5F20 (your result here) 3870 */ 3871 }; 3872 3873 int32_t i = 0; 3874 3875 UCollator *coll = ucol_open("", &status); 3876 if(status == U_FILE_ACCESS_ERROR) { 3877 log_data_err("Is your data around?\n"); 3878 return; 3879 } else if(U_FAILURE(status)) { 3880 log_err("Error opening collator\n"); 3881 return; 3882 } 3883 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 3884 conv = ucnv_open("UTF16BE", &status); 3885 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { 3886 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3887 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE); 3888 3889 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT); 3890 3891 log_verbose("Result of strcoll is %i\n", resU16); 3892 3893 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status); 3894 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status); 3895 3896 /* use the original sizes, as the result from converter is in bytes */ 3897 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS); 3898 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT); 3899 3900 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status); 3901 3902 log_verbose("Result of U16BE is %i\n", resU16BE); 3903 3904 if(resU16 != resU16BE) { 3905 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]); 3906 } 3907 3908 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status); 3909 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status); 3910 3911 uiter_setUTF8(&U8ItS, U8Source, U8LenS); 3912 
uiter_setUTF8(&U8ItT, U8Target, U8LenT); 3913 3914 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status); 3915 3916 if(resU16 != resU8) { 3917 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]); 3918 } 3919 3920 } 3921 3922 ucol_close(coll); 3923 ucnv_close(conv); 3924 } 3925 3926 static void TestHebrewUCA(void) { 3927 UErrorCode status = U_ZERO_ERROR; 3928 static const char *first[] = { 3929 "d790d6b8d79cd795d6bcd7a9", 3930 "d790d79cd79ed7a7d799d799d7a1", 3931 "d790d6b4d79ed795d6bcd7a9", 3932 }; 3933 3934 char utf8String[3][256]; 3935 UChar utf16String[3][256]; 3936 3937 int32_t i = 0, j = 0; 3938 int32_t sizeUTF8[3]; 3939 int32_t sizeUTF16[3]; 3940 3941 UCollator *coll = ucol_open("", &status); 3942 if (U_FAILURE(status)) { 3943 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status)); 3944 return; 3945 } 3946 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/ 3947 3948 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) { 3949 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status); 3950 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status); 3951 log_verbose("%i: "); 3952 for(j = 0; j < sizeUTF16[i]; j++) { 3953 /*log_verbose("\\u%04X", utf16String[i][j]);*/ 3954 log_verbose("%04X", utf16String[i][j]); 3955 } 3956 log_verbose("\n"); 3957 } 3958 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) { 3959 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) { 3960 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS); 3961 } 3962 } 3963 3964 ucol_close(coll); 3965 3966 } 3967 3968 static void TestPartialSortKeyTermination(void) { 3969 static const char* cases[] = { 3970 "\\u1234\\u1234\\udc00", 3971 "\\udc00\\ud800\\ud800" 3972 }; 3973 3974 int32_t i = sizeof(UCollator); 3975 3976 UErrorCode status = U_ZERO_ERROR; 3977 3978 UCollator *coll = ucol_open("", &status); 3979 3980 UCharIterator iter; 3981 3982 UChar 
currCase[256]; 3983 int32_t length = 0; 3984 int32_t pKeyLen = 0; 3985 3986 uint8_t key[256]; 3987 3988 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 3989 uint32_t state[2] = {0, 0}; 3990 length = u_unescape(cases[i], currCase, 256); 3991 uiter_setString(&iter, currCase, length); 3992 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status); 3993 3994 log_verbose("Done\n"); 3995 3996 } 3997 ucol_close(coll); 3998 } 3999 4000 static void TestSettings(void) { 4001 static const char* cases[] = { 4002 "apple", 4003 "Apple" 4004 }; 4005 4006 static const char* locales[] = { 4007 "", 4008 "en" 4009 }; 4010 4011 UErrorCode status = U_ZERO_ERROR; 4012 4013 int32_t i = 0, j = 0; 4014 4015 UChar source[256], target[256]; 4016 int32_t sLen = 0, tLen = 0; 4017 4018 UCollator *collateObject = NULL; 4019 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) { 4020 collateObject = ucol_open(locales[i], &status); 4021 ucol_setStrength(collateObject, UCOL_PRIMARY); 4022 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status); 4023 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) { 4024 sLen = u_unescape(cases[j-1], source, 256); 4025 source[sLen] = 0; 4026 tLen = u_unescape(cases[j], target, 256); 4027 source[tLen] = 0; 4028 doTest(collateObject, source, target, UCOL_EQUAL); 4029 } 4030 ucol_close(collateObject); 4031 } 4032 } 4033 4034 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) { 4035 UErrorCode status = U_ZERO_ERROR; 4036 int32_t errorNo = 0; 4037 /*const UChar *sourceRules = NULL;*/ 4038 /*int32_t sourceRulesLen = 0;*/ 4039 UColAttributeValue french = UCOL_OFF; 4040 int32_t cloneSize = 0; 4041 4042 if(!ucol_equals(source, target)) { 4043 log_err("Same collators, different address not equal\n"); 4044 errorNo++; 4045 } 4046 ucol_close(target); 4047 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) { 
4048 /* currently, safeClone is implemented through getRules/openRules 4049 * so it is the same as the test below - I will comment that test out. 4050 */ 4051 /* real thing */ 4052 target = ucol_safeClone(source, NULL, &cloneSize, &status); 4053 if(U_FAILURE(status)) { 4054 log_err("Error creating clone\n"); 4055 errorNo++; 4056 return errorNo; 4057 } 4058 if(!ucol_equals(source, target)) { 4059 log_err("Collator different from it's clone\n"); 4060 errorNo++; 4061 } 4062 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status); 4063 if(french == UCOL_ON) { 4064 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 4065 } else { 4066 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 4067 } 4068 if(U_FAILURE(status)) { 4069 log_err("Error setting attributes\n"); 4070 errorNo++; 4071 return errorNo; 4072 } 4073 if(ucol_equals(source, target)) { 4074 log_err("Collators same even when options changed\n"); 4075 errorNo++; 4076 } 4077 ucol_close(target); 4078 /* commented out since safeClone uses exactly the same technique */ 4079 /* 4080 sourceRules = ucol_getRules(source, &sourceRulesLen); 4081 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4082 if(U_FAILURE(status)) { 4083 log_err("Error instantiating target from rules\n"); 4084 errorNo++; 4085 return errorNo; 4086 } 4087 if(!ucol_equals(source, target)) { 4088 log_err("Collator different from collator that was created from the same rules\n"); 4089 errorNo++; 4090 } 4091 ucol_close(target); 4092 */ 4093 } 4094 return errorNo; 4095 } 4096 4097 4098 static void TestEquals(void) { 4099 /* ucol_equals is not currently a public API. 
There is a chance that it will become 4100 * something like this, but currently it is only used by RuleBasedCollator::operator== 4101 */ 4102 /* test whether the two collators instantiated from the same locale are equal */ 4103 UErrorCode status = U_ZERO_ERROR; 4104 UParseError parseError; 4105 int32_t noOfLoc = uloc_countAvailable(); 4106 const char *locName = NULL; 4107 UCollator *source = NULL, *target = NULL; 4108 int32_t i = 0; 4109 4110 const char* rules[] = { 4111 "&l < lj <<< Lj <<< LJ", 4112 "&n < nj <<< Nj <<< NJ", 4113 "&ae <<< \\u00e4", 4114 "&AE <<< \\u00c4" 4115 }; 4116 /* 4117 const char* badRules[] = { 4118 "&l <<< Lj", 4119 "&n < nj <<< nJ <<< NJ", 4120 "&a <<< \\u00e4", 4121 "&AE <<< \\u00c4 <<< x" 4122 }; 4123 */ 4124 4125 UChar sourceRules[1024], targetRules[1024]; 4126 int32_t sourceRulesSize = 0, targetRulesSize = 0; 4127 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]); 4128 4129 for(i = 0; i < rulesSize; i++) { 4130 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize); 4131 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize); 4132 } 4133 4134 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4135 if(status == U_FILE_ACCESS_ERROR) { 4136 log_data_err("Is your data around?\n"); 4137 return; 4138 } else if(U_FAILURE(status)) { 4139 log_err("Error opening collator\n"); 4140 return; 4141 } 4142 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 4143 if(!ucol_equals(source, target)) { 4144 log_err("Equivalent collators not equal!\n"); 4145 } 4146 ucol_close(source); 4147 ucol_close(target); 4148 4149 source = ucol_open("root", &status); 4150 target = ucol_open("root", &status); 4151 log_verbose("Testing root\n"); 4152 if(!ucol_equals(source, source)) { 4153 log_err("Same collator not equal\n"); 4154 } 4155 if(TestEqualsForCollator(locName, source, 
target)) { 4156 log_err("Errors for root\n", locName); 4157 } 4158 ucol_close(source); 4159 4160 for(i = 0; i<noOfLoc; i++) { 4161 status = U_ZERO_ERROR; 4162 locName = uloc_getAvailable(i); 4163 /*if(hasCollationElements(locName)) {*/ 4164 log_verbose("Testing equality for locale %s\n", locName); 4165 source = ucol_open(locName, &status); 4166 target = ucol_open(locName, &status); 4167 if (U_FAILURE(status)) { 4168 log_err("Error opening collator for locale %s %s\n", locName, u_errorName(status)); 4169 continue; 4170 } 4171 if(TestEqualsForCollator(locName, source, target)) { 4172 log_err("Errors for locale %s\n", locName); 4173 } 4174 ucol_close(source); 4175 /*}*/ 4176 } 4177 } 4178 4179 static void TestJ2726(void) { 4180 UChar a[2] = { 0x61, 0x00 }; /*"a"*/ 4181 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/ 4182 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/ 4183 UErrorCode status = U_ZERO_ERROR; 4184 UCollator *coll = ucol_open("en", &status); 4185 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 4186 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4187 doTest(coll, a, aSpace, UCOL_EQUAL); 4188 doTest(coll, aSpace, a, UCOL_EQUAL); 4189 doTest(coll, a, spaceA, UCOL_EQUAL); 4190 doTest(coll, spaceA, a, UCOL_EQUAL); 4191 doTest(coll, spaceA, aSpace, UCOL_EQUAL); 4192 doTest(coll, aSpace, spaceA, UCOL_EQUAL); 4193 ucol_close(coll); 4194 } 4195 4196 static void NullRule(void) { 4197 UChar r[3] = {0}; 4198 UErrorCode status = U_ZERO_ERROR; 4199 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4200 if(U_SUCCESS(status)) { 4201 log_err("This should have been an error!\n"); 4202 ucol_close(coll); 4203 } else { 4204 status = U_ZERO_ERROR; 4205 } 4206 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); 4207 if(U_FAILURE(status)) { 4208 log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status)); 4209 } else { 4210 
ucol_close(coll); 4211 } 4212 } 4213 4214 /** 4215 * Test for CollationElementIterator previous and next for the whole set of 4216 * unicode characters with normalization on. 4217 */ 4218 static void TestNumericCollation(void) 4219 { 4220 UErrorCode status = U_ZERO_ERROR; 4221 4222 const static char *basicTestStrings[]={ 4223 "hello1", 4224 "hello2", 4225 "hello2002", 4226 "hello2003", 4227 "hello123456", 4228 "hello1234567", 4229 "hello10000000", 4230 "hello100000000", 4231 "hello1000000000", 4232 "hello10000000000", 4233 }; 4234 4235 const static char *preZeroTestStrings[]={ 4236 "avery10000", 4237 "avery010000", 4238 "avery0010000", 4239 "avery00010000", 4240 "avery000010000", 4241 "avery0000010000", 4242 "avery00000010000", 4243 "avery000000010000", 4244 }; 4245 4246 const static char *thirtyTwoBitNumericStrings[]={ 4247 "avery42949672960", 4248 "avery42949672961", 4249 "avery42949672962", 4250 "avery429496729610" 4251 }; 4252 4253 const static char *longNumericStrings[]={ 4254 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings. 4255 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that 4256 are treated as multiple collation elements. 
*/ 4257 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */ 4258 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */ 4259 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */ 4260 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */ 4261 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */ 4262 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */ 4263 
"num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */ 4264 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */ 4265 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */ 4266 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */ 4267 }; 4268 4269 const static char *supplementaryDigits[] = { 4270 "\\uD835\\uDFCE", /* 0 */ 4271 "\\uD835\\uDFCF", /* 1 */ 4272 "\\uD835\\uDFD0", /* 2 */ 4273 "\\uD835\\uDFD1", /* 3 */ 4274 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */ 4275 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */ 4276 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */ 4277 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */ 4278 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */ 4279 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */ 4280 }; 4281 4282 const static char *foreignDigits[] = { 4283 "\\u0661", 4284 "\\u0662", 4285 "\\u0663", 4286 "\\u0661\\u0660", 4287 "\\u0661\\u0662", 4288 "\\u0661\\u0663", 4289 "\\u0662\\u0660", 4290 "\\u0662\\u0662", 4291 
"\\u0662\\u0663", 4292 "\\u0663\\u0660", 4293 "\\u0663\\u0662", 4294 "\\u0663\\u0663" 4295 }; 4296 4297 const static char *evenZeroes[] = { 4298 "2000", 4299 "2001", 4300 "2002", 4301 "2003" 4302 }; 4303 4304 UColAttribute att = UCOL_NUMERIC_COLLATION; 4305 UColAttributeValue val = UCOL_ON; 4306 4307 /* Open our collator. */ 4308 UCollator* coll = ucol_open("root", &status); 4309 if (U_FAILURE(status)){ 4310 log_err_status(status, "ERROR: in using ucol_open() -> %s\n", 4311 myErrorName(status)); 4312 return; 4313 } 4314 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1); 4315 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1); 4316 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1); 4317 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1); 4318 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1); 4319 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1); 4320 4321 /* Setting up our collator to do digits. */ 4322 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 4323 if (U_FAILURE(status)){ 4324 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n", 4325 myErrorName(status)); 4326 return; 4327 } 4328 4329 /* 4330 Testing that prepended zeroes still yield the correct collation behavior. 4331 We expect that every element in our strings array will be equal. 
4332 */ 4333 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL); 4334 4335 ucol_close(coll); 4336 } 4337 4338 static void TestTibetanConformance(void) 4339 { 4340 const char* test[] = { 4341 "\\u0FB2\\u0591\\u0F71\\u0061", 4342 "\\u0FB2\\u0F71\\u0061" 4343 }; 4344 4345 UErrorCode status = U_ZERO_ERROR; 4346 UCollator *coll = ucol_open("", &status); 4347 UChar source[100]; 4348 UChar target[100]; 4349 int result; 4350 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4351 if (U_SUCCESS(status)) { 4352 u_unescape(test[0], source, 100); 4353 u_unescape(test[1], target, 100); 4354 doTest(coll, source, target, UCOL_EQUAL); 4355 result = ucol_strcoll(coll, source, -1, target, -1); 4356 log_verbose("result %d\n", result); 4357 if (UCOL_EQUAL != result) { 4358 log_err("Tibetan comparison error\n"); 4359 } 4360 } 4361 ucol_close(coll); 4362 4363 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); 4364 } 4365 4366 static void TestPinyinProblem(void) { 4367 static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" }; 4368 genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0])); 4369 } 4370 4371 #define TST_UCOL_MAX_INPUT 0x220001 4372 #define topByte 0xFF000000; 4373 #define bottomByte 0xFF; 4374 #define fourBytes 0xFFFFFFFF; 4375 4376 4377 static void showImplicit(UChar32 i) { 4378 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) { 4379 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i)); 4380 } 4381 } 4382 4383 static void TestImplicitGeneration(void) { 4384 UErrorCode status = U_ZERO_ERROR; 4385 UChar32 last = 0; 4386 UChar32 current; 4387 UChar32 i = 0, j = 0; 4388 UChar32 roundtrip = 0; 4389 UChar32 lastBottom = 0; 4390 UChar32 currentBottom = 0; 4391 UChar32 lastTop = 0; 4392 UChar32 currentTop = 0; 4393 4394 UCollator *coll = ucol_open("root", &status); 4395 if(U_FAILURE(status)) { 4396 log_err_status(status, "Couldn't open UCA -> 
%s\n", u_errorName(status)); 4397 return; 4398 } 4399 4400 uprv_uca_getRawFromImplicit(0xE20303E7); 4401 4402 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) { 4403 current = uprv_uca_getImplicitFromRaw(i) & fourBytes; 4404 4405 /* check that it round-trips AND that all intervening ones are illegal*/ 4406 roundtrip = uprv_uca_getRawFromImplicit(current); 4407 if (roundtrip != i) { 4408 log_err("No roundtrip %08X\n", i); 4409 } 4410 if (last != 0) { 4411 for (j = last + 1; j < current; ++j) { 4412 roundtrip = uprv_uca_getRawFromImplicit(j); 4413 /* raise an error if it *doesn't* find an error*/ 4414 if (roundtrip != -1) { 4415 log_err("Fails to recognize illegal %08X\n", j); 4416 } 4417 } 4418 } 4419 /* now do other consistency checks*/ 4420 lastBottom = last & bottomByte; 4421 currentBottom = current & bottomByte; 4422 lastTop = last & topByte; 4423 currentTop = current & topByte; 4424 4425 /* print out some values for spot-checking*/ 4426 if (lastTop != currentTop || i == 0x10000 || i == 0x110000) { 4427 showImplicit(i-3); 4428 showImplicit(i-2); 4429 showImplicit(i-1); 4430 showImplicit(i); 4431 showImplicit(i+1); 4432 showImplicit(i+2); 4433 } 4434 last = current; 4435 4436 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) { 4437 log_err("No raw <-> code point roundtrip for 0x%08X\n", i); 4438 } 4439 } 4440 showImplicit(TST_UCOL_MAX_INPUT-2); 4441 showImplicit(TST_UCOL_MAX_INPUT-1); 4442 showImplicit(TST_UCOL_MAX_INPUT); 4443 ucol_close(coll); 4444 } 4445 4446 /** 4447 * Iterate through the given iterator, checking to see that all the strings 4448 * in the expected array are present. 
4449 * @param expected array of strings we expect to see, or NULL 4450 * @param expectedCount number of elements of expected, or 0 4451 */ 4452 static int32_t checkUEnumeration(const char* msg, 4453 UEnumeration* iter, 4454 const char** expected, 4455 int32_t expectedCount) { 4456 UErrorCode ec = U_ZERO_ERROR; 4457 int32_t i = 0, n, j, bit; 4458 int32_t seenMask = 0; 4459 4460 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */ 4461 n = uenum_count(iter, &ec); 4462 if (!assertSuccess("count", &ec)) return -1; 4463 log_verbose("%s = [", msg); 4464 for (;; ++i) { 4465 const char* s = uenum_next(iter, NULL, &ec); 4466 if (!assertSuccess("snext", &ec) || s == NULL) break; 4467 if (i != 0) log_verbose(","); 4468 log_verbose("%s", s); 4469 /* check expected list */ 4470 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4471 if ((seenMask&bit) == 0 && 4472 uprv_strcmp(s, expected[j]) == 0) { 4473 seenMask |= bit; 4474 break; 4475 } 4476 } 4477 } 4478 log_verbose("] (%d)\n", i); 4479 assertTrue("count verified", i==n); 4480 /* did we see all expected strings? */ 4481 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { 4482 if ((seenMask&bit)!=0) { 4483 log_verbose("Ok: \"%s\" seen\n", expected[j]); 4484 } else { 4485 log_err("FAIL: \"%s\" not seen\n", expected[j]); 4486 } 4487 } 4488 return n; 4489 } 4490 4491 /** 4492 * Test new API added for separate collation tree. 
4493 */ 4494 static void TestSeparateTrees(void) { 4495 UErrorCode ec = U_ZERO_ERROR; 4496 UEnumeration *e = NULL; 4497 int32_t n = -1; 4498 UBool isAvailable; 4499 char loc[256]; 4500 4501 static const char* AVAIL[] = { "en", "de" }; 4502 4503 static const char* KW[] = { "collation" }; 4504 4505 static const char* KWVAL[] = { "phonebook", "stroke" }; 4506 4507 #if !UCONFIG_NO_SERVICE 4508 e = ucol_openAvailableLocales(&ec); 4509 if (e != NULL) { 4510 assertSuccess("ucol_openAvailableLocales", &ec); 4511 assertTrue("ucol_openAvailableLocales!=0", e!=0); 4512 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)); 4513 /* Don't need to check n because we check list */ 4514 uenum_close(e); 4515 } else { 4516 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec)); 4517 } 4518 #endif 4519 4520 e = ucol_getKeywords(&ec); 4521 if (e != NULL) { 4522 assertSuccess("ucol_getKeywords", &ec); 4523 assertTrue("ucol_getKeywords!=0", e!=0); 4524 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW)); 4525 /* Don't need to check n because we check list */ 4526 uenum_close(e); 4527 } else { 4528 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec)); 4529 } 4530 4531 e = ucol_getKeywordValues(KW[0], &ec); 4532 if (e != NULL) { 4533 assertSuccess("ucol_getKeywordValues", &ec); 4534 assertTrue("ucol_getKeywordValues!=0", e!=0); 4535 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL)); 4536 /* Don't need to check n because we check list */ 4537 uenum_close(e); 4538 } else { 4539 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec)); 4540 } 4541 4542 /* Try setting a warning before calling ucol_getKeywordValues */ 4543 ec = U_USING_FALLBACK_WARNING; 4544 e = ucol_getKeywordValues(KW[0], &ec); 4545 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) { 4546 
assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0); 4547 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL)); 4548 /* Don't need to check n because we check list */ 4549 uenum_close(e); 4550 } 4551 4552 /* 4553 U_DRAFT int32_t U_EXPORT2 4554 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 4555 const char* locale, UBool* isAvailable, 4556 UErrorCode* status); 4557 } 4558 */ 4559 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de", 4560 &isAvailable, &ec); 4561 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4562 assertEquals("getFunctionalEquivalent(de)", "de", loc); 4563 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", 4564 isAvailable == TRUE); 4565 } 4566 4567 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", 4568 &isAvailable, &ec); 4569 if (assertSuccess("getFunctionalEquivalent", &ec)) { 4570 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc); 4571 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE", 4572 isAvailable == TRUE); 4573 } 4574 } 4575 4576 /* supercedes TestJ784 */ 4577 static void TestBeforePinyin(void) { 4578 const static char rules[] = { 4579 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0" 4580 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8" 4581 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC" 4582 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2" 4583 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9" 4584 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC" 4585 }; 4586 4587 const static char *test[] = { 4588 "l\\u0101", 4589 "la", 4590 "l\\u0101n", 4591 "lan ", 4592 "l\\u0113", 4593 "le", 4594 "l\\u0113n", 4595 "len" 4596 }; 4597 4598 const static char 
*test2[] = { 4599 "x\\u0101", 4600 "x\\u0100", 4601 "X\\u0101", 4602 "X\\u0100", 4603 "x\\u00E1", 4604 "x\\u00C1", 4605 "X\\u00E1", 4606 "X\\u00C1", 4607 "x\\u01CE", 4608 "x\\u01CD", 4609 "X\\u01CE", 4610 "X\\u01CD", 4611 "x\\u00E0", 4612 "x\\u00C0", 4613 "X\\u00E0", 4614 "X\\u00C0", 4615 "xa", 4616 "xA", 4617 "Xa", 4618 "XA", 4619 "x\\u0101x", 4620 "x\\u0100x", 4621 "x\\u00E1x", 4622 "x\\u00C1x", 4623 "x\\u01CEx", 4624 "x\\u01CDx", 4625 "x\\u00E0x", 4626 "x\\u00C0x", 4627 "xax", 4628 "xAx" 4629 }; 4630 4631 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4632 genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0])); 4633 genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0])); 4634 genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0])); 4635 } 4636 4637 static void TestBeforeTightening(void) { 4638 static const struct { 4639 const char *rules; 4640 UErrorCode expectedStatus; 4641 } tests[] = { 4642 { "&[before 1]a<x", U_ZERO_ERROR }, 4643 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR }, 4644 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR }, 4645 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR }, 4646 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR }, 4647 { "&[before 2]a<<x",U_ZERO_ERROR }, 4648 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR }, 4649 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR }, 4650 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR }, 4651 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR }, 4652 { "&[before 3]a<<<x",U_ZERO_ERROR }, 4653 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR }, 4654 { "&[before I]a = x",U_INVALID_FORMAT_ERROR } 4655 }; 4656 4657 int32_t i = 0; 4658 4659 UErrorCode status = U_ZERO_ERROR; 4660 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4661 uint32_t rlen = 0; 4662 4663 UCollator *coll = NULL; 4664 4665 4666 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4667 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN); 4668 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4669 if(status != 
tests[i].expectedStatus) { 4670 log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n", 4671 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus)); 4672 } 4673 ucol_close(coll); 4674 status = U_ZERO_ERROR; 4675 } 4676 4677 } 4678 4679 /* 4680 &m < a 4681 &[before 1] a < x <<< X << q <<< Q < z 4682 assert: m <<< M < x <<< X << q <<< Q < z < a < n 4683 4684 &m < a 4685 &[before 2] a << x <<< X << q <<< Q < z 4686 assert: m <<< M < x <<< X << q <<< Q << a < z < n 4687 4688 &m < a 4689 &[before 3] a <<< x <<< X << q <<< Q < z 4690 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n 4691 4692 4693 &m << a 4694 &[before 1] a < x <<< X << q <<< Q < z 4695 assert: x <<< X << q <<< Q < z < m <<< M << a < n 4696 4697 &m << a 4698 &[before 2] a << x <<< X << q <<< Q < z 4699 assert: m <<< M << x <<< X << q <<< Q << a < z < n 4700 4701 &m << a 4702 &[before 3] a <<< x <<< X << q <<< Q < z 4703 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n 4704 4705 4706 &m <<< a 4707 &[before 1] a < x <<< X << q <<< Q < z 4708 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M 4709 4710 &m <<< a 4711 &[before 2] a << x <<< X << q <<< Q < z 4712 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n 4713 4714 &m <<< a 4715 &[before 3] a <<< x <<< X << q <<< Q < z 4716 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n 4717 4718 4719 &[before 1] s < x <<< X << q <<< Q < z 4720 assert: r <<< R < x <<< X << q <<< Q < z < s < n 4721 4722 &[before 2] s << x <<< X << q <<< Q < z 4723 assert: r <<< R < x <<< X << q <<< Q << s < z < n 4724 4725 &[before 3] s <<< x <<< X << q <<< Q < z 4726 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n 4727 4728 4729 &[before 1] \u24DC < x <<< X << q <<< Q < z 4730 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M 4731 4732 &[before 2] \u24DC << x <<< X << q <<< Q < z 4733 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n 4734 4735 &[before 3] \u24DC <<< x <<< X << q <<< Q < z 4736 
assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n 4737 */ 4738 4739 4740 #if 0 4741 /* requires features not yet supported */ 4742 static void TestMoreBefore(void) { 4743 static const struct { 4744 const char* rules; 4745 const char* order[16]; 4746 int32_t size; 4747 } tests[] = { 4748 { "&m < a &[before 1] a < x <<< X << q <<< Q < z", 4749 { "m","M","x","X","q","Q","z","a","n" }, 9}, 4750 { "&m < a &[before 2] a << x <<< X << q <<< Q < z", 4751 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4752 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z", 4753 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4754 { "&m << a &[before 1] a < x <<< X << q <<< Q < z", 4755 { "x","X","q","Q","z","m","M","a","n" }, 9}, 4756 { "&m << a &[before 2] a << x <<< X << q <<< Q < z", 4757 { "m","M","x","X","q","Q","a","z","n" }, 9}, 4758 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z", 4759 { "m","M","x","X","a","q","Q","z","n" }, 9}, 4760 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z", 4761 { "x","X","q","Q","z","n","m","a","M" }, 9}, 4762 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z", 4763 { "x","X","q","Q","m","a","M","z","n" }, 9}, 4764 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z", 4765 { "m","x","X","a","M","q","Q","z","n" }, 9}, 4766 { "&[before 1] s < x <<< X << q <<< Q < z", 4767 { "r","R","x","X","q","Q","z","s","n" }, 9}, 4768 { "&[before 2] s << x <<< X << q <<< Q < z", 4769 { "r","R","x","X","q","Q","s","z","n" }, 9}, 4770 { "&[before 3] s <<< x <<< X << q <<< Q < z", 4771 { "r","R","x","X","s","q","Q","z","n" }, 9}, 4772 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z", 4773 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9}, 4774 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z", 4775 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9}, 4776 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z", 4777 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9} 4778 }; 4779 4780 int32_t i = 0; 4781 4782 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { 4783 
genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size); 4784 } 4785 } 4786 #endif 4787 4788 static void TestTailorNULL( void ) { 4789 const static char* rule = "&a <<< '\\u0000'"; 4790 UErrorCode status = U_ZERO_ERROR; 4791 UChar rlz[RULE_BUFFER_LEN] = { 0 }; 4792 uint32_t rlen = 0; 4793 UChar a = 1, null = 0; 4794 UCollationResult res = UCOL_EQUAL; 4795 4796 UCollator *coll = NULL; 4797 4798 4799 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN); 4800 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); 4801 4802 if(U_FAILURE(status)) { 4803 log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status)); 4804 } else { 4805 res = ucol_strcoll(coll, &a, 1, &null, 1); 4806 4807 if(res != UCOL_LESS) { 4808 log_err("NULL was not tailored properly!\n"); 4809 } 4810 } 4811 4812 ucol_close(coll); 4813 } 4814 4815 static void 4816 TestUpperFirstQuaternary(void) 4817 { 4818 const char* tests[] = { "B", "b", "Bb", "bB" }; 4819 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST }; 4820 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST }; 4821 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4822 } 4823 4824 static void 4825 TestJ4960(void) 4826 { 4827 const char* tests[] = { "\\u00e2T", "aT" }; 4828 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL }; 4829 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON }; 4830 const char* tests2[] = { "a", "A" }; 4831 const char* rule = "&[first tertiary ignorable]=A=a"; 4832 UColAttribute att2[] = { UCOL_CASE_LEVEL }; 4833 UColAttributeValue attVals2[] = { UCOL_ON }; 4834 /* Test whether we correctly ignore primary ignorables on case level when */ 4835 /* we have only primary & case level */ 4836 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL); 4837 /* Test whether ICU4J will make case 
level for sortkeys that have primary strength */ 4838 /* and case level */ 4839 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0])); 4840 /* Test whether completely ignorable letters have case level info (they shouldn't) */ 4841 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL); 4842 } 4843 4844 static void 4845 TestJ5223(void) 4846 { 4847 static const char *test = "this is a test string"; 4848 UChar ustr[256]; 4849 int32_t ustr_length = u_unescape(test, ustr, 256); 4850 unsigned char sortkey[256]; 4851 int32_t sortkey_length; 4852 UErrorCode status = U_ZERO_ERROR; 4853 static UCollator *coll = NULL; 4854 coll = ucol_open("root", &status); 4855 if(U_FAILURE(status)) { 4856 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 4857 return; 4858 } 4859 ucol_setStrength(coll, UCOL_PRIMARY); 4860 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 4861 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 4862 if (U_FAILURE(status)) { 4863 log_err("Failed setting atributes\n"); 4864 return; 4865 } 4866 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0); 4867 if (sortkey_length > 256) return; 4868 4869 /* we mark the position where the null byte should be written in advance */ 4870 sortkey[sortkey_length-1] = 0xAA; 4871 4872 /* we set the buffer size one byte higher than needed */ 4873 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, 4874 sortkey_length+1); 4875 4876 /* no error occurs (for me) */ 4877 if (sortkey[sortkey_length-1] == 0xAA) { 4878 log_err("Hit bug at first try\n"); 4879 } 4880 4881 /* we mark the position where the null byte should be written again */ 4882 sortkey[sortkey_length-1] = 0xAA; 4883 4884 /* this time we set the buffer size to the exact amount needed */ 4885 sortkey_length = ucol_getSortKey(coll, 
ustr, ustr_length, sortkey, 4886 sortkey_length); 4887 4888 /* now the trailing null byte is not written */ 4889 if (sortkey[sortkey_length-1] == 0xAA) { 4890 log_err("Hit bug at second try\n"); 4891 } 4892 4893 ucol_close(coll); 4894 } 4895 4896 /* Regression test for Thai partial sort key problem */ 4897 static void 4898 TestJ5232(void) 4899 { 4900 const static char *test[] = { 4901 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21", 4902 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21" 4903 }; 4904 4905 genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0])); 4906 } 4907 4908 static void 4909 TestJ5367(void) 4910 { 4911 const static char *test[] = { "a", "y" }; 4912 const char* rules = "&Ny << Y &[first secondary ignorable] <<< a"; 4913 genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0])); 4914 } 4915 4916 static void 4917 TestVI5913(void) 4918 { 4919 UErrorCode status = U_ZERO_ERROR; 4920 int32_t i, j; 4921 UCollator *coll =NULL; 4922 uint8_t resColl[100], expColl[100]; 4923 int32_t rLen, tLen, ruleLen, sLen, kLen; 4924 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypogegrammeni*/ 4925 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ 4926 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/ 4927 static const UChar tData[][20]={ 4928 {0x1EAC, 0}, 4929 {0x0041, 0x0323, 0x0302, 0}, 4930 {0x1EA0, 0x0302, 0}, 4931 {0x00C2, 0x0323, 0}, 4932 {0x1ED8, 0}, /* O with dot and circumflex */ 4933 {0x1ECC, 0x0302, 0}, 4934 {0x1EB7, 0}, 4935 {0x1EA1, 0x0306, 0}, 4936 }; 4937 static const UChar tailorData[][20]={ 4938 {0x1FA2, 0}, /* Omega with 3 combining marks */ 4939 {0x03C9, 0x0313, 0x0300, 0x0345, 0}, 4940 {0x1FF3, 0x0313, 0x0300, 0}, 4941 {0x1F60, 0x0300, 0x0345, 0}, 4942 {0x1F62, 0x0345, 0}, 4943 {0x1FA0, 0x0300, 0}, 4944 }; 4945 static const UChar tailorData2[][20]={ 4946 {0x1E63, 0x030C, 0}, /* s with dot below + caron */ 4947 {0x0073, 0x0323, 0x030C, 0}, 
4948 {0x0073, 0x030C, 0x0323, 0}, 4949 }; 4950 static const UChar tailorData3[][20]={ 4951 {0x007a, 0}, /* z */ 4952 {0x0061, 0x0065, 0}, /* a + e */ 4953 {0x0061, 0x00ea, 0}, /* a + e with circumflex */ 4954 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */ 4955 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */ 4956 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */ 4957 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */ 4958 {0x00EA, 0}, /* e with circumflex */ 4959 }; 4960 4961 /* Test Vietnamese sort. */ 4962 coll = ucol_open("vi", &status); 4963 if(U_FAILURE(status)) { 4964 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status)); 4965 return; 4966 } 4967 log_verbose("\n\nVI collation:"); 4968 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) { 4969 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4970 } 4971 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) { 4972 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 4973 } 4974 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) { 4975 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n"); 4976 } 4977 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) { 4978 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 4979 } 4980 4981 for (j=0; j<8; j++) { 4982 tLen = u_strlen(tData[j]); 4983 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 4984 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 4985 for(i = 0; i<rLen; i++) { 4986 log_verbose(" %02X", resColl[i]); 4987 } 4988 } 4989 4990 ucol_close(coll); 4991 4992 /* Test Romanian sort. 
*/ 4993 coll = ucol_open("ro", &status); 4994 log_verbose("\n\nRO collation:"); 4995 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) { 4996 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); 4997 } 4998 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) { 4999 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); 5000 } 5001 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) { 5002 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); 5003 } 5004 5005 for (j=4; j<8; j++) { 5006 tLen = u_strlen(tData[j]); 5007 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); 5008 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); 5009 for(i = 0; i<rLen; i++) { 5010 log_verbose(" %02X", resColl[i]); 5011 } 5012 } 5013 ucol_close(coll); 5014 5015 /* Test the precomposed Greek character with 3 combining marks. */ 5016 log_verbose("\n\nTailoring test: Greek character with 3 combining marks"); 5017 ruleLen = u_strlen(rule); 5018 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5019 if (U_FAILURE(status)) { 5020 log_err("ucol_openRules failed with %s\n", u_errorName(status)); 5021 return; 5022 } 5023 sLen = u_strlen(tailorData[0]); 5024 for (j=1; j<6; j++) { 5025 tLen = u_strlen(tailorData[j]); 5026 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) { 5027 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]); 5028 } 5029 } 5030 /* Test getSortKey. 
*/ 5031 tLen = u_strlen(tailorData[0]); 5032 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100); 5033 for (j=0; j<6; j++) { 5034 tLen = u_strlen(tailorData[j]); 5035 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100); 5036 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5037 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5038 for(i = 0; i<rLen; i++) { 5039 log_err(" %02X", resColl[i]); 5040 } 5041 } 5042 } 5043 ucol_close(coll); 5044 5045 log_verbose("\n\nTailoring test for s with caron:"); 5046 ruleLen = u_strlen(rule2); 5047 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5048 tLen = u_strlen(tailorData2[0]); 5049 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100); 5050 for (j=1; j<3; j++) { 5051 tLen = u_strlen(tailorData2[j]); 5052 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100); 5053 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5054 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5055 for(i = 0; i<rLen; i++) { 5056 log_err(" %02X", resColl[i]); 5057 } 5058 } 5059 } 5060 ucol_close(coll); 5061 5062 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); 5063 ruleLen = u_strlen(rule3); 5064 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5065 tLen = u_strlen(tailorData3[3]); 5066 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); 5067 for (j=4; j<6; j++) { 5068 tLen = u_strlen(tailorData3[j]); 5069 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); 5070 5071 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) { 5072 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5073 for(i = 0; i<rLen; i++) { 5074 log_err(" %02X", resColl[i]); 5075 } 5076 } 5077 5078 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); 5079 for(i = 
0; i<rLen; i++) { 5080 log_verbose(" %02X", resColl[i]); 5081 } 5082 } 5083 ucol_close(coll); 5084 } 5085 5086 static void 5087 TestTailor6179(void) 5088 { 5089 UErrorCode status = U_ZERO_ERROR; 5090 int32_t i; 5091 UCollator *coll =NULL; 5092 uint8_t resColl[100]; 5093 int32_t rLen, tLen, ruleLen; 5094 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */ 5095 static const UChar rule1[]={ 5096 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79, 5097 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20, 5098 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20, 5099 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0}; 5100 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */ 5101 static const UChar rule2[]={ 5102 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61, 5103 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C, 5104 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E, 5105 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C, 5106 0x3C,0x3C,0x20,0x62,0}; 5107 5108 static const UChar tData1[][4]={ 5109 {0x61, 0}, 5110 {0x62, 0}, 5111 { 0xFDD0,0x009E, 0} 5112 }; 5113 static const UChar tData2[][4]={ 5114 {0x61, 0}, 5115 {0x62, 0}, 5116 { 0xFDD0,0x009E, 0} 5117 }; 5118 5119 /* 5120 * These values from FractionalUCA.txt will change, 5121 * and need to be updated here. 
5122 */ 5123 static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0}; 5124 static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0}; 5125 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; 5126 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0}; 5127 5128 /* Test [Last Primary ignorable] */ 5129 5130 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n"); 5131 ruleLen = u_strlen(rule1); 5132 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5133 if (U_FAILURE(status)) { 5134 log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status)); 5135 return; 5136 } 5137 tLen = u_strlen(tData1[0]); 5138 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100); 5139 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) { 5140 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen); 5141 for(i = 0; i<rLen; i++) { 5142 log_err(" %02X", resColl[i]); 5143 } 5144 log_err("\n"); 5145 } 5146 tLen = u_strlen(tData1[1]); 5147 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100); 5148 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) { 5149 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen); 5150 for(i = 0; i<rLen; i++) { 5151 log_err(" %02X", resColl[i]); 5152 } 5153 log_err("\n"); 5154 } 5155 ucol_close(coll); 5156 5157 5158 /* Test [Last Secondary ignorable] */ 5159 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n"); 5160 ruleLen = u_strlen(rule1); 5161 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5162 if (U_FAILURE(status)) { 5163 log_err("Tailoring test: &[last secondary ignorable] failed! 
-> %s\n", u_errorName(status)); 5164 return; 5165 } 5166 tLen = u_strlen(tData2[0]); 5167 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); 5168 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) { 5169 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen); 5170 for(i = 0; i<rLen; i++) { 5171 log_err(" %02X", resColl[i]); 5172 } 5173 log_err("\n"); 5174 } 5175 if(isICUVersionAtLeast(51, 1, 0)) { /* TODO: debug & fix, see ticket #8982 */ 5176 tLen = u_strlen(tData2[1]); 5177 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); 5178 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) { 5179 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen); 5180 for(i = 0; i<rLen; i++) { 5181 log_err(" %02X", resColl[i]); 5182 } 5183 log_err("\n"); 5184 } 5185 } 5186 ucol_close(coll); 5187 } 5188 5189 static void 5190 TestUCAPrecontext(void) 5191 { 5192 UErrorCode status = U_ZERO_ERROR; 5193 int32_t i, j; 5194 UCollator *coll =NULL; 5195 uint8_t resColl[100], prevColl[100]; 5196 int32_t rLen, tLen, ruleLen; 5197 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */ 5198 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0}; 5199 /* & l middle-dot << a a is an expansion. 
*/ 5200 5201 UChar tData1[][20]={ 5202 { 0xb7, 0}, /* standalone middle dot(0xb7) */ 5203 { 0x387, 0}, /* standalone middle dot(0x387) */ 5204 { 0x61, 0}, /* a */ 5205 { 0x6C, 0}, /* l */ 5206 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */ 5207 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */ 5208 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */ 5209 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */ 5210 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */ 5211 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */ 5212 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */ 5213 }; 5214 5215 log_verbose("\n\nEN collation:"); 5216 coll = ucol_open("en", &status); 5217 if (U_FAILURE(status)) { 5218 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status)); 5219 return; 5220 } 5221 for (j=0; j<11; j++) { 5222 tLen = u_strlen(tData1[j]); 5223 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5224 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5225 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5226 j, tData1[j]); 5227 } 5228 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5229 for(i = 0; i<rLen; i++) { 5230 log_verbose(" %02X", resColl[i]); 5231 } 5232 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5233 } 5234 ucol_close(coll); 5235 5236 5237 log_verbose("\n\nJA collation:"); 5238 coll = ucol_open("ja", &status); 5239 if (U_FAILURE(status)) { 5240 log_err("Tailoring test: &z <<a|- failed!"); 5241 return; 5242 } 5243 for (j=0; j<11; j++) { 5244 tLen = u_strlen(tData1[j]); 5245 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5246 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5247 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5248 j, tData1[j]); 5249 } 5250 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5251 for(i = 0; i<rLen; i++) { 5252 log_verbose(" %02X", resColl[i]); 
5253 } 5254 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5255 } 5256 ucol_close(coll); 5257 5258 5259 log_verbose("\n\nTailoring test: & middle dot < a "); 5260 ruleLen = u_strlen(rule1); 5261 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5262 if (U_FAILURE(status)) { 5263 log_err("Tailoring test: & middle dot < a failed!"); 5264 return; 5265 } 5266 for (j=0; j<11; j++) { 5267 tLen = u_strlen(tData1[j]); 5268 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5269 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5270 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5271 j, tData1[j]); 5272 } 5273 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5274 for(i = 0; i<rLen; i++) { 5275 log_verbose(" %02X", resColl[i]); 5276 } 5277 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5278 } 5279 ucol_close(coll); 5280 5281 5282 log_verbose("\n\nTailoring test: & l middle-dot << a "); 5283 ruleLen = u_strlen(rule2); 5284 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); 5285 if (U_FAILURE(status)) { 5286 log_err("Tailoring test: & l middle-dot << a failed!"); 5287 return; 5288 } 5289 for (j=0; j<11; j++) { 5290 tLen = u_strlen(tData1[j]); 5291 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); 5292 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) { 5293 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.", 5294 j, tData1[j]); 5295 } 5296 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) { 5297 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.", 5298 j, tData1[j]); 5299 } 5300 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); 5301 for(i = 0; i<rLen; i++) { 5302 log_verbose(" %02X", resColl[i]); 5303 } 5304 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); 5305 } 5306 ucol_close(coll); 5307 } 5308 5309 static void 5310 
TestOutOfBuffer5468(void) 5311 { 5312 static const char *test = "\\u4e00"; 5313 UChar ustr[256]; 5314 int32_t ustr_length = u_unescape(test, ustr, 256); 5315 unsigned char shortKeyBuf[1]; 5316 int32_t sortkey_length; 5317 UErrorCode status = U_ZERO_ERROR; 5318 static UCollator *coll = NULL; 5319 5320 coll = ucol_open("root", &status); 5321 if(U_FAILURE(status)) { 5322 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); 5323 return; 5324 } 5325 ucol_setStrength(coll, UCOL_PRIMARY); 5326 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); 5327 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5328 if (U_FAILURE(status)) { 5329 log_err("Failed setting atributes\n"); 5330 return; 5331 } 5332 5333 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf)); 5334 if (sortkey_length != 4) { 5335 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length); 5336 } 5337 log_verbose("length of sortKey is %d", sortkey_length); 5338 ucol_close(coll); 5339 } 5340 5341 #define TSKC_DATA_SIZE 5 5342 #define TSKC_BUF_SIZE 50 5343 static void 5344 TestSortKeyConsistency(void) 5345 { 5346 UErrorCode icuRC = U_ZERO_ERROR; 5347 UCollator* ucol; 5348 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD}; 5349 5350 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5351 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; 5352 int32_t i, j, i2; 5353 5354 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC); 5355 if (U_FAILURE(icuRC)) 5356 { 5357 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC)); 5358 return; 5359 } 5360 5361 for (i = 0; i < TSKC_DATA_SIZE; i++) 5362 { 5363 UCharIterator uiter; 5364 uint32_t state[2] = { 0, 0 }; 5365 int32_t dataLen = i+1; 5366 for (j=0; j<TSKC_BUF_SIZE; j++) 5367 bufFull[i][j] = bufPart[i][j] = 0; 5368 5369 /* Full sort key */ 5370 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE); 5371 5372 /* Partial sort key 
*/ 5373 uiter_setString(&uiter, data, dataLen); 5374 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC); 5375 if (U_FAILURE(icuRC)) 5376 { 5377 log_err("ucol_nextSortKeyPart failed\n"); 5378 ucol_close(ucol); 5379 return; 5380 } 5381 5382 for (i2=0; i2<i; i2++) 5383 { 5384 UBool fullMatch = TRUE; 5385 UBool partMatch = TRUE; 5386 for (j=0; j<TSKC_BUF_SIZE; j++) 5387 { 5388 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]); 5389 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]); 5390 } 5391 if (fullMatch != partMatch) { 5392 log_err(fullMatch ? "full key was consistent, but partial key changed\n" 5393 : "partial key was consistent, but full key changed\n"); 5394 ucol_close(ucol); 5395 return; 5396 } 5397 } 5398 } 5399 5400 /*=============================================*/ 5401 ucol_close(ucol); 5402 } 5403 5404 /* ticket: 6101 */ 5405 static void TestCroatianSortKey(void) { 5406 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3"; 5407 UErrorCode status = U_ZERO_ERROR; 5408 UCollator *ucol; 5409 UCharIterator iter; 5410 5411 static const UChar text[] = { 0x0044, 0xD81A }; 5412 5413 size_t length = sizeof(text)/sizeof(*text); 5414 5415 uint8_t textSortKey[32]; 5416 size_t lenSortKey = 32; 5417 size_t actualSortKeyLen; 5418 uint32_t uStateInfo[2] = { 0, 0 }; 5419 5420 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status); 5421 if (U_FAILURE(status)) { 5422 log_err_status(status, "ucol_openFromShortString error in Craotian test. 
-> %s\n", u_errorName(status)); 5423 return; 5424 } 5425 5426 uiter_setString(&iter, text, length); 5427 5428 actualSortKeyLen = ucol_nextSortKeyPart( 5429 ucol, &iter, (uint32_t*)uStateInfo, 5430 textSortKey, lenSortKey, &status 5431 ); 5432 5433 if (actualSortKeyLen == lenSortKey) { 5434 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n"); 5435 } 5436 5437 ucol_close(ucol); 5438 } 5439 5440 /* ticket: 6140 */ 5441 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since 5442 * they are both Hiragana and Katakana 5443 */ 5444 #define SORTKEYLEN 50 5445 static void TestHiragana(void) { 5446 UErrorCode status = U_ZERO_ERROR; 5447 UCollator* ucol; 5448 UCollationResult strcollresult; 5449 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */ 5450 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 }; 5451 int32_t data1Len = sizeof(data1)/sizeof(*data1); 5452 int32_t data2Len = sizeof(data2)/sizeof(*data2); 5453 int32_t i, j; 5454 uint8_t sortKey1[SORTKEYLEN]; 5455 uint8_t sortKey2[SORTKEYLEN]; 5456 5457 UCharIterator uiter1; 5458 UCharIterator uiter2; 5459 uint32_t state1[2] = { 0, 0 }; 5460 uint32_t state2[2] = { 0, 0 }; 5461 int32_t keySize1; 5462 int32_t keySize2; 5463 5464 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL, 5465 &status); 5466 if (U_FAILURE(status)) { 5467 log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status)); 5468 return; 5469 } 5470 5471 /* Start of full sort keys */ 5472 /* Full sort key1 */ 5473 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN); 5474 /* Full sort key2 */ 5475 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN); 5476 if (keySize1 == keySize2) { 5477 for (i = 0; i < keySize1; i++) { 5478 if (sortKey1[i] != sortKey2[i]) { 5479 log_err("Full sort keys are different. 
Should be equal."); 5480 } 5481 } 5482 } else { 5483 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2); 5484 } 5485 /* End of full sort keys */ 5486 5487 /* Start of partial sort keys */ 5488 /* Partial sort key1 */ 5489 uiter_setString(&uiter1, data1, data1Len); 5490 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status); 5491 /* Partial sort key2 */ 5492 uiter_setString(&uiter2, data2, data2Len); 5493 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status); 5494 if (U_SUCCESS(status) && keySize1 == keySize2) { 5495 for (j = 0; j < keySize1; j++) { 5496 if (sortKey1[j] != sortKey2[j]) { 5497 log_err("Partial sort keys are different. Should be equal"); 5498 } 5499 } 5500 } else { 5501 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2); 5502 } 5503 /* End of partial sort keys */ 5504 5505 /* Start of strcoll */ 5506 /* Use ucol_strcoll() to determine ordering */ 5507 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len); 5508 if (strcollresult != UCOL_EQUAL) { 5509 log_err("Result from ucol_strcoll() should be UCOL_EQUAL."); 5510 } 5511 5512 ucol_close(ucol); 5513 } 5514 5515 /* Convenient struct for running collation tests */ 5516 typedef struct { 5517 const UChar source[MAX_TOKEN_LEN]; /* String on left */ 5518 const UChar target[MAX_TOKEN_LEN]; /* String on right */ 5519 UCollationResult result; /* -1, 0 or +1, depending on collation */ 5520 } OneTestCase; 5521 5522 /* 5523 * Utility function to test one collation test case. 5524 * @param testcases Array of test cases. 5525 * @param n_testcases Size of the array testcases. 5526 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats. 5527 * @param n_rules Size of the array str_rules. 
5528 */ 5529 static void doTestOneTestCase(const OneTestCase testcases[], 5530 int n_testcases, 5531 const char* str_rules[], 5532 int n_rules) 5533 { 5534 int rule_no, testcase_no; 5535 UChar rule[500]; 5536 int32_t length = 0; 5537 UErrorCode status = U_ZERO_ERROR; 5538 UParseError parse_error; 5539 UCollator *myCollation; 5540 5541 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 5542 5543 length = u_unescape(str_rules[rule_no], rule, 500); 5544 if (length == 0) { 5545 log_err("ERROR: The rule cannot be unescaped: %s\n"); 5546 return; 5547 } 5548 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 5549 if(U_FAILURE(status)){ 5550 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 5551 return; 5552 } 5553 log_verbose("Testing the <<* syntax\n"); 5554 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 5555 ucol_setStrength(myCollation, UCOL_TERTIARY); 5556 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { 5557 doTest(myCollation, 5558 testcases[testcase_no].source, 5559 testcases[testcase_no].target, 5560 testcases[testcase_no].result 5561 ); 5562 } 5563 ucol_close(myCollation); 5564 } 5565 } 5566 5567 const static OneTestCase rangeTestcases[] = { 5568 { {0x0061}, {0x0062}, UCOL_LESS }, /* "a" < "b" */ 5569 { {0x0062}, {0x0063}, UCOL_LESS }, /* "b" < "c" */ 5570 { {0x0061}, {0x0063}, UCOL_LESS }, /* "a" < "c" */ 5571 5572 { {0x0062}, {0x006b}, UCOL_LESS }, /* "b" << "k" */ 5573 { {0x006b}, {0x006c}, UCOL_LESS }, /* "k" << "l" */ 5574 { {0x0062}, {0x006c}, UCOL_LESS }, /* "b" << "l" */ 5575 { {0x0061}, {0x006c}, UCOL_LESS }, /* "a" < "l" */ 5576 { {0x0061}, {0x006d}, UCOL_LESS }, /* "a" < "m" */ 5577 5578 { {0x0079}, {0x006d}, UCOL_LESS }, /* "y" < "f" */ 5579 { {0x0079}, {0x0067}, UCOL_LESS }, /* "y" < "g" */ 5580 { {0x0061}, {0x0068}, UCOL_LESS }, /* "y" < "h" */ 5581 { {0x0061}, {0x0065}, UCOL_LESS }, /* "g" < "e" */ 5582 5583 { {0x0061}, 
/*
 * Expected orderings around the BMP / supplementary-plane boundary.
 * Supplementary code points are written as UTF-16 surrogate pairs
 * (0xd800 0xdc00 is U+10000, 0xd800 0xdc01 is U+10001, 0xd800 0xdc02 is
 * U+10002).
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0xfffe}, {0xffff}, UCOL_LESS }, /* U+FFFE < U+FFFF */
  { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS }, /* U+FFFF < U+10000 */
  { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+10000 < U+10001 */
  { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS }, /* U+FFFE < U+10001 */
  { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+10001 < U+10002 (same data as the previous row) */
  { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS }, /* U+FFFE < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
0x0065, 0x0073, 0x0074}, UCOL_LESS }, /* "quack" < "quest" */ 5624 }; 5625 5626 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty); 5627 5628 static void TestSameStrengthList(void) 5629 { 5630 const char* strRules[] = { 5631 /* Normal */ 5632 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3", 5633 5634 /* Lists */ 5635 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123", 5636 }; 5637 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5638 } 5639 5640 static void TestSameStrengthListQuoted(void) 5641 { 5642 const char* strRules[] = { 5643 /* Lists with quoted characters */ 5644 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123", 5645 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123", 5646 5647 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033", 5648 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'", 5649 5650 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033", 5651 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'", 5652 }; 5653 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5654 } 5655 5656 static void TestSameStrengthListSupplemental(void) 5657 { 5658 const char* strRules[] = { 5659 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002", 5660 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", 5661 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002", 5662 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", 5663 }; 5664 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 5665 } 5666 5667 static void TestSameStrengthListQwerty(void) 5668 { 5669 const char* strRules[] = { 5670 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 5671 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 5672 "&\\u0071<\\u0077<\\u0065<\\u0072 
&\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", 5673 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064", 5674 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064", 5675 5676 /* Quoted characters also will work if two quoted characters are not consecutive. */ 5677 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064", 5678 5679 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */ 5680 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/ 5681 5682 }; 5683 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 5684 } 5685 5686 static void TestSameStrengthListQuotedQwerty(void) 5687 { 5688 const char* strRules[] = { 5689 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ 5690 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ 5691 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */ 5692 5693 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. 
*/ 5694 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */ 5695 }; 5696 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules)); 5697 } 5698 5699 static void TestSameStrengthListRanges(void) 5700 { 5701 const char* strRules[] = { 5702 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", 5703 }; 5704 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); 5705 } 5706 5707 static void TestSameStrengthListSupplementalRanges(void) 5708 { 5709 const char* strRules[] = { 5710 "&\\ufffe<*\\uffff-\\U00010002", 5711 }; 5712 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules)); 5713 } 5714 5715 static void TestSpecialCharacters(void) 5716 { 5717 const char* strRules[] = { 5718 /* Normal */ 5719 "&';'<'+'<','<'-'<'&'<'*'", 5720 5721 /* List */ 5722 "&';'<*'+,-&*'", 5723 5724 /* Range */ 5725 "&';'<*'+'-'-&*'", 5726 }; 5727 5728 const static OneTestCase specialCharacterStrings[] = { 5729 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */ 5730 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */ 5731 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */ 5732 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */ 5733 }; 5734 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules)); 5735 } 5736 5737 static void TestPrivateUseCharacters(void) 5738 { 5739 const char* strRules[] = { 5740 /* Normal */ 5741 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'", 5742 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d", 5743 }; 5744 5745 const static OneTestCase privateUseCharacterStrings[] = { 5746 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5747 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5748 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5749 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5750 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5751 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5752 }; 5753 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, 
LEN(strRules)); 5754 } 5755 5756 static void TestPrivateUseCharactersInList(void) 5757 { 5758 const char* strRules[] = { 5759 /* List */ 5760 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'", 5761 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */ 5762 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d", 5763 }; 5764 5765 const static OneTestCase privateUseCharacterStrings[] = { 5766 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5767 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5768 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5769 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5770 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5771 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5772 }; 5773 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5774 } 5775 5776 static void TestPrivateUseCharactersInRange(void) 5777 { 5778 const char* strRules[] = { 5779 /* Range */ 5780 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'", 5781 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d", 5782 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */ 5783 }; 5784 5785 const static OneTestCase privateUseCharacterStrings[] = { 5786 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, 5787 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, 5788 { {0xe2d9}, {0xe2da}, UCOL_LESS }, 5789 { {0xe2da}, {0xe2db}, UCOL_LESS }, 5790 { {0xe2db}, {0xe2dc}, UCOL_LESS }, 5791 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, 5792 }; 5793 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 5794 } 5795 5796 static void TestInvalidListsAndRanges(void) 5797 { 5798 const char* invalidRules[] = { 5799 /* Range not in starred expression */ 5800 "&\\ufffe<\\uffff-\\U00010002", 5801 5802 /* Range without start */ 5803 "&a<*-c", 5804 5805 /* Range without end */ 5806 "&a<*b-", 5807 5808 /* More than one hyphen */ 5809 "&a<*b-g-l", 5810 5811 /* Range in the wrong order */ 5812 "&a<*k-b", 5813 5814 }; 5815 5816 UChar rule[500]; 5817 UErrorCode status = U_ZERO_ERROR; 5818 UParseError parse_error; 
5819 int n_rules = LEN(invalidRules); 5820 int rule_no; 5821 int length; 5822 UCollator *myCollation; 5823 5824 for (rule_no = 0; rule_no < n_rules; ++rule_no) { 5825 5826 length = u_unescape(invalidRules[rule_no], rule, 500); 5827 if (length == 0) { 5828 log_err("ERROR: The rule cannot be unescaped: %s\n"); 5829 return; 5830 } 5831 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status); 5832 if(!U_FAILURE(status)){ 5833 log_err("ERROR: Could not cause a failure as expected: \n"); 5834 } 5835 status = U_ZERO_ERROR; 5836 } 5837 } 5838 5839 /* 5840 * This test ensures that characters placed before a character in a different script have the same lead byte 5841 * in their collation key before and after script reordering. 5842 */ 5843 static void TestBeforeRuleWithScriptReordering(void) 5844 { 5845 UParseError error; 5846 UErrorCode status = U_ZERO_ERROR; 5847 UCollator *myCollation; 5848 char srules[500] = "&[before 1]\\u03b1 < \\u0e01"; 5849 UChar rules[500]; 5850 uint32_t rulesLength = 0; 5851 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 5852 UCollationResult collResult; 5853 5854 uint8_t baseKey[256]; 5855 uint32_t baseKeyLength; 5856 uint8_t beforeKey[256]; 5857 uint32_t beforeKeyLength; 5858 5859 UChar base[] = { 0x03b1 }; /* base */ 5860 int32_t baseLen = sizeof(base)/sizeof(*base); 5861 5862 UChar before[] = { 0x0e01 }; /* ko kai */ 5863 int32_t beforeLen = sizeof(before)/sizeof(*before); 5864 5865 /*UChar *data[] = { before, base }; 5866 genericRulesStarter(srules, data, 2);*/ 5867 5868 log_verbose("Testing the &[before 1] rule with [reorder grek]\n"); 5869 5870 5871 /* build collator */ 5872 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n"); 5873 5874 rulesLength = u_unescape(srules, rules, LEN(rules)); 5875 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status); 5876 if(U_FAILURE(status)) { 5877 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", 
myErrorName(status)); 5878 return; 5879 } 5880 5881 /* check collation results - before rule applied but not script reordering */ 5882 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 5883 if (collResult != UCOL_GREATER) { 5884 log_err("Collation result not correct before script reordering = %d\n", collResult); 5885 } 5886 5887 /* check the lead byte of the collation keys before script reordering */ 5888 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 5889 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 5890 if (baseKey[0] != beforeKey[0]) { 5891 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 5892 } 5893 5894 /* reorder the scripts */ 5895 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status); 5896 if(U_FAILURE(status)) { 5897 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 5898 return; 5899 } 5900 5901 /* check collation results - before rule applied and after script reordering */ 5902 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); 5903 if (collResult != UCOL_GREATER) { 5904 log_err("Collation result not correct after script reordering = %d\n", collResult); 5905 } 5906 5907 /* check the lead byte of the collation keys after script reordering */ 5908 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); 5909 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); 5910 if (baseKey[0] != beforeKey[0]) { 5911 log_err("Different lead byte for sort keys using before fule and after script reordering. 
base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]); 5912 } 5913 5914 ucol_close(myCollation); 5915 } 5916 5917 /* 5918 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering. 5919 */ 5920 static void TestNonLeadBytesDuringCollationReordering(void) 5921 { 5922 UErrorCode status = U_ZERO_ERROR; 5923 UCollator *myCollation; 5924 int32_t reorderCodes[1] = {USCRIPT_GREEK}; 5925 5926 uint8_t baseKey[256]; 5927 uint32_t baseKeyLength; 5928 uint8_t reorderKey[256]; 5929 uint32_t reorderKeyLength; 5930 5931 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 }; 5932 5933 uint32_t i; 5934 5935 5936 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 5937 5938 /* build collator tertiary */ 5939 myCollation = ucol_open("", &status); 5940 ucol_setStrength(myCollation, UCOL_TERTIARY); 5941 if(U_FAILURE(status)) { 5942 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5943 return; 5944 } 5945 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 5946 5947 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5948 if(U_FAILURE(status)) { 5949 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5950 return; 5951 } 5952 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 5953 5954 if (baseKeyLength != reorderKeyLength) { 5955 log_err("Key lengths not the same during reordering.\n"); 5956 return; 5957 } 5958 5959 for (i = 1; i < baseKeyLength; i++) { 5960 if (baseKey[i] != reorderKey[i]) { 5961 log_err("Collation key bytes not the same at position %d.\n", i); 5962 return; 5963 } 5964 } 5965 ucol_close(myCollation); 5966 5967 /* build collator quaternary */ 5968 myCollation = ucol_open("", &status); 5969 ucol_setStrength(myCollation, UCOL_QUATERNARY); 5970 if(U_FAILURE(status)) { 5971 
log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 5972 return; 5973 } 5974 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256); 5975 5976 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 5977 if(U_FAILURE(status)) { 5978 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 5979 return; 5980 } 5981 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256); 5982 5983 if (baseKeyLength != reorderKeyLength) { 5984 log_err("Key lengths not the same during reordering.\n"); 5985 return; 5986 } 5987 5988 for (i = 1; i < baseKeyLength; i++) { 5989 if (baseKey[i] != reorderKey[i]) { 5990 log_err("Collation key bytes not the same at position %d.\n", i); 5991 return; 5992 } 5993 } 5994 ucol_close(myCollation); 5995 } 5996 5997 /* 5998 * Test reordering API. 5999 */ 6000 static void TestReorderingAPI(void) 6001 { 6002 UErrorCode status = U_ZERO_ERROR; 6003 UCollator *myCollation; 6004 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6005 int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS}; 6006 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6007 UCollationResult collResult; 6008 int32_t retrievedReorderCodesLength; 6009 int32_t retrievedReorderCodes[10]; 6010 UChar greekString[] = { 0x03b1 }; 6011 UChar punctuationString[] = { 0x203e }; 6012 int loopIndex; 6013 6014 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6015 6016 /* build collator tertiary */ 6017 myCollation = ucol_open("", &status); 6018 ucol_setStrength(myCollation, UCOL_TERTIARY); 6019 if(U_FAILURE(status)) { 6020 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6021 return; 6022 } 6023 
6024 /* set the reorderding */ 6025 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6026 if (U_FAILURE(status)) { 6027 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6028 return; 6029 } 6030 6031 /* get the reordering */ 6032 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6033 if (status != U_BUFFER_OVERFLOW_ERROR) { 6034 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 6035 return; 6036 } 6037 status = U_ZERO_ERROR; 6038 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6039 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6040 return; 6041 } 6042 /* now let's really get it */ 6043 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6044 if (U_FAILURE(status)) { 6045 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6046 return; 6047 } 6048 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6049 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6050 return; 6051 } 6052 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6053 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 6054 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6055 return; 6056 } 6057 } 6058 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6059 if (collResult != UCOL_LESS) { 6060 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 6061 return; 6062 } 6063 6064 /* clear the reordering */ 6065 
ucol_setReorderCodes(myCollation, NULL, 0, &status); 6066 if (U_FAILURE(status)) { 6067 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status)); 6068 return; 6069 } 6070 6071 /* get the reordering again */ 6072 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6073 if (retrievedReorderCodesLength != 0) { 6074 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 6075 return; 6076 } 6077 6078 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6079 if (collResult != UCOL_GREATER) { 6080 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 6081 return; 6082 } 6083 6084 /* test for error condition on duplicate reorder codes */ 6085 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status); 6086 if (!U_FAILURE(status)) { 6087 log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n"); 6088 return; 6089 } 6090 6091 status = U_ZERO_ERROR; 6092 /* test for reorder codes after a reset code */ 6093 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status); 6094 if (!U_FAILURE(status)) { 6095 log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n"); 6096 return; 6097 } 6098 6099 ucol_close(myCollation); 6100 } 6101 6102 /* 6103 * Test reordering API. 
6104 */ 6105 static void TestReorderingAPIWithRuleCreatedCollator(void) 6106 { 6107 UErrorCode status = U_ZERO_ERROR; 6108 UCollator *myCollation; 6109 UChar rules[90]; 6110 int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK}; 6111 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6112 UCollationResult collResult; 6113 int32_t retrievedReorderCodesLength; 6114 int32_t retrievedReorderCodes[10]; 6115 UChar greekString[] = { 0x03b1 }; 6116 UChar punctuationString[] = { 0x203e }; 6117 UChar hanString[] = { 0x65E5, 0x672C }; 6118 int loopIndex; 6119 6120 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6121 6122 /* build collator from rules */ 6123 u_uastrcpy(rules, "[reorder Hani Grek]"); 6124 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status); 6125 if(U_FAILURE(status)) { 6126 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6127 return; 6128 } 6129 6130 /* get the reordering */ 6131 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6132 if (U_FAILURE(status)) { 6133 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6134 return; 6135 } 6136 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) { 6137 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes)); 6138 return; 6139 } 6140 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6141 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { 6142 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6143 return; 6144 } 6145 } 6146 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString)); 6147 if (collResult != 
UCOL_GREATER) { 6148 log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n"); 6149 return; 6150 } 6151 6152 6153 /* set the reorderding */ 6154 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6155 if (U_FAILURE(status)) { 6156 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6157 return; 6158 } 6159 6160 /* get the reordering */ 6161 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6162 if (status != U_BUFFER_OVERFLOW_ERROR) { 6163 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); 6164 return; 6165 } 6166 status = U_ZERO_ERROR; 6167 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6168 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6169 return; 6170 } 6171 /* now let's really get it */ 6172 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6173 if (U_FAILURE(status)) { 6174 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6175 return; 6176 } 6177 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6178 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); 6179 return; 6180 } 6181 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6182 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 6183 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6184 return; 6185 } 6186 } 6187 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6188 if (collResult != UCOL_LESS) { 6189 log_err_status(status, "ERROR: collation 
result should have been UCOL_LESS\n"); 6190 return; 6191 } 6192 6193 /* clear the reordering */ 6194 ucol_setReorderCodes(myCollation, NULL, 0, &status); 6195 if (U_FAILURE(status)) { 6196 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status)); 6197 return; 6198 } 6199 6200 /* get the reordering again */ 6201 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status); 6202 if (retrievedReorderCodesLength != 0) { 6203 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0); 6204 return; 6205 } 6206 6207 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString)); 6208 if (collResult != UCOL_GREATER) { 6209 log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n"); 6210 return; 6211 } 6212 6213 ucol_close(myCollation); 6214 } 6215 6216 static int compareUScriptCodes(const void * a, const void * b) 6217 { 6218 return ( *(int32_t*)a - *(int32_t*)b ); 6219 } 6220 6221 static void TestEquivalentReorderingScripts(void) { 6222 UErrorCode status = U_ZERO_ERROR; 6223 int32_t equivalentScripts[50]; 6224 int32_t equivalentScriptsLength; 6225 int loopIndex; 6226 int32_t equivalentScriptsResult[] = { 6227 USCRIPT_BOPOMOFO, 6228 USCRIPT_LISU, 6229 USCRIPT_LYCIAN, 6230 USCRIPT_CARIAN, 6231 USCRIPT_LYDIAN, 6232 USCRIPT_YI, 6233 USCRIPT_OLD_ITALIC, 6234 USCRIPT_GOTHIC, 6235 USCRIPT_DESERET, 6236 USCRIPT_SHAVIAN, 6237 USCRIPT_OSMANYA, 6238 USCRIPT_LINEAR_B, 6239 USCRIPT_CYPRIOT, 6240 USCRIPT_OLD_SOUTH_ARABIAN, 6241 USCRIPT_AVESTAN, 6242 USCRIPT_IMPERIAL_ARAMAIC, 6243 USCRIPT_INSCRIPTIONAL_PARTHIAN, 6244 USCRIPT_INSCRIPTIONAL_PAHLAVI, 6245 USCRIPT_UGARITIC, 6246 USCRIPT_OLD_PERSIAN, 6247 USCRIPT_CUNEIFORM, 6248 USCRIPT_EGYPTIAN_HIEROGLYPHS, 6249 USCRIPT_PHONETIC_POLLARD, 6250 USCRIPT_SORA_SOMPENG, 6251 USCRIPT_MEROITIC_CURSIVE, 6252 USCRIPT_MEROITIC_HIEROGLYPHS 6253 }; 6254 6255 
qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes); 6256 6257 /* UScript.GOTHIC */ 6258 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); 6259 if (U_FAILURE(status)) { 6260 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); 6261 return; 6262 } 6263 /* 6264 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); 6265 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength); 6266 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 6267 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]); 6268 } 6269 */ 6270 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 6271 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); 6272 return; 6273 } 6274 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 6275 if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 6276 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); 6277 return; 6278 } 6279 } 6280 6281 /* UScript.SHAVIAN */ 6282 equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status); 6283 if (U_FAILURE(status)) { 6284 log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status)); 6285 return; 6286 } 6287 if (equivalentScriptsLength != LEN(equivalentScriptsResult)) { 6288 log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength); 6289 return; 6290 } 6291 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) { 6292 if 
(equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) { 6293 log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]); 6294 return; 6295 } 6296 } 6297 } 6298 6299 static void TestReorderingAcrossCloning(void) 6300 { 6301 UErrorCode status = U_ZERO_ERROR; 6302 UCollator *myCollation; 6303 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; 6304 UCollator *clonedCollation; 6305 int32_t bufferSize; 6306 int32_t retrievedReorderCodesLength; 6307 int32_t retrievedReorderCodes[10]; 6308 int loopIndex; 6309 6310 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n"); 6311 6312 /* build collator tertiary */ 6313 myCollation = ucol_open("", &status); 6314 ucol_setStrength(myCollation, UCOL_TERTIARY); 6315 if(U_FAILURE(status)) { 6316 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6317 return; 6318 } 6319 6320 /* set the reorderding */ 6321 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); 6322 if (U_FAILURE(status)) { 6323 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status)); 6324 return; 6325 } 6326 6327 /* clone the collator */ 6328 clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status); 6329 if (U_FAILURE(status)) { 6330 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status)); 6331 return; 6332 } 6333 6334 /* get the reordering */ 6335 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status); 6336 if (U_FAILURE(status)) { 6337 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status)); 6338 return; 6339 } 6340 if (retrievedReorderCodesLength != LEN(reorderCodes)) { 6341 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", 
retrievedReorderCodesLength, LEN(reorderCodes)); 6342 return; 6343 } 6344 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { 6345 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { 6346 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex); 6347 return; 6348 } 6349 } 6350 6351 /*uprv_free(buffer);*/ 6352 ucol_close(myCollation); 6353 ucol_close(clonedCollation); 6354 } 6355 6356 /* 6357 * Utility function to test one collation reordering test case set. 6358 * @param testcases Array of test cases. 6359 * @param n_testcases Size of the array testcases. 6360 * @param reorderTokens Array of reordering codes. 6361 * @param reorderTokensLen Size of the array reorderTokens. 6362 */ 6363 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen) 6364 { 6365 uint32_t testCaseNum; 6366 UErrorCode status = U_ZERO_ERROR; 6367 UCollator *myCollation; 6368 6369 myCollation = ucol_open("", &status); 6370 if (U_FAILURE(status)) { 6371 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6372 return; 6373 } 6374 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status); 6375 if(U_FAILURE(status)) { 6376 log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status)); 6377 return; 6378 } 6379 6380 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) { 6381 doTest(myCollation, 6382 testCases[testCaseNum].source, 6383 testCases[testCaseNum].target, 6384 testCases[testCaseNum].result 6385 ); 6386 } 6387 ucol_close(myCollation); 6388 } 6389 6390 static void TestGreekFirstReorder(void) 6391 { 6392 const char* strRules[] = { 6393 "[reorder Grek]" 6394 }; 6395 6396 const int32_t apiRules[] = { 6397 USCRIPT_GREEK 6398 }; 6399 6400 const static OneTestCase privateUseCharacterStrings[] = { 6401 { {0x0391}, {0x0391}, 
UCOL_EQUAL }, 6402 { {0x0041}, {0x0391}, UCOL_GREATER }, 6403 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER }, 6404 { {0x0060}, {0x0391}, UCOL_LESS }, 6405 { {0x0391}, {0xe2dc}, UCOL_LESS }, 6406 { {0x0391}, {0x0060}, UCOL_GREATER }, 6407 }; 6408 6409 /* Test rules creation */ 6410 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6411 6412 /* Test collation reordering API */ 6413 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6414 } 6415 6416 static void TestGreekLastReorder(void) 6417 { 6418 const char* strRules[] = { 6419 "[reorder Zzzz Grek]" 6420 }; 6421 6422 const int32_t apiRules[] = { 6423 USCRIPT_UNKNOWN, USCRIPT_GREEK 6424 }; 6425 6426 const static OneTestCase privateUseCharacterStrings[] = { 6427 { {0x0391}, {0x0391}, UCOL_EQUAL }, 6428 { {0x0041}, {0x0391}, UCOL_LESS }, 6429 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS }, 6430 { {0x0060}, {0x0391}, UCOL_LESS }, 6431 { {0x0391}, {0xe2dc}, UCOL_GREATER }, 6432 }; 6433 6434 /* Test rules creation */ 6435 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6436 6437 /* Test collation reordering API */ 6438 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6439 } 6440 6441 static void TestNonScriptReorder(void) 6442 { 6443 const char* strRules[] = { 6444 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]" 6445 }; 6446 6447 const int32_t apiRules[] = { 6448 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, 6449 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN, 6450 UCOL_REORDER_CODE_CURRENCY 6451 }; 6452 6453 const static OneTestCase privateUseCharacterStrings[] = { 6454 { {0x0391}, {0x0041}, UCOL_LESS }, 6455 { {0x0041}, {0x0391}, UCOL_GREATER }, 6456 { {0x0060}, {0x0041}, UCOL_LESS }, 6457 { 
{0x0060}, {0x0391}, UCOL_GREATER }, 6458 { {0x0024}, {0x0041}, UCOL_GREATER }, 6459 }; 6460 6461 /* Test rules creation */ 6462 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6463 6464 /* Test collation reordering API */ 6465 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6466 } 6467 6468 static void TestHaniReorder(void) 6469 { 6470 const char* strRules[] = { 6471 "[reorder Hani]" 6472 }; 6473 const int32_t apiRules[] = { 6474 USCRIPT_HAN 6475 }; 6476 6477 const static OneTestCase privateUseCharacterStrings[] = { 6478 { {0x4e00}, {0x0041}, UCOL_LESS }, 6479 { {0x4e00}, {0x0060}, UCOL_GREATER }, 6480 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 6481 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 6482 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 6483 { {0xfa27}, {0x0041}, UCOL_LESS }, 6484 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 6485 }; 6486 6487 /* Test rules creation */ 6488 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6489 6490 /* Test collation reordering API */ 6491 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules)); 6492 } 6493 6494 static void TestHaniReorderWithOtherRules(void) 6495 { 6496 const char* strRules[] = { 6497 "[reorder Hani] &b<a" 6498 }; 6499 /*const int32_t apiRules[] = { 6500 USCRIPT_HAN 6501 };*/ 6502 6503 const static OneTestCase privateUseCharacterStrings[] = { 6504 { {0x4e00}, {0x0041}, UCOL_LESS }, 6505 { {0x4e00}, {0x0060}, UCOL_GREATER }, 6506 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, 6507 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, 6508 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, 6509 { {0xfa27}, {0x0041}, UCOL_LESS }, 6510 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, 6511 { {0x0062}, {0x0061}, UCOL_LESS }, 6512 }; 6513 6514 /* Test rules creation */ 6515 
doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules)); 6516 } 6517 6518 static void TestMultipleReorder(void) 6519 { 6520 const char* strRules[] = { 6521 "[reorder Grek Zzzz DIGIT Latn Hani]" 6522 }; 6523 6524 const int32_t apiRules[] = { 6525 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN 6526 }; 6527 6528 const static OneTestCase collationTestCases[] = { 6529 { {0x0391}, {0x0041}, UCOL_LESS}, 6530 { {0x0031}, {0x0041}, UCOL_LESS}, 6531 { {0x0041}, {0x4e00}, UCOL_LESS}, 6532 }; 6533 6534 /* Test rules creation */ 6535 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules)); 6536 6537 /* Test collation reordering API */ 6538 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules)); 6539 } 6540 6541 /* 6542 * Test that covers issue reported in ticket 8814 6543 */ 6544 static void TestReorderWithNumericCollation(void) 6545 { 6546 UErrorCode status = U_ZERO_ERROR; 6547 UCollator *myCollation; 6548 UCollator *myReorderCollation; 6549 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS}; 6550 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 }; 6551 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */ 6552 UChar fortyS[] = { 0x0053 }; 6553 UChar fortyThreeP[] = { 0x0050 }; 6554 uint8_t fortyS_sortKey[128]; 6555 int32_t fortyS_sortKey_Length; 6556 uint8_t fortyThreeP_sortKey[128]; 6557 int32_t fortyThreeP_sortKey_Length; 6558 uint8_t fortyS_sortKey_reorder[128]; 6559 int32_t fortyS_sortKey_reorder_Length; 6560 uint8_t fortyThreeP_sortKey_reorder[128]; 6561 int32_t fortyThreeP_sortKey_reorder_Length; 6562 UCollationResult collResult; 6563 UCollationResult collResultReorder; 6564 6565 log_verbose("Testing reordering with and without numeric collation\n"); 6566 6567 /* 
build collator tertiary with numeric */ 6568 myCollation = ucol_open("", &status); 6569 /* 6570 ucol_setStrength(myCollation, UCOL_TERTIARY); 6571 */ 6572 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 6573 if(U_FAILURE(status)) { 6574 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6575 return; 6576 } 6577 6578 /* build collator tertiary with numeric and reordering */ 6579 myReorderCollation = ucol_open("", &status); 6580 /* 6581 ucol_setStrength(myReorderCollation, UCOL_TERTIARY); 6582 */ 6583 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); 6584 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status); 6585 if(U_FAILURE(status)) { 6586 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status)); 6587 return; 6588 } 6589 6590 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128); 6591 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128); 6592 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128); 6593 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128); 6594 6595 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) { 6596 log_err_status(status, "ERROR: couldn't generate sort keys\n"); 6597 return; 6598 } 6599 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP)); 6600 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP)); 6601 /* 6602 fprintf(stderr, "\tcollResult = %x\n", collResult); 6603 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder); 6604 fprintf(stderr, "\nfortyS\n"); 6605 
for (i = 0; i < fortyS_sortKey_Length; i++) { 6606 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]); 6607 } 6608 fprintf(stderr, "\nfortyThreeP\n"); 6609 for (i = 0; i < fortyThreeP_sortKey_Length; i++) { 6610 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]); 6611 } 6612 */ 6613 if (collResult != collResultReorder) { 6614 log_err_status(status, "ERROR: collation results should have been the same.\n"); 6615 return; 6616 } 6617 6618 ucol_close(myCollation); 6619 ucol_close(myReorderCollation); 6620 } 6621 6622 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b) 6623 { 6624 for (; *a == *b; ++a, ++b) { 6625 if (*a == 0) { 6626 return 0; 6627 } 6628 } 6629 return (*a < *b ? -1 : 1); 6630 } 6631 6632 static void TestImportRulesDeWithPhonebook(void) 6633 { 6634 const char* normalRules[] = { 6635 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc", 6636 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc", 6637 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc", 6638 }; 6639 const OneTestCase normalTests[] = { 6640 { {0x00e6}, {0x00c6}, UCOL_LESS}, 6641 { {0x00fc}, {0x00dc}, UCOL_GREATER}, 6642 }; 6643 6644 const char* importRules[] = { 6645 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]", 6646 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]", 6647 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]", 6648 }; 6649 const OneTestCase importTests[] = { 6650 { {0x00e6}, {0x00c6}, UCOL_LESS}, 6651 { {0x00fc}, {0x00dc}, UCOL_LESS}, 6652 }; 6653 6654 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules)); 6655 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules)); 6656 } 6657 6658 #if 0 6659 static void TestImportRulesFiWithEor(void) 6660 { 6661 /* DUCET. */ 6662 const char* defaultRules[] = { 6663 "&a<b", /* Dummy rule. 
*/ 6664 }; 6665 6666 const OneTestCase defaultTests[] = { 6667 { {0x0110}, {0x00F0}, UCOL_LESS}, 6668 { {0x00a3}, {0x00a5}, UCOL_LESS}, 6669 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS}, 6670 }; 6671 6672 /* European Ordering rules: ignore currency characters. */ 6673 const char* eorRules[] = { 6674 "[import root-u-co-eor]", 6675 }; 6676 6677 const OneTestCase eorTests[] = { 6678 { {0x0110}, {0x00F0}, UCOL_LESS}, 6679 { {0x00a3}, {0x00a5}, UCOL_EQUAL}, 6680 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL}, 6681 }; 6682 6683 const char* fiStdRules[] = { 6684 "[import fi-u-co-standard]", 6685 }; 6686 6687 const OneTestCase fiStdTests[] = { 6688 { {0x0110}, {0x00F0}, UCOL_GREATER}, 6689 { {0x00a3}, {0x00a5}, UCOL_LESS}, 6690 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS}, 6691 }; 6692 6693 /* Both European Ordering Rules and Fi Standard Rules. */ 6694 const char* eorFiStdRules[] = { 6695 "[import root-u-co-eor][import fi-u-co-standard]", 6696 }; 6697 6698 /* This is essentially same as the one before once fi.txt is updated with import. 
*/ 6699 const char* fiEorRules[] = { 6700 "[import fi-u-co-eor]", 6701 }; 6702 6703 const OneTestCase fiEorTests[] = { 6704 { {0x0110}, {0x00F0}, UCOL_GREATER}, 6705 { {0x00a3}, {0x00a5}, UCOL_EQUAL}, 6706 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL}, 6707 }; 6708 6709 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules)); 6710 doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules)); 6711 doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules)); 6712 doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules)); 6713 6714 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule: 6715 eor{ 6716 Sequence{ 6717 "[import root-u-co-eor][import fi-u-co-standard]" 6718 } 6719 Version{"21.0"} 6720 } 6721 */ 6722 /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */ 6723 6724 } 6725 #endif 6726 6727 #if 0 6728 /* 6729 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless 6730 * the resource files are built with -includeUnihanColl option. 6731 * TODO: Uncomment this function and make it work when unihan rules are built by default. 6732 */ 6733 static void TestImportRulesCJKWithUnihan(void) 6734 { 6735 /* DUCET. */ 6736 const char* defaultRules[] = { 6737 "&a<b", /* Dummy rule. */ 6738 }; 6739 6740 const OneTestCase defaultTests[] = { 6741 { {0x3402}, {0x4e1e}, UCOL_GREATER}, 6742 }; 6743 6744 /* European Ordering rules: ignore currency characters. 
*/ 6745 const char* unihanRules[] = { 6746 "[import ko-u-co-unihan]", 6747 }; 6748 6749 const OneTestCase unihanTests[] = { 6750 { {0x3402}, {0x4e1e}, UCOL_LESS}, 6751 }; 6752 6753 doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules)); 6754 doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules)); 6755 6756 } 6757 #endif 6758 6759 static void TestImport(void) 6760 { 6761 UCollator* vicoll; 6762 UCollator* escoll; 6763 UCollator* viescoll; 6764 UCollator* importviescoll; 6765 UParseError error; 6766 UErrorCode status = U_ZERO_ERROR; 6767 UChar* virules; 6768 int32_t viruleslength; 6769 UChar* esrules; 6770 int32_t esruleslength; 6771 UChar* viesrules; 6772 int32_t viesruleslength; 6773 char srules[500] = "[import vi][import es]"; 6774 UChar rules[500]; 6775 uint32_t length = 0; 6776 int32_t itemCount; 6777 int32_t i, k; 6778 UChar32 start; 6779 UChar32 end; 6780 UChar str[500]; 6781 int32_t strLength; 6782 6783 uint8_t sk1[500]; 6784 uint8_t sk2[500]; 6785 6786 UBool b; 6787 USet* tailoredSet; 6788 USet* importTailoredSet; 6789 6790 6791 vicoll = ucol_open("vi", &status); 6792 if(U_FAILURE(status)){ 6793 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status)); 6794 return; 6795 } 6796 6797 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); 6798 escoll = ucol_open("es", &status); 6799 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); 6800 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*)); 6801 viesrules[0] = 0; 6802 u_strcat(viesrules, virules); 6803 u_strcat(viesrules, esrules); 6804 viesruleslength = viruleslength + esruleslength; 6805 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 6806 6807 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 6808 length = u_unescape(srules, rules, 500); 6809 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, 
&error, &status); 6810 if(U_FAILURE(status)){ 6811 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6812 return; 6813 } 6814 6815 tailoredSet = ucol_getTailoredSet(viescoll, &status); 6816 importTailoredSet = ucol_getTailoredSet(importviescoll, &status); 6817 6818 if(!uset_equals(tailoredSet, importTailoredSet)){ 6819 log_err("Tailored sets not equal"); 6820 } 6821 6822 uset_close(importTailoredSet); 6823 6824 itemCount = uset_getItemCount(tailoredSet); 6825 6826 for( i = 0; i < itemCount; i++){ 6827 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 6828 if(strLength < 2){ 6829 for (; start <= end; start++){ 6830 k = 0; 6831 U16_APPEND(str, k, 500, start, b); 6832 ucol_getSortKey(viescoll, str, 1, sk1, 500); 6833 ucol_getSortKey(importviescoll, str, 1, sk2, 500); 6834 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6835 log_err("Sort key for %s not equal\n", str); 6836 break; 6837 } 6838 } 6839 }else{ 6840 ucol_getSortKey(viescoll, str, strLength, sk1, 500); 6841 ucol_getSortKey(importviescoll, str, strLength, sk2, 500); 6842 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6843 log_err("ZZSort key for %s not equal\n", str); 6844 break; 6845 } 6846 6847 } 6848 } 6849 6850 uset_close(tailoredSet); 6851 6852 uprv_free(viesrules); 6853 6854 ucol_close(vicoll); 6855 ucol_close(escoll); 6856 ucol_close(viescoll); 6857 ucol_close(importviescoll); 6858 } 6859 6860 static void TestImportWithType(void) 6861 { 6862 UCollator* vicoll; 6863 UCollator* decoll; 6864 UCollator* videcoll; 6865 UCollator* importvidecoll; 6866 UParseError error; 6867 UErrorCode status = U_ZERO_ERROR; 6868 const UChar* virules; 6869 int32_t viruleslength; 6870 const UChar* derules; 6871 int32_t deruleslength; 6872 UChar* viderules; 6873 int32_t videruleslength; 6874 const char srules[500] = "[import vi][import de-u-co-phonebk]"; 6875 UChar rules[500]; 6876 uint32_t length = 0; 6877 int32_t itemCount; 6878 int32_t i, k; 6879 UChar32 start; 
6880 UChar32 end; 6881 UChar str[500]; 6882 int32_t strLength; 6883 6884 uint8_t sk1[500]; 6885 uint8_t sk2[500]; 6886 6887 USet* tailoredSet; 6888 USet* importTailoredSet; 6889 6890 vicoll = ucol_open("vi", &status); 6891 if(U_FAILURE(status)){ 6892 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6893 return; 6894 } 6895 virules = ucol_getRules(vicoll, &viruleslength); 6896 /* decoll = ucol_open("de@collation=phonebook", &status); */ 6897 decoll = ucol_open("de-u-co-phonebk", &status); 6898 if(U_FAILURE(status)){ 6899 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6900 return; 6901 } 6902 6903 6904 derules = ucol_getRules(decoll, &deruleslength); 6905 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*)); 6906 viderules[0] = 0; 6907 u_strcat(viderules, virules); 6908 u_strcat(viderules, derules); 6909 videruleslength = viruleslength + deruleslength; 6910 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status); 6911 6912 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ 6913 length = u_unescape(srules, rules, 500); 6914 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status); 6915 if(U_FAILURE(status)){ 6916 log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); 6917 return; 6918 } 6919 6920 tailoredSet = ucol_getTailoredSet(videcoll, &status); 6921 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status); 6922 6923 if(!uset_equals(tailoredSet, importTailoredSet)){ 6924 log_err("Tailored sets not equal"); 6925 } 6926 6927 uset_close(importTailoredSet); 6928 6929 itemCount = uset_getItemCount(tailoredSet); 6930 6931 for( i = 0; i < itemCount; i++){ 6932 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status); 6933 if(strLength < 2){ 6934 for (; start <= end; start++){ 6935 k = 0; 6936 
U16_APPEND_UNSAFE(str, k, start); 6937 ucol_getSortKey(videcoll, str, 1, sk1, 500); 6938 ucol_getSortKey(importvidecoll, str, 1, sk2, 500); 6939 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6940 log_err("Sort key for %s not equal\n", str); 6941 break; 6942 } 6943 } 6944 }else{ 6945 ucol_getSortKey(videcoll, str, strLength, sk1, 500); 6946 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500); 6947 if(compare_uint8_t_arrays(sk1, sk2) != 0){ 6948 log_err("Sort key for %s not equal\n", str); 6949 break; 6950 } 6951 6952 } 6953 } 6954 6955 uset_close(tailoredSet); 6956 6957 uprv_free(viderules); 6958 6959 ucol_close(videcoll); 6960 ucol_close(importvidecoll); 6961 ucol_close(vicoll); 6962 ucol_close(decoll); 6963 } 6964 6965 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */ 6966 static const UChar longUpperStr1[]= { /* 155 chars */ 6967 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 6968 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52, 6969 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E, 6970 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C, 6971 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E, 6972 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20, 6973 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45, 6974 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32, 6975 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62, 6976 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61 6977 }; 6978 6979 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics 
on vowels, repeated 5 times */ 6980 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */ 6981 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6982 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6983 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6984 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, 6985 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20 6986 }; 6987 6988 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */ 6989 static const UChar longUpperStr3[]= { /* 324 chars */ 6990 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6991 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6992 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6993 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6994 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6995 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6996 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6997 
0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6998 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 6999 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7000 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, 7001 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20 7002 }; 7003 7004 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0])) 7005 7006 typedef struct { 7007 const UChar * longUpperStrPtr; 7008 int32_t longUpperStrLen; 7009 } LongUpperStrItem; 7010 7011 /* String pointers must be in reverse collation order of the corresponding strings */ 7012 static const LongUpperStrItem longUpperStrItems[] = { 7013 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, 7014 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, 7015 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, 7016 { NULL, 0 } 7017 }; 7018 7019 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */ 7020 7021 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */ 7022 static void TestCaseLevelBufferOverflow(void) 7023 { 7024 UErrorCode status = U_ZERO_ERROR; 7025 UCollator * ucol = ucol_open("root", &status); 7026 if ( U_SUCCESS(status) ) { 7027 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); 7028 if ( U_SUCCESS(status) ) { 7029 const LongUpperStrItem * itemPtr; 7030 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax]; 7031 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) { 7032 int32_t sortKeyLen; 7033 if (itemPtr > longUpperStrItems) { 7034 
uprv_strcpy((char *)sortKeyB, (char *)sortKeyA); 7035 } 7036 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax); 7037 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) { 7038 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen); 7039 break; 7040 } 7041 if ( itemPtr > longUpperStrItems ) { 7042 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB); 7043 if (compareResult >= 0) { 7044 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult); 7045 } 7046 } 7047 } 7048 } else { 7049 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status)); 7050 } 7051 ucol_close(ucol); 7052 } else { 7053 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status)); 7054 } 7055 } 7056 7057 7058 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) 7059 7060 void addMiscCollTest(TestNode** root) 7061 { 7062 TEST(TestRuleOptions); 7063 TEST(TestBeforePrefixFailure); 7064 TEST(TestContractionClosure); 7065 TEST(TestPrefixCompose); 7066 TEST(TestStrCollIdenticalPrefix); 7067 TEST(TestPrefix); 7068 TEST(TestNewJapanese); 7069 /*TEST(TestLimitations);*/ 7070 TEST(TestNonChars); 7071 TEST(TestExtremeCompression); 7072 TEST(TestSurrogates); 7073 /* BEGIN android-removed 7074 To save space, Android does not include the collation tailoring rules. 7075 We skip the tailing tests for collations. 
*/ 7076 /* TEST(TestVariableTopSetting); */ 7077 /* END android-removed */ 7078 TEST(TestBocsuCoverage); 7079 TEST(TestCyrillicTailoring); 7080 TEST(TestCase); 7081 TEST(IncompleteCntTest); 7082 TEST(BlackBirdTest); 7083 TEST(FunkyATest); 7084 TEST(BillFairmanTest); 7085 TEST(RamsRulesTest); 7086 TEST(IsTailoredTest); 7087 TEST(TestCollations); 7088 TEST(TestChMove); 7089 TEST(TestImplicitTailoring); 7090 TEST(TestFCDProblem); 7091 TEST(TestEmptyRule); 7092 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */ 7093 TEST(TestJ815); 7094 /*TEST(TestJ831);*/ /* we changed lv locale */ 7095 TEST(TestBefore); 7096 TEST(TestRedundantRules); 7097 TEST(TestExpansionSyntax); 7098 TEST(TestHangulTailoring); 7099 TEST(TestUCARules); 7100 TEST(TestIncrementalNormalize); 7101 TEST(TestComposeDecompose); 7102 TEST(TestCompressOverlap); 7103 TEST(TestContraction); 7104 TEST(TestExpansion); 7105 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */ 7106 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */ 7107 TEST(TestOptimize); 7108 TEST(TestSuppressContractions); 7109 TEST(Alexis2); 7110 TEST(TestHebrewUCA); 7111 TEST(TestPartialSortKeyTermination); 7112 TEST(TestSettings); 7113 TEST(TestEquals); 7114 TEST(TestJ2726); 7115 TEST(NullRule); 7116 TEST(TestNumericCollation); 7117 TEST(TestTibetanConformance); 7118 TEST(TestPinyinProblem); 7119 TEST(TestImplicitGeneration); 7120 TEST(TestSeparateTrees); 7121 TEST(TestBeforePinyin); 7122 TEST(TestBeforeTightening); 7123 /*TEST(TestMoreBefore);*/ 7124 TEST(TestTailorNULL); 7125 TEST(TestUpperFirstQuaternary); 7126 TEST(TestJ4960); 7127 TEST(TestJ5223); 7128 TEST(TestJ5232); 7129 TEST(TestJ5367); 7130 TEST(TestHiragana); 7131 TEST(TestSortKeyConsistency); 7132 TEST(TestVI5913); /* VI, RO tailored rules */ 7133 TEST(TestCroatianSortKey); 7134 TEST(TestTailor6179); 7135 TEST(TestUCAPrecontext); 7136 TEST(TestOutOfBuffer5468); 7137 
TEST(TestSameStrengthList); 7138 7139 TEST(TestSameStrengthListQuoted); 7140 TEST(TestSameStrengthListSupplemental); 7141 TEST(TestSameStrengthListQwerty); 7142 TEST(TestSameStrengthListQuotedQwerty); 7143 TEST(TestSameStrengthListRanges); 7144 TEST(TestSameStrengthListSupplementalRanges); 7145 TEST(TestSpecialCharacters); 7146 TEST(TestPrivateUseCharacters); 7147 TEST(TestPrivateUseCharactersInList); 7148 TEST(TestPrivateUseCharactersInRange); 7149 TEST(TestInvalidListsAndRanges); 7150 TEST(TestImportRulesDeWithPhonebook); 7151 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */ 7152 /* TEST(TestImportRulesCJKWithUnihan); */ 7153 /* BEGIN android-removed: Due to Android does not include reverse UCA table. 7154 TEST(TestImport); 7155 TEST(TestImportWithType); 7156 END android-removed */ 7157 7158 TEST(TestBeforeRuleWithScriptReordering); 7159 TEST(TestNonLeadBytesDuringCollationReordering); 7160 TEST(TestReorderingAPI); 7161 TEST(TestReorderingAPIWithRuleCreatedCollator); 7162 TEST(TestEquivalentReorderingScripts); 7163 TEST(TestGreekFirstReorder); 7164 TEST(TestGreekLastReorder); 7165 TEST(TestNonScriptReorder); 7166 TEST(TestHaniReorder); 7167 TEST(TestHaniReorderWithOtherRules); 7168 TEST(TestMultipleReorder); 7169 TEST(TestReorderingAcrossCloning); 7170 TEST(TestReorderWithNumericCollation); 7171 7172 TEST(TestCaseLevelBufferOverflow); 7173 } 7174 7175 #endif /* #if !UCONFIG_NO_COLLATION */ 7176