Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2001-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*******************************************************************************
      7 *
      8 * File cmsccoll.C
      9 *
     10 *******************************************************************************/
     11 /**
     12  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     13  * to fit.
     14  */
     15 
     16 #include <stdio.h>
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_COLLATION
     21 
     22 #include "unicode/ucol.h"
     23 #include "unicode/ucoleitr.h"
     24 #include "unicode/uloc.h"
     25 #include "cintltst.h"
     26 #include "ccolltst.h"
     27 #include "callcoll.h"
     28 #include "unicode/ustring.h"
     29 #include "string.h"
     30 #include "ucol_imp.h"
     31 #include "ucol_tok.h"
     32 #include "cmemory.h"
     33 #include "cstring.h"
     34 #include "uassert.h"
     35 #include "unicode/parseerr.h"
     36 #include "unicode/ucnv.h"
     37 #include "unicode/ures.h"
     38 #include "uparse.h"
     39 #include "putilimp.h"
     40 
     41 
     42 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     43 
     44 #define MAX_TOKEN_LEN 16
     45 
     46 typedef UCollationResult tst_strcoll(void *collator, const int object,
     47                         const UChar *source, const int sLen,
     48                         const UChar *target, const int tLen);
     49 
     50 
     51 
     52 const static char cnt1[][10] = {
     53 
     54   "AA",
     55   "AC",
     56   "AZ",
     57   "AQ",
     58   "AB",
     59   "ABZ",
     60   "ABQ",
     61   "Z",
     62   "ABC",
     63   "Q",
     64   "B"
     65 };
     66 
     67 const static char cnt2[][10] = {
     68   "DA",
     69   "DAD",
     70   "DAZ",
     71   "MAR",
     72   "Z",
     73   "DAVIS",
     74   "MARK",
     75   "DAV",
     76   "DAVI"
     77 };
     78 
     79 static void IncompleteCntTest(void)
     80 {
     81   UErrorCode status = U_ZERO_ERROR;
     82   UChar temp[90];
     83   UChar t1[90];
     84   UChar t2[90];
     85 
     86   UCollator *coll =  NULL;
     87   uint32_t i = 0, j = 0;
     88   uint32_t size = 0;
     89 
     90   u_uastrcpy(temp, " & Z < ABC < Q < B");
     91 
     92   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     93 
     94   if(U_SUCCESS(status)) {
     95     size = sizeof(cnt1)/sizeof(cnt1[0]);
     96     for(i = 0; i < size-1; i++) {
     97       for(j = i+1; j < size; j++) {
     98         UCollationElements *iter;
     99         u_uastrcpy(t1, cnt1[i]);
    100         u_uastrcpy(t2, cnt1[j]);
    101         doTest(coll, t1, t2, UCOL_LESS);
    102         /* synwee : added collation element iterator test */
    103         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    104         if (U_FAILURE(status)) {
    105           log_err("Creation of iterator failed\n");
    106           break;
    107         }
    108         backAndForth(iter);
    109         ucol_closeElements(iter);
    110       }
    111     }
    112   }
    113 
    114   ucol_close(coll);
    115 
    116 
    117   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    118   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    119 
    120   if(U_SUCCESS(status)) {
    121     size = sizeof(cnt2)/sizeof(cnt2[0]);
    122     for(i = 0; i < size-1; i++) {
    123       for(j = i+1; j < size; j++) {
    124         UCollationElements *iter;
    125         u_uastrcpy(t1, cnt2[i]);
    126         u_uastrcpy(t2, cnt2[j]);
    127         doTest(coll, t1, t2, UCOL_LESS);
    128 
    129         /* synwee : added collation element iterator test */
    130         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    131         if (U_FAILURE(status)) {
    132           log_err("Creation of iterator failed\n");
    133           break;
    134         }
    135         backAndForth(iter);
    136         ucol_closeElements(iter);
    137       }
    138     }
    139   }
    140 
    141   ucol_close(coll);
    142 
    143 
    144 }
    145 
    146 const static char shifted[][20] = {
    147   "black bird",
    148   "black-bird",
    149   "blackbird",
    150   "black Bird",
    151   "black-Bird",
    152   "blackBird",
    153   "black birds",
    154   "black-birds",
    155   "blackbirds"
    156 };
    157 
    158 const static UCollationResult shiftedTert[] = {
    159   UCOL_EQUAL,
    160   UCOL_EQUAL,
    161   UCOL_EQUAL,
    162   UCOL_LESS,
    163   UCOL_EQUAL,
    164   UCOL_EQUAL,
    165   UCOL_LESS,
    166   UCOL_EQUAL,
    167   UCOL_EQUAL
    168 };
    169 
    170 const static char nonignorable[][20] = {
    171   "black bird",
    172   "black Bird",
    173   "black birds",
    174   "black-bird",
    175   "black-Bird",
    176   "black-birds",
    177   "blackbird",
    178   "blackBird",
    179   "blackbirds"
    180 };
    181 
    182 static void BlackBirdTest(void) {
    183   UErrorCode status = U_ZERO_ERROR;
    184   UChar t1[90];
    185   UChar t2[90];
    186 
    187   uint32_t i = 0, j = 0;
    188   uint32_t size = 0;
    189   UCollator *coll = ucol_open("en_US", &status);
    190 
    191   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    192   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    193 
    194   if(U_SUCCESS(status)) {
    195     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    196     for(i = 0; i < size-1; i++) {
    197       for(j = i+1; j < size; j++) {
    198         u_uastrcpy(t1, nonignorable[i]);
    199         u_uastrcpy(t2, nonignorable[j]);
    200         doTest(coll, t1, t2, UCOL_LESS);
    201       }
    202     }
    203   }
    204 
    205   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    206   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    207 
    208   if(U_SUCCESS(status)) {
    209     size = sizeof(shifted)/sizeof(shifted[0]);
    210     for(i = 0; i < size-1; i++) {
    211       for(j = i+1; j < size; j++) {
    212         u_uastrcpy(t1, shifted[i]);
    213         u_uastrcpy(t2, shifted[j]);
    214         doTest(coll, t1, t2, UCOL_LESS);
    215       }
    216     }
    217   }
    218 
    219   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    220   if(U_SUCCESS(status)) {
    221     size = sizeof(shifted)/sizeof(shifted[0]);
    222     for(i = 1; i < size; i++) {
    223       u_uastrcpy(t1, shifted[i-1]);
    224       u_uastrcpy(t2, shifted[i]);
    225       doTest(coll, t1, t2, shiftedTert[i]);
    226     }
    227   }
    228 
    229   ucol_close(coll);
    230 }
    231 
    232 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    233     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    234     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    235     {0x0041/*'A'*/, 0x0300, 0x0000},
    236     {0x00C0, 0x0301, 0x0000},
    237     /* this would work with forced normalization */
    238     {0x00C0, 0x0316, 0x0000}
    239 };
    240 
    241 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    242     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    243     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    244     {0x00C0, 0},
    245     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    246     /* this would work with forced normalization */
    247     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    248 };
    249 
    250 const static UCollationResult results[] = {
    251     UCOL_GREATER,
    252     UCOL_EQUAL,
    253     UCOL_EQUAL,
    254     UCOL_GREATER,
    255     UCOL_EQUAL
    256 };
    257 
    258 static void FunkyATest(void)
    259 {
    260 
    261     int32_t i;
    262     UErrorCode status = U_ZERO_ERROR;
    263     UCollator  *myCollation;
    264     myCollation = ucol_open("en_US", &status);
    265     if(U_FAILURE(status)){
    266         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    267         return;
    268     }
    269     log_verbose("Testing some A letters, for some reason\n");
    270     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    271     ucol_setStrength(myCollation, UCOL_TERTIARY);
    272     for (i = 0; i < 4 ; i++)
    273     {
    274         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    275     }
    276     ucol_close(myCollation);
    277 }
    278 
    279 UColAttributeValue caseFirst[] = {
    280     UCOL_OFF,
    281     UCOL_LOWER_FIRST,
    282     UCOL_UPPER_FIRST
    283 };
    284 
    285 
    286 UColAttributeValue alternateHandling[] = {
    287     UCOL_NON_IGNORABLE,
    288     UCOL_SHIFTED
    289 };
    290 
    291 UColAttributeValue caseLevel[] = {
    292     UCOL_OFF,
    293     UCOL_ON
    294 };
    295 
    296 UColAttributeValue strengths[] = {
    297     UCOL_PRIMARY,
    298     UCOL_SECONDARY,
    299     UCOL_TERTIARY,
    300     UCOL_QUATERNARY,
    301     UCOL_IDENTICAL
    302 };
    303 
    304 #if 0
    305 static const char * strengthsC[] = {
    306     "UCOL_PRIMARY",
    307     "UCOL_SECONDARY",
    308     "UCOL_TERTIARY",
    309     "UCOL_QUATERNARY",
    310     "UCOL_IDENTICAL"
    311 };
    312 
    313 static const char * caseFirstC[] = {
    314     "UCOL_OFF",
    315     "UCOL_LOWER_FIRST",
    316     "UCOL_UPPER_FIRST"
    317 };
    318 
    319 
    320 static const char * alternateHandlingC[] = {
    321     "UCOL_NON_IGNORABLE",
    322     "UCOL_SHIFTED"
    323 };
    324 
    325 static const char * caseLevelC[] = {
    326     "UCOL_OFF",
    327     "UCOL_ON"
    328 };
    329 
    330 /* not used currently - does not test only prints */
    331 static void PrintMarkDavis(void)
    332 {
    333   UErrorCode status = U_ZERO_ERROR;
    334   UChar m[256];
    335   uint8_t sortkey[256];
    336   UCollator *coll = ucol_open("en_US", &status);
    337   uint32_t h,i,j,k, sortkeysize;
    338   uint32_t sizem = 0;
    339   char buffer[512];
    340   uint32_t len = 512;
    341 
    342   log_verbose("PrintMarkDavis");
    343 
    344   u_uastrcpy(m, "Mark Davis");
    345   sizem = u_strlen(m);
    346 
    347 
    348   m[1] = 0xe4;
    349 
    350   for(i = 0; i<sizem; i++) {
    351     fprintf(stderr, "\\u%04X ", m[i]);
    352   }
    353   fprintf(stderr, "\n");
    354 
    355   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    356     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    357     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    358 
    359     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    360       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    361       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    362 
    363       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    364         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    365         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    366 
    367         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    368           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    369           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    370           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    371           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    372         }
    373 
    374       }
    375 
    376     }
    377 
    378   }
    379 }
    380 #endif
    381 
    382 static void BillFairmanTest(void) {
    383 /*
    384 ** check for actual locale via ICU resource bundles
    385 **
    386 ** lp points to the original locale ("fr_FR_....")
    387 */
    388 
    389     UResourceBundle *lr,*cr;
    390     UErrorCode              lec = U_ZERO_ERROR;
    391     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    392 
    393     log_verbose("BillFairmanTest\n");
    394 
    395     lr = ures_open(NULL,lp,&lec);
    396     if (lr) {
    397         cr = ures_getByKey(lr,"collations",0,&lec);
    398         if (cr) {
    399             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    400             if (lp) {
    401                 if (U_SUCCESS(lec)) {
    402                     if(strcmp(lp, "fr") != 0) {
    403                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    404                     }
    405                 }
    406             }
    407             ures_close(cr);
    408         }
    409         ures_close(lr);
    410     }
    411 }
    412 
    413 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    414     UChar source[256] = { '\0'};
    415     UChar target[256] = { '\0'};
    416     UChar preP = 0x31a3;
    417     UChar preQ = 0x310d;
    418 /*
    419     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
    420     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
    421 */
    422     /*log_verbose("Testing primary\n");*/
    423 
    424     doTest(col, p, q, UCOL_LESS);
    425 /*
    426     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
    427 
    428     if(result!=UCOL_LESS){
    429        aescstrdup(p,utfSource,256);
    430        aescstrdup(q,utfTarget,256);
    431        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    432     }
    433 */
    434     source[0] = preP;
    435     u_strcpy(source+1,p);
    436     target[0] = preQ;
    437     u_strcpy(target+1,q);
    438     doTest(col, source, target, UCOL_LESS);
    439 /*
    440     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
    441 */
    442 }
    443 
    444 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    445     UChar source[256] = { '\0'};
    446     UChar target[256] = { '\0'};
    447 
    448     /*log_verbose("Testing secondary\n");*/
    449 
    450     doTest(col, p, q, UCOL_LESS);
    451 /*
    452     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
    453 */
    454     source[0] = 0x0053;
    455     u_strcpy(source+1,p);
    456     target[0]= 0x0073;
    457     u_strcpy(target+1,q);
    458 
    459     doTest(col, source, target, UCOL_LESS);
    460 /*
    461     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    462 */
    463 
    464 
    465     u_strcpy(source,p);
    466     source[u_strlen(p)] = 0x62;
    467     source[u_strlen(p)+1] = 0;
    468 
    469 
    470     u_strcpy(target,q);
    471     target[u_strlen(q)] = 0x61;
    472     target[u_strlen(q)+1] = 0;
    473 
    474     doTest(col, source, target, UCOL_GREATER);
    475 
    476 /*
    477     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
    478 */
    479 }
    480 
    481 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    482     UChar source[256] = { '\0'};
    483     UChar target[256] = { '\0'};
    484 
    485     /*log_verbose("Testing tertiary\n");*/
    486 
    487     doTest(col, p, q, UCOL_LESS);
    488 /*
    489     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
    490 */
    491     source[0] = 0x0020;
    492     u_strcpy(source+1,p);
    493     target[0]= 0x002D;
    494     u_strcpy(target+1,q);
    495 
    496     doTest(col, source, target, UCOL_LESS);
    497 /*
    498     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
    499 */
    500 
    501     u_strcpy(source,p);
    502     source[u_strlen(p)] = 0xE0;
    503     source[u_strlen(p)+1] = 0;
    504 
    505     u_strcpy(target,q);
    506     target[u_strlen(q)] = 0x61;
    507     target[u_strlen(q)+1] = 0;
    508 
    509     doTest(col, source, target, UCOL_GREATER);
    510 
    511 /*
    512     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    513 */
    514 }
    515 
    516 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    517 /*
    518     UChar source[256] = { '\0'};
    519     UChar target[256] = { '\0'};
    520 */
    521 
    522     doTest(col, p, q, UCOL_EQUAL);
    523 /*
    524     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    525 */
    526 }
    527 
    528 static void testCollator(UCollator *coll, UErrorCode *status) {
    529   const UChar *rules = NULL, *current = NULL;
    530   int32_t ruleLen = 0;
    531   uint32_t strength = 0;
    532   uint32_t chOffset = 0; uint32_t chLen = 0;
    533   uint32_t exOffset = 0; uint32_t exLen = 0;
    534   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    535   uint32_t firstEx = 0;
    536 /*  uint32_t rExpsLen = 0; */
    537   uint32_t firstLen = 0;
    538   UBool varT = FALSE; UBool top_ = TRUE;
    539   uint16_t specs = 0;
    540   UBool startOfRules = TRUE;
    541   UBool lastReset = FALSE;
    542   UBool before = FALSE;
    543   uint32_t beforeStrength = 0;
    544   UColTokenParser src;
    545   UColOptionSet opts;
    546 
    547   UChar first[256];
    548   UChar second[256];
    549   UChar tempB[256];
    550   uint32_t tempLen;
    551   UChar *rulesCopy = NULL;
    552   UParseError parseError;
    553 
    554   src.opts = &opts;
    555 
    556   rules = ucol_getRules(coll, &ruleLen);
    557   if(U_SUCCESS(*status) && ruleLen > 0) {
    558     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    559     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    560     src.current = src.source = rulesCopy;
    561     src.end = rulesCopy+ruleLen;
    562     src.extraCurrent = src.end;
    563     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    564     *first = *second = 0;
    565 
    566     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
    567       strength = src.parsedToken.strength;
    568       chOffset = src.parsedToken.charsOffset;
    569       chLen = src.parsedToken.charsLen;
    570       exOffset = src.parsedToken.extensionOffset;
    571       exLen = src.parsedToken.extensionLen;
    572       prefixOffset = src.parsedToken.prefixOffset;
    573       prefixLen = src.parsedToken.prefixLen;
    574       specs = src.parsedToken.flags;
    575 
    576       startOfRules = FALSE;
    577       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    578       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    579       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
    580         second[0] = 0;
    581       } else {
    582         u_strncpy(second,rulesCopy+chOffset, chLen);
    583         second[chLen] = 0;
    584 
    585         if(exLen > 0 && firstEx == 0) {
    586           u_strncat(first, rulesCopy+exOffset, exLen);
    587           first[firstLen+exLen] = 0;
    588         }
    589 
    590         if(lastReset == TRUE && prefixLen != 0) {
    591           u_strncpy(first+prefixLen, first, firstLen);
    592           u_strncpy(first, rulesCopy+prefixOffset, prefixLen);
    593           first[firstLen+prefixLen] = 0;
    594           firstLen = firstLen+prefixLen;
    595         }
    596 
    597         if(before == TRUE) { /* swap first and second */
    598           u_strcpy(tempB, first);
    599           u_strcpy(first, second);
    600           u_strcpy(second, tempB);
    601 
    602           tempLen = firstLen;
    603           firstLen = chLen;
    604           chLen = tempLen;
    605 
    606           tempLen = firstEx;
    607           firstEx = exLen;
    608           exLen = tempLen;
    609           if(beforeStrength < strength) {
    610             strength = beforeStrength;
    611           }
    612         }
    613       }
    614       lastReset = FALSE;
    615 
    616       switch(strength){
    617       case UCOL_IDENTICAL:
    618           testEquality(coll,first,second);
    619           break;
    620       case UCOL_PRIMARY:
    621           testPrimary(coll,first,second);
    622           break;
    623       case UCOL_SECONDARY:
    624           testSecondary(coll,first,second);
    625           break;
    626       case UCOL_TERTIARY:
    627           testTertiary(coll,first,second);
    628           break;
    629       case UCOL_TOK_RESET:
    630         lastReset = TRUE;
    631         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
    632         if(before) {
    633           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
    634         }
    635         break;
    636       default:
    637           break;
    638       }
    639 
    640       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
    641         before = FALSE;
    642       } else {
    643         firstLen = chLen;
    644         firstEx = exLen;
    645         u_strcpy(first, second);
    646       }
    647     }
    648     free(rulesCopy);
    649   }
    650 }
    651 
    652 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    653   UCollator *UCA = (UCollator *)collator;
    654   return ucol_strcoll(UCA, source, sLen, target, tLen);
    655 }
    656 
    657 /*
    658 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    659 #ifdef U_WINDOWS
    660   LCID lcid = (LCID)collator;
    661   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
    662 #else
    663   return 0;
    664 #endif
    665 }
    666 */
    667 
    668 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
    669                                      UChar s1, UChar s2,
    670                                      const UChar *s, const uint32_t sLen,
    671                                      const UChar *t, const uint32_t tLen) {
    672   UChar source[256] = {0};
    673   UChar target[256] = {0};
    674 
    675   source[0] = s1;
    676   u_strcpy(source+1, s);
    677   target[0] = s2;
    678   u_strcpy(target+1, t);
    679 
    680   return func(collator, opts, source, sLen+1, target, tLen+1);
    681 }
    682 
    683 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
    684                                    UChar s1, UChar s2,
    685                                    const UChar *s, const uint32_t sLen,
    686                                    const UChar *t, const uint32_t tLen) {
    687   UChar source[256] = {0};
    688   UChar target[256] = {0};
    689 
    690   u_strcpy(source, s);
    691   source[sLen] = s1;
    692   u_strcpy(target, t);
    693   target[tLen] = s2;
    694 
    695   return func(collator, opts, source, sLen+1, target, tLen+1);
    696 }
    697 
    698 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
    699                               const UChar *s, const uint32_t sLen,
    700                               const UChar *t, const uint32_t tLen,
    701                               UCollationResult result) {
    702   /*UChar fPrimary = 0x6d;*/
    703   /*UChar sPrimary = 0x6e;*/
    704   UChar fSecondary = 0x310d;
    705   UChar sSecondary = 0x31a3;
    706   UChar fTertiary = 0x310f;
    707   UChar sTertiary = 0x31b7;
    708 
    709   UCollationResult oposite;
    710   if(result == UCOL_EQUAL) {
    711     return UCOL_IDENTICAL;
    712   } else if(result == UCOL_GREATER) {
    713     oposite = UCOL_LESS;
    714   } else {
    715     oposite = UCOL_GREATER;
    716   }
    717 
    718   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    719     return UCOL_PRIMARY;
    720   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
    721     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
    722     return UCOL_SECONDARY;
    723   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
    724     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    725     return UCOL_TERTIARY;
    726   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
    727     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
    728     return UCOL_QUATERNARY;
    729   } else {
    730     return UCOL_IDENTICAL;
    731   }
    732 }
    733 
    734 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    735   uint32_t i = 0;
    736 
    737   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    738     buffer[0] = '=';
    739     buffer[1] = '=';
    740     buffer[2] = '\0';
    741   } else if(res == UCOL_GREATER) {
    742     for(i = 0; i<strength+1; i++) {
    743       buffer[i] = '>';
    744     }
    745     buffer[strength+1] = '\0';
    746   } else {
    747     for(i = 0; i<strength+1; i++) {
    748       buffer[i] = '<';
    749     }
    750     buffer[strength+1] = '\0';
    751   }
    752 
    753   return buffer;
    754 }
    755 
    756 
    757 
    758 static void logFailure (const char *platform, const char *test,
    759                         const UChar *source, const uint32_t sLen,
    760                         const UChar *target, const uint32_t tLen,
    761                         UCollationResult realRes, uint32_t realStrength,
    762                         UCollationResult expRes, uint32_t expStrength, UBool error) {
    763 
    764   uint32_t i = 0;
    765 
    766   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
    767   static int32_t maxOutputLength = 0;
    768   int32_t outputLength;
    769 
    770   *sEsc = *tEsc = *s = *t = 0;
    771   if(error == TRUE) {
    772     log_err("Difference between expected and generated order. Run test with -v for more info\n");
    773   } else if(VERBOSITY == 0) {
    774     return;
    775   }
    776   for(i = 0; i<sLen; i++) {
    777     sprintf(b, "%04X", source[i]);
    778     strcat(sEsc, "\\u");
    779     strcat(sEsc, b);
    780     strcat(s, b);
    781     strcat(s, " ");
    782     if(source[i] < 0x80) {
    783       sprintf(b, "(%c)", source[i]);
    784       strcat(sEsc, b);
    785     }
    786   }
    787   for(i = 0; i<tLen; i++) {
    788     sprintf(b, "%04X", target[i]);
    789     strcat(tEsc, "\\u");
    790     strcat(tEsc, b);
    791     strcat(t, b);
    792     strcat(t, " ");
    793     if(target[i] < 0x80) {
    794       sprintf(b, "(%c)", target[i]);
    795       strcat(tEsc, b);
    796     }
    797   }
    798 /*
    799   strcpy(output, "[[ ");
    800   strcat(output, sEsc);
    801   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    802   strcat(output, tEsc);
    803 
    804   strcat(output, " : ");
    805 
    806   strcat(output, sEsc);
    807   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    808   strcat(output, tEsc);
    809   strcat(output, " ]] ");
    810 
    811   log_verbose("%s", output);
    812 */
    813 
    814 
    815   strcpy(output, "DIFF: ");
    816 
    817   strcat(output, s);
    818   strcat(output, " : ");
    819   strcat(output, t);
    820 
    821   strcat(output, test);
    822   strcat(output, ": ");
    823 
    824   strcat(output, sEsc);
    825   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    826   strcat(output, tEsc);
    827 
    828   strcat(output, " ");
    829 
    830   strcat(output, platform);
    831   strcat(output, ": ");
    832 
    833   strcat(output, sEsc);
    834   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    835   strcat(output, tEsc);
    836 
    837   outputLength = (int32_t)strlen(output);
    838   if(outputLength > maxOutputLength) {
    839     maxOutputLength = outputLength;
    840     U_ASSERT(outputLength < sizeof(output));
    841   }
    842 
    843   log_verbose("%s\n", output);
    844 
    845 }
    846 
    847 /*
    848 static void printOutRules(const UChar *rules) {
    849   uint32_t len = u_strlen(rules);
    850   uint32_t i = 0;
    851   char toPrint;
    852   uint32_t line = 0;
    853 
    854   fprintf(stdout, "Rules:");
    855 
    856   for(i = 0; i<len; i++) {
    857     if(rules[i]<0x7f && rules[i]>=0x20) {
    858       toPrint = (char)rules[i];
    859       if(toPrint == '&') {
    860         line = 1;
    861         fprintf(stdout, "\n&");
    862       } else if(toPrint == ';') {
    863         fprintf(stdout, "<<");
    864         line+=2;
    865       } else if(toPrint == ',') {
    866         fprintf(stdout, "<<<");
    867         line+=3;
    868       } else {
    869         fprintf(stdout, "%c", toPrint);
    870         line++;
    871       }
    872     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
    873       fprintf(stdout, "\\u%04X", rules[i]);
    874       line+=6;
    875     }
    876     if(line>72) {
    877       fprintf(stdout, "\n");
    878       line = 0;
    879     }
    880   }
    881 
    882   log_verbose("\n");
    883 
    884 }
    885 */
    886 
    887 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
    888   uint32_t diffs = 0;
    889   UCollationResult realResult;
    890   uint32_t realStrength;
    891 
    892   uint32_t sLen = u_strlen(first);
    893   uint32_t tLen = u_strlen(second);
    894 
    895   realResult = func(collator, opts, first, sLen, second, tLen);
    896   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
    897 
    898   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
    899     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
    900     diffs++;
    901   } else if(realResult != UCOL_LESS || realStrength != strength) {
    902     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
    903     diffs++;
    904   }
    905   return diffs;
    906 }
    907 
    908 
    909 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
    910   const UChar *rules = NULL, *current = NULL;
    911   int32_t ruleLen = 0;
    912   uint32_t strength = 0;
    913   uint32_t chOffset = 0; uint32_t chLen = 0;
    914   uint32_t exOffset = 0; uint32_t exLen = 0;
    915   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    916 /*  uint32_t rExpsLen = 0; */
    917   uint32_t firstLen = 0, secondLen = 0;
    918   UBool varT = FALSE; UBool top_ = TRUE;
    919   uint16_t specs = 0;
    920   UBool startOfRules = TRUE;
    921   UColTokenParser src;
    922   UColOptionSet opts;
    923 
    924   UChar first[256];
    925   UChar second[256];
    926   UChar *rulesCopy = NULL;
    927 
    928   uint32_t UCAdiff = 0;
    929   uint32_t Windiff = 1;
    930   UParseError parseError;
    931 
    932   src.opts = &opts;
    933 
    934   rules = ucol_getRules(coll, &ruleLen);
    935 
    936   /*printOutRules(rules);*/
    937 
    938   if(U_SUCCESS(*status) && ruleLen > 0) {
    939     rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    940     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    941     src.current = src.source = rulesCopy;
    942     src.end = rulesCopy+ruleLen;
    943     src.extraCurrent = src.end;
    944     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    945     *first = *second = 0;
    946 
    947     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
    948       strength = src.parsedToken.strength;
    949       chOffset = src.parsedToken.charsOffset;
    950       chLen = src.parsedToken.charsLen;
    951       exOffset = src.parsedToken.extensionOffset;
    952       exLen = src.parsedToken.extensionLen;
    953       prefixOffset = src.parsedToken.prefixOffset;
    954       prefixLen = src.parsedToken.prefixLen;
    955       specs = src.parsedToken.flags;
    956 
    957       startOfRules = FALSE;
    958       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    959       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    960 
    961       u_strncpy(second,rulesCopy+chOffset, chLen);
    962       second[chLen] = 0;
    963       secondLen = chLen;
    964 
    965       if(exLen > 0) {
    966         u_strncat(first, rulesCopy+exOffset, exLen);
    967         first[firstLen+exLen] = 0;
    968         firstLen += exLen;
    969       }
    970 
    971       if(strength != UCOL_TOK_RESET) {
    972         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
    973           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
    974           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
    975         }
    976       }
    977 
    978 
    979       firstLen = chLen;
    980       u_strcpy(first, second);
    981 
    982     }
    983     if(UCAdiff != 0 && Windiff != 0) {
    984       log_verbose("\n");
    985     }
    986     if(UCAdiff == 0) {
    987       log_verbose("No immediate difference with %s!\n", refName);
    988     }
    989     if(Windiff == 0) {
    990       log_verbose("No immediate difference with Win32!\n");
    991     }
    992     free(rulesCopy);
    993   }
    994 }
    995 
    996 /*
    997  * Takes two CEs (lead and continuation) and
    998  * compares them as CEs should be compared:
    999  * primary vs. primary, secondary vs. secondary
   1000  * tertiary vs. tertiary
   1001  */
   1002 static int32_t compareCEs(uint32_t s1, uint32_t s2,
   1003                    uint32_t t1, uint32_t t2) {
   1004   uint32_t s = 0, t = 0;
   1005   if(s1 == t1 && s2 == t2) {
   1006     return 0;
   1007   }
   1008   s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
   1009   t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
   1010   if(s < t) {
   1011     return -1;
   1012   } else if(s > t) {
   1013     return 1;
   1014   } else {
   1015     s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
   1016     t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
   1017     if(s < t) {
   1018       return -1;
   1019     } else if(s > t) {
   1020       return 1;
   1021     } else {
   1022       s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
   1023       t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
   1024       if(s < t) {
   1025         return -1;
   1026       } else {
   1027         return 1;
   1028       }
   1029     }
   1030   }
   1031 }
   1032 
   1033 typedef struct {
   1034   uint32_t startCE;
   1035   uint32_t startContCE;
   1036   uint32_t limitCE;
   1037   uint32_t limitContCE;
   1038 } indirectBoundaries;
   1039 
   1040 /* these values are used for finding CE values for indirect positioning. */
   1041 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
   1042 /* values. It only works for resets and you cannot tailor indirect names */
   1043 /* An indirect name can define either an anchor point or a range. An     */
   1044 /* anchor point behaves in exactly the same way as a code point in reset */
   1045 /* would, except that it cannot be tailored. A range (we currently only  */
/* know of the [top] range) will explicitly set the upper bound for      */
   1047 /* generated CEs, thus allowing for better control over how many CEs can */
   1048 /* be squeezed between in the range without performance penalty.         */
   1049 /* In that respect, we use [top] for tailoring of locales that use CJK   */
   1050 /* characters. Other indirect values are currently a pure convenience,   */
   1051 /* they can be used to assure that the CEs will be always positioned in  */
   1052 /* the same place relative to a point with known properties (e.g. first  */
   1053 /* primary ignorable). */
   1054 static indirectBoundaries ucolIndirectBoundaries[15];
   1055 static UBool indirectBoundariesSet = FALSE;
   1056 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
   1057     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
   1058     /* to initalize here. */
   1059     ucolIndirectBoundaries[indexR].startCE = start[0];
   1060     ucolIndirectBoundaries[indexR].startContCE = start[1];
   1061     if(end) {
   1062         ucolIndirectBoundaries[indexR].limitCE = end[0];
   1063         ucolIndirectBoundaries[indexR].limitContCE = end[1];
   1064     } else {
   1065         ucolIndirectBoundaries[indexR].limitCE = 0;
   1066         ucolIndirectBoundaries[indexR].limitContCE = 0;
   1067     }
   1068 }
   1069 
   1070 static void testCEs(UCollator *coll, UErrorCode *status) {
   1071     const UChar *rules = NULL, *current = NULL;
   1072     int32_t ruleLen = 0;
   1073 
   1074     uint32_t strength = 0;
   1075     uint32_t maxStrength = UCOL_IDENTICAL;
   1076     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
   1077     uint32_t lastCE;
   1078     uint32_t lastContCE;
   1079 
   1080     int32_t result = 0;
   1081     uint32_t chOffset = 0; uint32_t chLen = 0;
   1082     uint32_t exOffset = 0; uint32_t exLen = 0;
   1083     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
   1084     uint32_t oldOffset = 0;
   1085 
   1086     /* uint32_t rExpsLen = 0; */
   1087     /* uint32_t firstLen = 0; */
   1088     uint16_t specs = 0;
   1089     UBool varT = FALSE; UBool top_ = TRUE;
   1090     UBool startOfRules = TRUE;
   1091     UBool before = FALSE;
   1092     UColTokenParser src;
   1093     UColOptionSet opts;
   1094     UParseError parseError;
   1095     UChar *rulesCopy = NULL;
   1096     collIterate c;
   1097     UCAConstants *consts = NULL;
   1098     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
   1099         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
   1100     const char *colLoc;
   1101     UCollator *UCA = ucol_open("root", status);
   1102 
   1103     if (U_FAILURE(*status)) {
   1104         log_err("Could not open root collator %s\n", u_errorName(*status));
   1105         return;
   1106     }
   1107 
   1108     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
   1109     if (U_FAILURE(*status)) {
   1110         log_err("Could not get collator name: %s\n", u_errorName(*status));
   1111         return;
   1112     }
   1113 
   1114     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
   1115     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
   1116     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
   1117     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
   1118     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
   1119 
   1120     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
   1121 
   1122     src.opts = &opts;
   1123 
   1124     rules = ucol_getRules(coll, &ruleLen);
   1125 
   1126     src.invUCA = ucol_initInverseUCA(status);
   1127 
   1128     if(indirectBoundariesSet == FALSE) {
   1129         /* UCOL_RESET_TOP_VALUE */
   1130         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1131         /* UCOL_FIRST_PRIMARY_IGNORABLE */
   1132         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
   1133         /* UCOL_LAST_PRIMARY_IGNORABLE */
   1134         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
   1135         /* UCOL_FIRST_SECONDARY_IGNORABLE */
   1136         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
   1137         /* UCOL_LAST_SECONDARY_IGNORABLE */
   1138         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
   1139         /* UCOL_FIRST_TERTIARY_IGNORABLE */
   1140         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
   1141         /* UCOL_LAST_TERTIARY_IGNORABLE */
   1142         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
   1143         /* UCOL_FIRST_VARIABLE */
   1144         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
   1145         /* UCOL_LAST_VARIABLE */
   1146         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
   1147         /* UCOL_FIRST_NON_VARIABLE */
   1148         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
   1149         /* UCOL_LAST_NON_VARIABLE */
   1150         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1151         /* UCOL_FIRST_IMPLICIT */
   1152         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
   1153         /* UCOL_LAST_IMPLICIT */
   1154         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
   1155         /* UCOL_FIRST_TRAILING */
   1156         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
   1157         /* UCOL_LAST_TRAILING */
   1158         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
   1159         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
   1160         indirectBoundariesSet = TRUE;
   1161     }
   1162 
   1163 
   1164     if(U_SUCCESS(*status) && ruleLen > 0) {
   1165         rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   1166         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
   1167         src.current = src.source = rulesCopy;
   1168         src.end = rulesCopy+ruleLen;
   1169         src.extraCurrent = src.end;
   1170         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1171 
   1172         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
   1173             strength = src.parsedToken.strength;
   1174             chOffset = src.parsedToken.charsOffset;
   1175             chLen = src.parsedToken.charsLen;
   1176             exOffset = src.parsedToken.extensionOffset;
   1177             exLen = src.parsedToken.extensionLen;
   1178             prefixOffset = src.parsedToken.prefixOffset;
   1179             prefixLen = src.parsedToken.prefixLen;
   1180             specs = src.parsedToken.flags;
   1181 
   1182             startOfRules = FALSE;
   1183             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
   1184             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
   1185 
   1186             uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
   1187 
   1188             currCE = ucol_getNextCE(coll, &c, status);
   1189             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
   1190                 log_verbose("Thai prevowel detected. Will pick next CE\n");
   1191                 currCE = ucol_getNextCE(coll, &c, status);
   1192             }
   1193 
   1194             currContCE = ucol_getNextCE(coll, &c, status);
   1195             if(!isContinuation(currContCE)) {
   1196                 currContCE = 0;
   1197             }
   1198 
   1199             /* we need to repack CEs here */
   1200 
   1201             if(strength == UCOL_TOK_RESET) {
   1202                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
   1203                 if(top_ == TRUE) {
   1204                     int32_t index = src.parsedToken.indirectIndex;
   1205 
   1206                     nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
   1207                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
   1208                 } else {
   1209                     nextCE = baseCE = currCE;
   1210                     nextContCE = baseContCE = currContCE;
   1211                 }
   1212                 maxStrength = UCOL_IDENTICAL;
   1213             } else {
   1214                 if(strength < maxStrength) {
   1215                     maxStrength = strength;
   1216                     if(baseCE == UCOL_RESET_TOP_VALUE) {
   1217                         log_verbose("Resetting to [top]\n");
   1218                         nextCE = UCOL_NEXT_TOP_VALUE;
   1219                         nextContCE = UCOL_NEXT_TOP_CONT;
   1220                     } else {
   1221                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
   1222                     }
   1223                     if(result < 0) {
   1224                         if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
   1225                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
   1226                             return;
   1227                         } else {
   1228                             log_err("%s: couldn't find the CE\n", colLoc);
   1229                             return;
   1230                         }
   1231                     }
   1232                 }
   1233 
   1234                 currCE &= 0xFFFFFF3F;
   1235                 currContCE &= 0xFFFFFFBF;
   1236 
   1237                 if(maxStrength == UCOL_IDENTICAL) {
   1238                     if(baseCE != currCE || baseContCE != currContCE) {
   1239                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1240                     }
   1241                 } else {
   1242                     if(strength == UCOL_IDENTICAL) {
   1243                         if(lastCE != currCE || lastContCE != currContCE) {
   1244                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1245                         }
   1246                     } else {
   1247                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
   1248                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
   1249                             log_err("%s: current CE is not less than base CE\n", colLoc);
   1250                         }
   1251                         if(!before) {
   1252                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
   1253                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1254                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1255                             }
   1256                         } else {
   1257                             before = FALSE;
   1258                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
   1259                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1260                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1261                             }
   1262                         }
   1263                     }
   1264                 }
   1265 
   1266             }
   1267 
   1268             oldOffset = chOffset;
   1269             lastCE = currCE & 0xFFFFFF3F;
   1270             lastContCE = currContCE & 0xFFFFFFBF;
   1271         }
   1272         free(rulesCopy);
   1273     }
   1274     ucol_close(UCA);
   1275 }
   1276 
   1277 #if 0
   1278 /* these locales are now picked from index RB */
   1279 static const char* localesToTest[] = {
   1280 "ar", "bg", "ca", "cs", "da",
   1281 "el", "en_BE", "en_US_POSIX",
   1282 "es", "et", "fi", "fr", "hi",
   1283 "hr", "hu", "is", "iw", "ja",
   1284 "ko", "lt", "lv", "mk", "mt",
   1285 "nb", "nn", "nn_NO", "pl", "ro",
   1286 "ru", "sh", "sk", "sl", "sq",
   1287 "sr", "sv", "th", "tr", "uk",
   1288 "vi", "zh", "zh_TW"
   1289 };
   1290 #endif
   1291 
   1292 static const char* rulesToTest[] = {
   1293   /* Funky fa rule */
   1294   "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
   1295   /*"& Z < p, P",*/
   1296     /* Cui Mins rules */
   1297     "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
   1298     "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
   1299     "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
   1300     "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
   1301     "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
   1302     "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
   1303     "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
   1304 };
   1305 
   1306 
   1307 static void TestCollations(void) {
   1308     int32_t noOfLoc = uloc_countAvailable();
   1309     int32_t i = 0, j = 0;
   1310 
   1311     UErrorCode status = U_ZERO_ERROR;
   1312     char cName[256];
   1313     UChar name[256];
   1314     int32_t nameSize;
   1315 
   1316 
   1317     const char *locName = NULL;
   1318     UCollator *coll = NULL;
   1319     UCollator *UCA = ucol_open("", &status);
   1320     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
   1321     if (U_FAILURE(status)) {
   1322         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
   1323         return;
   1324     }
   1325     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
   1326 
   1327     for(i = 0; i<noOfLoc; i++) {
   1328         status = U_ZERO_ERROR;
   1329         locName = uloc_getAvailable(i);
   1330         if(uprv_strcmp("ja", locName) == 0) {
   1331             log_verbose("Don't know how to test prefixes\n");
   1332             continue;
   1333         }
   1334         if(hasCollationElements(locName)) {
   1335             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
   1336             for(j = 0; j<nameSize; j++) {
   1337                 cName[j] = (char)name[j];
   1338             }
   1339             cName[nameSize] = 0;
   1340             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1341             coll = ucol_open(locName, &status);
   1342             if(U_SUCCESS(status)) {
   1343                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
   1344                 ucol_close(coll);
   1345             } else {
   1346                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
   1347                 status = U_ZERO_ERROR;
   1348             }
   1349         }
   1350     }
   1351     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
   1352     ucol_close(UCA);
   1353 }
   1354 
   1355 static void RamsRulesTest(void) {
   1356     UErrorCode status = U_ZERO_ERROR;
   1357     int32_t i = 0;
   1358     UCollator *coll = NULL;
   1359     UChar rule[2048];
   1360     uint32_t ruleLen;
   1361     int32_t noOfLoc = uloc_countAvailable();
   1362     const char *locName = NULL;
   1363 
   1364     log_verbose("RamsRulesTest\n");
   1365 
   1366     for(i = 0; i<noOfLoc; i++) {
   1367         status = U_ZERO_ERROR;
   1368         locName = uloc_getAvailable(i);
   1369         if(hasCollationElements(locName)) {
   1370             if (uprv_strcmp("ja", locName)==0) {
   1371                 log_verbose("Don't know how to test Japanese because of prefixes\n");
   1372                 continue;
   1373             }
   1374             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
   1375                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
   1376                 continue;
   1377             }
   1378             if (uprv_strcmp("km", locName)==0 ||
   1379                 uprv_strcmp("km_KH", locName)==0 ||
   1380                 uprv_strcmp("si", locName)==0 ||
   1381                 uprv_strcmp("si_LK", locName)==0 ||
   1382                 uprv_strcmp("zh", locName)==0 ||
   1383                 uprv_strcmp("zh_Hant", locName)==0 ) {
   1384                     continue;  /* TODO: enable these locale tests after trac#6040 is fixed. */
   1385             }
   1386             log_verbose("Testing locale %s\n", locName);
   1387             coll = ucol_open(locName, &status);
   1388             if(U_SUCCESS(status)) {
   1389                 if(coll->image->jamoSpecial == TRUE) {
   1390                     log_err("%s has special JAMOs\n", locName);
   1391                 }
   1392                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
   1393                 testCollator(coll, &status);
   1394                 testCEs(coll, &status);
   1395                 ucol_close(coll);
   1396             }
   1397         }
   1398     }
   1399 
   1400     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
   1401         log_verbose("Testing rule: %s\n", rulesToTest[i]);
   1402         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
   1403         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1404         if(U_SUCCESS(status)) {
   1405             testCollator(coll, &status);
   1406             testCEs(coll, &status);
   1407             ucol_close(coll);
   1408         }
   1409     }
   1410 
   1411 }
   1412 
   1413 static void IsTailoredTest(void) {
   1414     UErrorCode status = U_ZERO_ERROR;
   1415     uint32_t i = 0;
   1416     UCollator *coll = NULL;
   1417     UChar rule[2048];
   1418     UChar tailored[2048];
   1419     UChar notTailored[2048];
   1420     uint32_t ruleLen, tailoredLen, notTailoredLen;
   1421 
   1422     log_verbose("IsTailoredTest\n");
   1423 
   1424     u_uastrcpy(rule, "&Z < A, B, C;c < d");
   1425     ruleLen = u_strlen(rule);
   1426 
   1427     u_uastrcpy(tailored, "ABCcd");
   1428     tailoredLen = u_strlen(tailored);
   1429 
   1430     u_uastrcpy(notTailored, "ZabD");
   1431     notTailoredLen = u_strlen(notTailored);
   1432 
   1433     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1434     if(U_SUCCESS(status)) {
   1435         for(i = 0; i<tailoredLen; i++) {
   1436             if(!ucol_isTailored(coll, tailored[i], &status)) {
   1437                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
   1438             }
   1439         }
   1440         for(i = 0; i<notTailoredLen; i++) {
   1441             if(ucol_isTailored(coll, notTailored[i], &status)) {
   1442                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
   1443             }
   1444         }
   1445         ucol_close(coll);
   1446     }
   1447     else {
   1448         log_err_status(status, "Can't tailor rules\n");
   1449     }
   1450     /* Code coverage */
   1451     status = U_ZERO_ERROR;
   1452     coll = ucol_open("ja", &status);
   1453     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
   1454         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
   1455     }
   1456     ucol_close(coll);
   1457 }
   1458 
   1459 
   1460 const static char chTest[][20] = {
   1461   "c",
   1462   "C",
   1463   "ca", "cb", "cx", "cy", "CZ",
   1464   "c\\u030C", "C\\u030C",
   1465   "h",
   1466   "H",
   1467   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
   1468   "ch", "cH", "Ch", "CH",
   1469   "cha", "charly", "che", "chh", "chch", "chr",
   1470   "i", "I", "iarly",
   1471   "r", "R",
   1472   "r\\u030C", "R\\u030C",
   1473   "s",
   1474   "S",
   1475   "s\\u030C", "S\\u030C",
   1476   "z", "Z",
   1477   "z\\u030C", "Z\\u030C"
   1478 };
   1479 
   1480 static void TestChMove(void) {
   1481     UChar t1[256] = {0};
   1482     UChar t2[256] = {0};
   1483 
   1484     uint32_t i = 0, j = 0;
   1485     uint32_t size = 0;
   1486     UErrorCode status = U_ZERO_ERROR;
   1487 
   1488     UCollator *coll = ucol_open("cs", &status);
   1489 
   1490     if(U_SUCCESS(status)) {
   1491         size = sizeof(chTest)/sizeof(chTest[0]);
   1492         for(i = 0; i < size-1; i++) {
   1493             for(j = i+1; j < size; j++) {
   1494                 u_unescape(chTest[i], t1, 256);
   1495                 u_unescape(chTest[j], t2, 256);
   1496                 doTest(coll, t1, t2, UCOL_LESS);
   1497             }
   1498         }
   1499     }
   1500     else {
   1501         log_err("Can't open collator");
   1502     }
   1503     ucol_close(coll);
   1504 }
   1505 
   1506 
   1507 
   1508 
   1509 const static char impTest[][20] = {
   1510   "\\u4e00",
   1511     "a",
   1512     "A",
   1513     "b",
   1514     "B",
   1515     "\\u4e01"
   1516 };
   1517 
   1518 
   1519 static void TestImplicitTailoring(void) {
   1520   static const struct {
   1521     const char *rules;
   1522     const char *data[10];
   1523     const uint32_t len;
   1524   } tests[] = {
   1525       { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
   1526       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
   1527       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
   1528       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
   1529   };
   1530 
   1531   int32_t i = 0;
   1532 
   1533   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   1534       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   1535   }
   1536 
   1537 /*
   1538   UChar t1[256] = {0};
   1539   UChar t2[256] = {0};
   1540 
   1541   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
   1542 
   1543   uint32_t i = 0, j = 0;
   1544   uint32_t size = 0;
   1545   uint32_t ruleLen = 0;
   1546   UErrorCode status = U_ZERO_ERROR;
   1547   UCollator *coll = NULL;
   1548   ruleLen = u_unescape(rule, t1, 256);
   1549 
   1550   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1551 
   1552   if(U_SUCCESS(status)) {
   1553     size = sizeof(impTest)/sizeof(impTest[0]);
   1554     for(i = 0; i < size-1; i++) {
   1555       for(j = i+1; j < size; j++) {
   1556         u_unescape(impTest[i], t1, 256);
   1557         u_unescape(impTest[j], t2, 256);
   1558         doTest(coll, t1, t2, UCOL_LESS);
   1559       }
   1560     }
   1561   }
   1562   else {
   1563     log_err("Can't open collator");
   1564   }
   1565   ucol_close(coll);
   1566   */
   1567 }
   1568 
   1569 static void TestFCDProblem(void) {
   1570   UChar t1[256] = {0};
   1571   UChar t2[256] = {0};
   1572 
   1573   const char *s1 = "\\u0430\\u0306\\u0325";
   1574   const char *s2 = "\\u04D1\\u0325";
   1575 
   1576   UErrorCode status = U_ZERO_ERROR;
   1577   UCollator *coll = ucol_open("", &status);
   1578   u_unescape(s1, t1, 256);
   1579   u_unescape(s2, t2, 256);
   1580 
   1581   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
   1582   doTest(coll, t1, t2, UCOL_EQUAL);
   1583 
   1584   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   1585   doTest(coll, t1, t2, UCOL_EQUAL);
   1586 
   1587   ucol_close(coll);
   1588 }
   1589 
   1590 /*
   1591 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
   1592 We're only using NFC/NFD in this test.
   1593 */
   1594 #define NORM_BUFFER_TEST_LEN 18
   1595 typedef struct {
   1596   UChar32 u;
   1597   UChar NFC[NORM_BUFFER_TEST_LEN];
   1598   UChar NFD[NORM_BUFFER_TEST_LEN];
   1599 } tester;
   1600 
   1601 static void TestComposeDecompose(void) {
   1602     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
   1603     static const UChar UNICODESET_STR[] = {
   1604         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
   1605         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
   1606         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
   1607     };
   1608     int32_t noOfLoc;
   1609     int32_t i = 0, j = 0;
   1610 
   1611     UErrorCode status = U_ZERO_ERROR;
   1612     const char *locName = NULL;
   1613     uint32_t nfcSize;
   1614     uint32_t nfdSize;
   1615     tester **t;
   1616     uint32_t noCases = 0;
   1617     UCollator *coll = NULL;
   1618     UChar32 u = 0;
   1619     UChar comp[NORM_BUFFER_TEST_LEN];
   1620     uint32_t len = 0;
   1621     UCollationElements *iter;
   1622     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
   1623     int32_t charsToTestSize;
   1624 
   1625     noOfLoc = uloc_countAvailable();
   1626 
   1627     coll = ucol_open("", &status);
   1628     if (U_FAILURE(status)) {
   1629         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
   1630         return;
   1631     }
   1632     charsToTestSize = uset_size(charsToTest);
   1633     if (charsToTestSize <= 0) {
   1634         log_err("Set was zero. Missing data?\n");
   1635         return;
   1636     }
   1637     t = malloc(charsToTestSize * sizeof(tester *));
   1638     t[0] = (tester *)malloc(sizeof(tester));
   1639     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
   1640 
   1641     for(u = 0; u < charsToTestSize; u++) {
   1642         UChar32 ch = uset_charAt(charsToTest, u);
   1643         len = 0;
   1644         UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
   1645         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1646         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1647 
   1648         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
   1649           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
   1650             t[noCases]->u = ch;
   1651             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
   1652                 u_strncpy(t[noCases]->NFC, comp, len);
   1653                 t[noCases]->NFC[len] = 0;
   1654             }
   1655             noCases++;
   1656             t[noCases] = (tester *)malloc(sizeof(tester));
   1657             uprv_memset(t[noCases], 0, sizeof(tester));
   1658         }
   1659     }
   1660     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
   1661     uset_close(charsToTest);
   1662     charsToTest = NULL;
   1663 
   1664     for(u=0; u<(UChar32)noCases; u++) {
   1665         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1666             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
   1667             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1668         }
   1669     }
   1670     /*
   1671     for(u = 0; u < charsToTestSize; u++) {
   1672       if(!(u&0xFFFF)) {
   1673         log_verbose("%08X ", u);
   1674       }
   1675       uprv_memset(t[noCases], 0, sizeof(tester));
   1676       t[noCases]->u = u;
   1677       len = 0;
   1678       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
   1679       comp[len] = 0;
   1680       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1681       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1682       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
   1683       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
   1684     }
   1685     */
   1686 
   1687     ucol_close(coll);
   1688 
   1689     log_verbose("Testing locales, number of cases = %i\n", noCases);
   1690     for(i = 0; i<noOfLoc; i++) {
   1691         status = U_ZERO_ERROR;
   1692         locName = uloc_getAvailable(i);
   1693         if(hasCollationElements(locName)) {
   1694             char cName[256];
   1695             UChar name[256];
   1696             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
   1697 
   1698             for(j = 0; j<nameSize; j++) {
   1699                 cName[j] = (char)name[j];
   1700             }
   1701             cName[nameSize] = 0;
   1702             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1703 
   1704             coll = ucol_open(locName, &status);
   1705             ucol_setStrength(coll, UCOL_IDENTICAL);
   1706             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1707 
   1708             for(u=0; u<(UChar32)noCases; u++) {
   1709                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1710                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
   1711                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1712                     log_verbose("Testing NFC\n");
   1713                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
   1714                     backAndForth(iter);
   1715                     log_verbose("Testing NFD\n");
   1716                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1717                     backAndForth(iter);
   1718                 }
   1719             }
   1720             ucol_closeElements(iter);
   1721             ucol_close(coll);
   1722         }
   1723     }
   1724     for(u = 0; u <= (UChar32)noCases; u++) {
   1725         free(t[u]);
   1726     }
   1727     free(t);
   1728 }
   1729 
   1730 static void TestEmptyRule(void) {
   1731   UErrorCode status = U_ZERO_ERROR;
   1732   UChar rulez[] = { 0 };
   1733   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1734 
   1735   ucol_close(coll);
   1736 }
   1737 
   1738 static void TestUCARules(void) {
   1739   UErrorCode status = U_ZERO_ERROR;
   1740   UChar b[256];
   1741   UChar *rules = b;
   1742   uint32_t ruleLen = 0;
   1743   UCollator *UCAfromRules = NULL;
   1744   UCollator *coll = ucol_open("", &status);
   1745   if(status == U_FILE_ACCESS_ERROR) {
   1746     log_data_err("Is your data around?\n");
   1747     return;
   1748   } else if(U_FAILURE(status)) {
   1749     log_err("Error opening collator\n");
   1750     return;
   1751   }
   1752   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
   1753 
   1754   log_verbose("TestUCARules\n");
   1755   if(ruleLen > 256) {
   1756     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
   1757     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
   1758   }
   1759   log_verbose("Rules length is %d\n", ruleLen);
   1760   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1761   if(U_SUCCESS(status)) {
   1762     ucol_close(UCAfromRules);
   1763   } else {
   1764     log_verbose("Unable to create a collator from UCARules!\n");
   1765   }
   1766 /*
   1767   u_unescape(blah, b, 256);
   1768   ucol_getSortKey(coll, b, 1, res, 256);
   1769 */
   1770   ucol_close(coll);
   1771   if(rules != b) {
   1772     free(rules);
   1773   }
   1774 }
   1775 
   1776 
   1777 /* Pinyin tonal order */
   1778 /*
   1779     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
   1780           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
   1781     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
   1782     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
   1783     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
   1784     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
   1785       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
   1786 .. (\u00fc)
   1787 
   1788 However, in testing we got the following order:
   1789     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
   1790           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
   1791     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
   1792 .. (\u0113)
   1793     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
   1794     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
   1795     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
   1796 .. (\u01d8)
   1797       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
   1798 */
   1799 
   1800 static void TestBefore(void) {
   1801   const static char *data[] = {
   1802       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
   1803       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
   1804       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
   1805       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
   1806       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
   1807       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
   1808   };
   1809   genericRulesStarter(
   1810     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
   1811     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
   1812     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
   1813     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
   1814     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
   1815     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
   1816     data, sizeof(data)/sizeof(data[0]));
   1817 }
   1818 
   1819 #if 0
   1820 /* superceded by TestBeforePinyin */
   1821 static void TestJ784(void) {
   1822   const static char *data[] = {
   1823       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
   1824       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
   1825       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
   1826       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
   1827       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
   1828       "\\u00fc",
   1829            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
   1830   };
   1831   genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
   1832 }
   1833 #endif
   1834 
   1835 #if 0
   1836 /* superceded by the changes to the lv locale */
   1837 static void TestJ831(void) {
   1838   const static char *data[] = {
   1839     "I",
   1840       "i",
   1841       "Y",
   1842       "y"
   1843   };
   1844   genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
   1845 }
   1846 #endif
   1847 
   1848 static void TestJ815(void) {
   1849   const static char *data[] = {
   1850     "aa",
   1851       "Aa",
   1852       "ab",
   1853       "Ab",
   1854       "ad",
   1855       "Ad",
   1856       "ae",
   1857       "Ae",
   1858       "\\u00e6",
   1859       "\\u00c6",
   1860       "af",
   1861       "Af",
   1862       "b",
   1863       "B"
   1864   };
   1865   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
   1866   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
   1867 }
   1868 
   1869 
   1870 /*
   1871 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
   1872 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
   1873 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
   1874 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
   1875 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
   1876 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
   1877 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
   1878 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
   1879 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
   1880 */
   1881 static void TestRedundantRules(void) {
   1882   int32_t i;
   1883 
   1884   static const struct {
   1885       const char *rules;
   1886       const char *expectedRules;
   1887       const char *testdata[8];
   1888       uint32_t testdatalen;
   1889   } tests[] = {
   1890     /* this test conflicts with positioning of CODAN placeholder */
   1891        /*{
   1892         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
   1893         "&\\u2089<<<x",
   1894         {"\\u2089", "x"}, 2
   1895        }, */
   1896     /* this test conflicts with the [before x] syntax tightening */
   1897       /*{
   1898         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
   1899         "&\\u0252<<<x",
   1900         {"\\u0252", "x"}, 2
   1901       }, */
   1902     /* this test conflicts with the [before x] syntax tightening */
   1903       /*{
   1904          "& a < b <<< c << d <<< e& [before 1] e <<< x",
   1905          "& a <<< x < b <<< c << d <<< e",
   1906         {"a", "x", "b", "c", "d", "e"}, 6
   1907       }, */
   1908       {
   1909         "& a < b < c < d& [before 1] c < m",
   1910         "& a < b < m < c < d",
   1911         {"a", "b", "m", "c", "d"}, 5
   1912       },
   1913       {
   1914         "& a < b <<< c << d <<< e& [before 3] e <<< x",
   1915         "& a < b <<< c << d <<< x <<< e",
   1916         {"a", "b", "c", "d", "x", "e"}, 6
   1917       },
   1918     /* this test conflicts with the [before x] syntax tightening */
   1919       /* {
   1920         "& a < b <<< c << d <<< e& [before 2] e <<< x",
   1921         "& a < b <<< c <<< x << d <<< e",
   1922         {"a", "b", "c", "x", "d", "e"},, 6
   1923       }, */
   1924       {
   1925         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
   1926         "& a < b <<< c << d <<< e <<< f < x < g",
   1927         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
   1928       },
   1929       {
   1930         "& a <<< b << c < d& a < m",
   1931         "& a <<< b << c < m < d",
   1932         {"a", "b", "c", "m", "d"}, 5
   1933       },
   1934       {
   1935         "&a<b<<b\\u0301 &z<b",
   1936         "&a<b\\u0301 &z<b",
   1937         {"a", "b\\u0301", "z", "b"}, 4
   1938       },
   1939       {
   1940         "&z<m<<<q<<<m",
   1941         "&z<q<<<m",
   1942         {"z", "q", "m"},3
   1943       },
   1944       {
   1945         "&z<<<m<q<<<m",
   1946         "&z<q<<<m",
   1947         {"z", "q", "m"}, 3
   1948       },
   1949       {
   1950         "& a < b < c < d& r < c",
   1951         "& a < b < d& r < c",
   1952         {"a", "b", "d"}, 3
   1953       },
   1954       {
   1955         "& a < b < c < d& r < c",
   1956         "& a < b < d& r < c",
   1957         {"r", "c"}, 2
   1958       },
   1959       {
   1960         "& a < b < c < d& c < m",
   1961         "& a < b < c < m < d",
   1962         {"a", "b", "c", "m", "d"}, 5
   1963       },
   1964       {
   1965         "& a < b < c < d& a < m",
   1966         "& a < m < b < c < d",
   1967         {"a", "m", "b", "c", "d"}, 5
   1968       }
   1969   };
   1970 
   1971 
   1972   UCollator *credundant = NULL;
   1973   UCollator *cresulting = NULL;
   1974   UErrorCode status = U_ZERO_ERROR;
   1975   UChar rlz[2048] = { 0 };
   1976   uint32_t rlen = 0;
   1977 
   1978   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
   1979     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
   1980     rlen = u_unescape(tests[i].rules, rlz, 2048);
   1981 
   1982     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   1983     if(status == U_FILE_ACCESS_ERROR) {
   1984       log_data_err("Is your data around?\n");
   1985       return;
   1986     } else if(U_FAILURE(status)) {
   1987       log_err("Error opening collator\n");
   1988       return;
   1989     }
   1990 
   1991     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
   1992     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   1993 
   1994     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
   1995 
   1996     ucol_close(credundant);
   1997     ucol_close(cresulting);
   1998 
   1999     log_verbose("testing using data\n");
   2000 
   2001     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
   2002   }
   2003 
   2004 }
   2005 
   2006 static void TestExpansionSyntax(void) {
   2007   int32_t i;
   2008 
   2009   const static char *rules[] = {
   2010     "&AE <<< a << b <<< c &d <<< f",
   2011     "&AE <<< a <<< b << c << d < e < f <<< g",
   2012     "&AE <<< B <<< C / D <<< F"
   2013   };
   2014 
   2015   const static char *expectedRules[] = {
   2016     "&A <<< a / E << b / E <<< c /E  &d <<< f",
   2017     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
   2018     "&A <<< B / E <<< C / ED <<< F / E"
   2019   };
   2020 
   2021   const static char *testdata[][8] = {
   2022     {"AE", "a", "b", "c"},
   2023     {"AE", "a", "b", "c", "d", "e", "f", "g"},
   2024     {"AE", "B", "C"} /* / ED <<< F / E"},*/
   2025   };
   2026 
   2027   const static uint32_t testdatalen[] = {
   2028       4,
   2029       8,
   2030       3
   2031   };
   2032 
   2033 
   2034 
   2035   UCollator *credundant = NULL;
   2036   UCollator *cresulting = NULL;
   2037   UErrorCode status = U_ZERO_ERROR;
   2038   UChar rlz[2048] = { 0 };
   2039   uint32_t rlen = 0;
   2040 
   2041   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
   2042     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
   2043     rlen = u_unescape(rules[i], rlz, 2048);
   2044 
   2045     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2046     if(status == U_FILE_ACCESS_ERROR) {
   2047       log_data_err("Is your data around?\n");
   2048       return;
   2049     } else if(U_FAILURE(status)) {
   2050       log_err("Error opening collator\n");
   2051       return;
   2052     }
   2053     rlen = u_unescape(expectedRules[i], rlz, 2048);
   2054     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2055 
   2056     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
   2057     /* as a hard error test, but only in information mode */
   2058     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
   2059 
   2060     ucol_close(credundant);
   2061     ucol_close(cresulting);
   2062 
   2063     log_verbose("testing using data\n");
   2064 
   2065     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
   2066   }
   2067 }
   2068 
   2069 static void TestCase(void)
   2070 {
   2071     const static UChar gRules[MAX_TOKEN_LEN] =
   2072     /*" & 0 < 1,\u2461<a,A"*/
   2073     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
   2074 
   2075     const static UChar testCase[][MAX_TOKEN_LEN] =
   2076     {
   2077         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
   2078         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
   2079         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
   2080         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
   2081     };
   2082 
   2083     const static UCollationResult caseTestResults[][9] =
   2084     {
   2085         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2086         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
   2087         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2088         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
   2089     };
   2090 
   2091     const static UColAttributeValue caseTestAttributes[][2] =
   2092     {
   2093         { UCOL_LOWER_FIRST, UCOL_OFF},
   2094         { UCOL_UPPER_FIRST, UCOL_OFF},
   2095         { UCOL_LOWER_FIRST, UCOL_ON},
   2096         { UCOL_UPPER_FIRST, UCOL_ON}
   2097     };
   2098     int32_t i,j,k;
   2099     UErrorCode status = U_ZERO_ERROR;
   2100     UCollationElements *iter;
   2101     UCollator  *myCollation;
   2102     myCollation = ucol_open("en_US", &status);
   2103 
   2104     if(U_FAILURE(status)){
   2105         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2106         return;
   2107     }
   2108     log_verbose("Testing different case settings\n");
   2109     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2110 
   2111     for(k = 0; k<4; k++) {
   2112       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2113       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2114       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
   2115       for (i = 0; i < 3 ; i++) {
   2116         for(j = i+1; j<4; j++) {
   2117           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2118         }
   2119       }
   2120     }
   2121     ucol_close(myCollation);
   2122 
   2123     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   2124     if(U_FAILURE(status)){
   2125         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2126         return;
   2127     }
   2128     log_verbose("Testing different case settings with custom rules\n");
   2129     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2130 
   2131     for(k = 0; k<4; k++) {
   2132       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2133       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2134       for (i = 0; i < 3 ; i++) {
   2135         for(j = i+1; j<4; j++) {
   2136           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
   2137           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2138           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
   2139           backAndForth(iter);
   2140           ucol_closeElements(iter);
   2141           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
   2142           backAndForth(iter);
   2143           ucol_closeElements(iter);
   2144         }
   2145       }
   2146     }
   2147     ucol_close(myCollation);
   2148     {
   2149       const static char *lowerFirst[] = {
   2150         "h",
   2151         "H",
   2152         "ch",
   2153         "Ch",
   2154         "CH",
   2155         "cha",
   2156         "chA",
   2157         "Cha",
   2158         "ChA",
   2159         "CHa",
   2160         "CHA",
   2161         "i",
   2162         "I"
   2163       };
   2164 
   2165       const static char *upperFirst[] = {
   2166         "H",
   2167         "h",
   2168         "CH",
   2169         "Ch",
   2170         "ch",
   2171         "CHA",
   2172         "CHa",
   2173         "ChA",
   2174         "Cha",
   2175         "chA",
   2176         "cha",
   2177         "I",
   2178         "i"
   2179       };
   2180       log_verbose("mixed case test\n");
   2181       log_verbose("lower first, case level off\n");
   2182       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2183       log_verbose("upper first, case level off\n");
   2184       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2185       log_verbose("lower first, case level on\n");
   2186       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2187       log_verbose("upper first, case level on\n");
   2188       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2189     }
   2190 
   2191 }
   2192 
   2193 static void TestIncrementalNormalize(void) {
   2194 
   2195     /*UChar baseA     =0x61;*/
   2196     UChar baseA     =0x41;
   2197 /*    UChar baseB     = 0x42;*/
   2198     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
   2199     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
   2200     /*
   2201         0x316 is combining grave accent below, cc=220
   2202         0x321 is combining palatalized hook below, cc=202
   2203         0x300 is combining grave accent, cc=230
   2204     */
   2205 
   2206 #define MAXSLEN 2000
   2207     /*int          maxSLen   = 64000;*/
   2208     int          sLen;
   2209     int          i;
   2210 
   2211     UCollator        *coll;
   2212     UErrorCode       status = U_ZERO_ERROR;
   2213     UCollationResult result;
   2214 
   2215     int32_t myQ = QUICK;
   2216 
   2217     if(QUICK < 0) {
   2218       QUICK = 1;
   2219     }
   2220 
   2221     {
   2222         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
   2223         /*          most buffers along the way.*/
   2224         UChar            strA[MAXSLEN+1];
   2225         UChar            strB[MAXSLEN+1];
   2226 
   2227         coll = ucol_open("en_US", &status);
   2228         if(status == U_FILE_ACCESS_ERROR) {
   2229           log_data_err("Is your data around?\n");
   2230           return;
   2231         } else if(U_FAILURE(status)) {
   2232           log_err("Error opening collator\n");
   2233           return;
   2234         }
   2235         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2236 
   2237         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   2238         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   2239         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   2240         for (sLen = 500; sLen<501; sLen++) {
   2241         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   2242             strA[0] = baseA;
   2243             strB[0] = baseA;
   2244             for (i=1; i<=sLen-1; i++) {
   2245                 strA[i] = ccMix[i % 3];
   2246                 strB[sLen-i] = ccMix[i % 3];
   2247             }
   2248             strA[sLen]   = 0;
   2249             strB[sLen]   = 0;
   2250 
   2251             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   2252             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   2253             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   2254             doTest(coll, strA, strB, UCOL_EQUAL);
   2255         }
   2256     }
   2257 
   2258     QUICK = myQ;
   2259 
   2260 
   2261     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   2262     /*         of the string.  Checks a couple of edge cases.*/
   2263 
   2264     {
   2265         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   2266         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   2267         ucol_setStrength(coll, UCOL_TERTIARY);
   2268         doTest(coll, strA, strB, UCOL_EQUAL);
   2269     }
   2270 
   2271     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   2272 
   2273     {
   2274       /* New UCA  3.1.1.
   2275        * test below used a code point from Desseret, which sorts differently
   2276        * than d800 dc00
   2277        */
   2278         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   2279         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   2280         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   2281         ucol_setStrength(coll, UCOL_TERTIARY);
   2282         doTest(coll, strA, strB, UCOL_GREATER);
   2283     }
   2284 
   2285     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   2286 
   2287     {
   2288         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   2289         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   2290         char  sortKeyA[50];
   2291         char  sortKeyAz[50];
   2292         char  sortKeyB[50];
   2293         char  sortKeyBz[50];
   2294         int   r;
   2295 
   2296         /* there used to be -3 here. Hmmmm.... */
   2297         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   2298         result = ucol_strcoll(coll, strA, 3, strB, 3);
   2299         if (result != UCOL_GREATER) {
   2300             log_err("ERROR 1 in test 4\n");
   2301         }
   2302         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2303         if (result != UCOL_EQUAL) {
   2304             log_err("ERROR 2 in test 4\n");
   2305         }
   2306 
   2307         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2308         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2309         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2310         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2311 
   2312         r = strcmp(sortKeyA, sortKeyAz);
   2313         if (r <= 0) {
   2314             log_err("Error 3 in test 4\n");
   2315         }
   2316         r = strcmp(sortKeyA, sortKeyB);
   2317         if (r <= 0) {
   2318             log_err("Error 4 in test 4\n");
   2319         }
   2320         r = strcmp(sortKeyAz, sortKeyBz);
   2321         if (r != 0) {
   2322             log_err("Error 5 in test 4\n");
   2323         }
   2324 
   2325         ucol_setStrength(coll, UCOL_IDENTICAL);
   2326         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2327         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2328         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2329         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2330 
   2331         r = strcmp(sortKeyA, sortKeyAz);
   2332         if (r <= 0) {
   2333             log_err("Error 6 in test 4\n");
   2334         }
   2335         r = strcmp(sortKeyA, sortKeyB);
   2336         if (r <= 0) {
   2337             log_err("Error 7 in test 4\n");
   2338         }
   2339         r = strcmp(sortKeyAz, sortKeyBz);
   2340         if (r != 0) {
   2341             log_err("Error 8 in test 4\n");
   2342         }
   2343         ucol_setStrength(coll, UCOL_TERTIARY);
   2344     }
   2345 
   2346 
   2347     /*  Test 5:  Null characters in non-normal source strings.*/
   2348 
   2349     {
   2350         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   2351         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   2352         char  sortKeyA[50];
   2353         char  sortKeyAz[50];
   2354         char  sortKeyB[50];
   2355         char  sortKeyBz[50];
   2356         int   r;
   2357 
   2358         result = ucol_strcoll(coll, strA, 6, strB, 6);
   2359         if (result != UCOL_GREATER) {
   2360             log_err("ERROR 1 in test 5\n");
   2361         }
   2362         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2363         if (result != UCOL_EQUAL) {
   2364             log_err("ERROR 2 in test 5\n");
   2365         }
   2366 
   2367         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2368         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2369         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2370         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2371 
   2372         r = strcmp(sortKeyA, sortKeyAz);
   2373         if (r <= 0) {
   2374             log_err("Error 3 in test 5\n");
   2375         }
   2376         r = strcmp(sortKeyA, sortKeyB);
   2377         if (r <= 0) {
   2378             log_err("Error 4 in test 5\n");
   2379         }
   2380         r = strcmp(sortKeyAz, sortKeyBz);
   2381         if (r != 0) {
   2382             log_err("Error 5 in test 5\n");
   2383         }
   2384 
   2385         ucol_setStrength(coll, UCOL_IDENTICAL);
   2386         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2387         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2388         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2389         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2390 
   2391         r = strcmp(sortKeyA, sortKeyAz);
   2392         if (r <= 0) {
   2393             log_err("Error 6 in test 5\n");
   2394         }
   2395         r = strcmp(sortKeyA, sortKeyB);
   2396         if (r <= 0) {
   2397             log_err("Error 7 in test 5\n");
   2398         }
   2399         r = strcmp(sortKeyAz, sortKeyBz);
   2400         if (r != 0) {
   2401             log_err("Error 8 in test 5\n");
   2402         }
   2403         ucol_setStrength(coll, UCOL_TERTIARY);
   2404     }
   2405 
   2406 
   2407     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   2408 
   2409     {
   2410         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   2411         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   2412 
   2413         result = ucol_strcoll(coll, strA, 5, strB, 5);
   2414         if (result != UCOL_LESS) {
   2415             log_err("Error 1 in test 6\n");
   2416         }
   2417         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2418         if (result != UCOL_EQUAL) {
   2419             log_err("Error 2 in test 6\n");
   2420         }
   2421     }
   2422 
   2423     ucol_close(coll);
   2424 }
   2425 
   2426 
   2427 
   2428 #if 0
   2429 static void TestGetCaseBit(void) {
   2430   static const char *caseBitData[] = {
   2431     "a", "A", "ch", "Ch", "CH",
   2432       "\\uFF9E", "\\u0009"
   2433   };
   2434 
   2435   static const uint8_t results[] = {
   2436     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   2437       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   2438   };
   2439 
   2440   uint32_t i, blen = 0;
   2441   UChar b[256] = {0};
   2442   UErrorCode status = U_ZERO_ERROR;
   2443   UCollator *UCA = ucol_open("", &status);
   2444   uint8_t res = 0;
   2445 
   2446   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   2447     blen = u_unescape(caseBitData[i], b, 256);
   2448     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   2449     if(results[i] != res) {
   2450       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   2451     }
   2452   }
   2453 }
   2454 #endif
   2455 
   2456 static void TestHangulTailoring(void) {
   2457     static const char *koreanData[] = {
   2458         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   2459             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   2460             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   2461             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   2462             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   2463             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   2464     };
   2465 
   2466     const char *rules =
   2467         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   2468         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   2469         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   2470         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   2471         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   2472         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   2473 
   2474 
   2475   UErrorCode status = U_ZERO_ERROR;
   2476   UChar rlz[2048] = { 0 };
   2477   uint32_t rlen = u_unescape(rules, rlz, 2048);
   2478 
   2479   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   2480   if(status == U_FILE_ACCESS_ERROR) {
   2481     log_data_err("Is your data around?\n");
   2482     return;
   2483   } else if(U_FAILURE(status)) {
   2484     log_err("Error opening collator\n");
   2485     return;
   2486   }
   2487 
   2488   log_verbose("Using start of korean rules\n");
   2489 
   2490   if(U_SUCCESS(status)) {
   2491     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2492   } else {
   2493     log_err("Unable to open collator with rules %s\n", rules);
   2494   }
   2495 
   2496   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
   2497   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
   2498   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2499 
   2500   ucol_close(coll);
   2501 
   2502   log_verbose("Using ko__LOTUS locale\n");
   2503   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2504 }
   2505 
   2506 static void TestCompressOverlap(void) {
   2507     UChar       secstr[150];
   2508     UChar       tertstr[150];
   2509     UErrorCode  status = U_ZERO_ERROR;
   2510     UCollator  *coll;
   2511     char        result[200];
   2512     uint32_t    resultlen;
   2513     int         count = 0;
   2514     char       *tempptr;
   2515 
   2516     coll = ucol_open("", &status);
   2517 
   2518     if (U_FAILURE(status)) {
   2519         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   2520         return;
   2521     }
   2522     while (count < 149) {
   2523         secstr[count] = 0x0020; /* [06, 05, 05] */
   2524         tertstr[count] = 0x0020;
   2525         count ++;
   2526     }
   2527 
   2528     /* top down compression ----------------------------------- */
   2529     secstr[count] = 0x0332; /* [, 87, 05] */
   2530     tertstr[count] = 0x3000; /* [06, 05, 07] */
   2531 
   2532     /* no compression secstr should have 150 secondary bytes, tertstr should
   2533     have 150 tertiary bytes.
   2534     with correct overlapping compression, secstr should have 4 secondary
   2535     bytes, tertstr should have > 2 tertiary bytes */
   2536     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2537     tempptr = uprv_strchr(result, 1) + 1;
   2538     while (*(tempptr + 1) != 1) {
   2539         /* the last secondary collation element is not checked since it is not
   2540         part of the compression */
   2541         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
   2542             log_err("Secondary compression overlapped\n");
   2543         }
   2544         tempptr ++;
   2545     }
   2546 
   2547     /* tertiary top/bottom/common for en_US is similar to the secondary
   2548     top/bottom/common */
   2549     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2550     tempptr = uprv_strrchr(result, 1) + 1;
   2551     while (*(tempptr + 1) != 0) {
   2552         /* the last secondary collation element is not checked since it is not
   2553         part of the compression */
   2554         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
   2555             log_err("Tertiary compression overlapped\n");
   2556         }
   2557         tempptr ++;
   2558     }
   2559 
   2560     /* bottom up compression ------------------------------------- */
   2561     secstr[count] = 0;
   2562     tertstr[count] = 0;
   2563     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2564     tempptr = uprv_strchr(result, 1) + 1;
   2565     while (*(tempptr + 1) != 1) {
   2566         /* the last secondary collation element is not checked since it is not
   2567         part of the compression */
   2568         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
   2569             log_err("Secondary compression overlapped\n");
   2570         }
   2571         tempptr ++;
   2572     }
   2573 
   2574     /* tertiary top/bottom/common for en_US is similar to the secondary
   2575     top/bottom/common */
   2576     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2577     tempptr = uprv_strrchr(result, 1) + 1;
   2578     while (*(tempptr + 1) != 0) {
   2579         /* the last secondary collation element is not checked since it is not
   2580         part of the compression */
   2581         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
   2582             log_err("Tertiary compression overlapped\n");
   2583         }
   2584         tempptr ++;
   2585     }
   2586 
   2587     ucol_close(coll);
   2588 }
   2589 
   2590 static void TestCyrillicTailoring(void) {
   2591   static const char *test[] = {
   2592     "\\u0410b",
   2593       "\\u0410\\u0306a",
   2594       "\\u04d0A"
   2595   };
   2596 
   2597     /* Russian overrides contractions, so this test is not valid anymore */
   2598     /*genericLocaleStarter("ru", test, 3);*/
   2599 
   2600     genericLocaleStarter("root", test, 3);
   2601     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   2602     genericRulesStarter("&Z < \\u0410", test, 3);
   2603     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   2604     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   2605     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   2606     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   2607 }
   2608 
   2609 static void TestSuppressContractions(void) {
   2610 
   2611   static const char *testNoCont2[] = {
   2612       "\\u0410\\u0302a",
   2613       "\\u0410\\u0306b",
   2614       "\\u0410c"
   2615   };
   2616   static const char *testNoCont[] = {
   2617       "a\\u0410",
   2618       "A\\u0410\\u0306",
   2619       "\\uFF21\\u0410\\u0302"
   2620   };
   2621 
   2622   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   2623   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   2624 }
   2625 
   2626 static void TestContraction(void) {
   2627     const static char *testrules[] = {
   2628         "&A = AB / B",
   2629         "&A = A\\u0306/\\u0306",
   2630         "&c = ch / h"
   2631     };
   2632     const static UChar testdata[][2] = {
   2633         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   2634         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   2635         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   2636     };
   2637     const static UChar testdata2[][2] = {
   2638         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   2639         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   2640         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   2641     };
   2642     const static char *testrules3[] = {
   2643         "&z < xyz &xyzw << B",
   2644         "&z < xyz &xyz << B / w",
   2645         "&z < ch &achm << B",
   2646         "&z < ch &a << B / chm",
   2647         "&\\ud800\\udc00w << B",
   2648         "&\\ud800\\udc00 << B / w",
   2649         "&a\\ud800\\udc00m << B",
   2650         "&a << B / \\ud800\\udc00m",
   2651     };
   2652 
   2653     UErrorCode  status   = U_ZERO_ERROR;
   2654     UCollator  *coll;
   2655     UChar       rule[256] = {0};
   2656     uint32_t    rlen     = 0;
   2657     int         i;
   2658 
   2659     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2660         UCollationElements *iter1;
   2661         int j = 0;
   2662         log_verbose("Rule %s for testing\n", testrules[i]);
   2663         rlen = u_unescape(testrules[i], rule, 32);
   2664         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2665         if (U_FAILURE(status)) {
   2666             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2667             return;
   2668         }
   2669         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   2670         if (U_FAILURE(status)) {
   2671             log_err("Collation iterator creation failed\n");
   2672             return;
   2673         }
   2674         while (j < 2) {
   2675             UCollationElements *iter2 = ucol_openElements(coll,
   2676                                                          &(testdata[i][j]),
   2677                                                          1, &status);
   2678             uint32_t ce;
   2679             if (U_FAILURE(status)) {
   2680                 log_err("Collation iterator creation failed\n");
   2681                 return;
   2682             }
   2683             ce = ucol_next(iter2, &status);
   2684             while (ce != UCOL_NULLORDER) {
   2685                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   2686                     log_err("Collation elements in contraction split does not match\n");
   2687                     return;
   2688                 }
   2689                 ce = ucol_next(iter2, &status);
   2690             }
   2691             j ++;
   2692             ucol_closeElements(iter2);
   2693         }
   2694         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   2695             log_err("Collation elements not exhausted\n");
   2696             return;
   2697         }
   2698         ucol_closeElements(iter1);
   2699         ucol_close(coll);
   2700     }
   2701 
   2702     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   2703     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2704     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   2705         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2706                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   2707                 testdata2[1][1]);
   2708         return;
   2709     }
   2710     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   2711         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2712                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   2713                 testdata2[2][1]);
   2714         return;
   2715     }
   2716     ucol_close(coll);
   2717 
   2718     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   2719         UCollator          *coll1,
   2720                            *coll2;
   2721         UCollationElements *iter1,
   2722                            *iter2;
   2723         UChar               ch = 0x0042 /* 'B' */;
   2724         uint32_t            ce;
   2725         rlen = u_unescape(testrules3[i], rule, 32);
   2726         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2727         rlen = u_unescape(testrules3[i + 1], rule, 32);
   2728         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2729         if (U_FAILURE(status)) {
   2730             log_err("Collator creation failed %s\n", testrules[i]);
   2731             return;
   2732         }
   2733         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   2734         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   2735         if (U_FAILURE(status)) {
   2736             log_err("Collation iterator creation failed\n");
   2737             return;
   2738         }
   2739         ce = ucol_next(iter1, &status);
   2740         if (U_FAILURE(status)) {
   2741             log_err("Retrieving ces failed\n");
   2742             return;
   2743         }
   2744         while (ce != UCOL_NULLORDER) {
   2745             if (ce != (uint32_t)ucol_next(iter2, &status)) {
   2746                 log_err("CEs does not match\n");
   2747                 return;
   2748             }
   2749             ce = ucol_next(iter1, &status);
   2750             if (U_FAILURE(status)) {
   2751                 log_err("Retrieving ces failed\n");
   2752                 return;
   2753             }
   2754         }
   2755         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   2756             log_err("CEs not exhausted\n");
   2757             return;
   2758         }
   2759         ucol_closeElements(iter1);
   2760         ucol_closeElements(iter2);
   2761         ucol_close(coll1);
   2762         ucol_close(coll2);
   2763     }
   2764 }
   2765 
   2766 static void TestExpansion(void) {
   2767     const static char *testrules[] = {
   2768         "&J << K / B & K << M",
   2769         "&J << K / B << M"
   2770     };
   2771     const static UChar testdata[][3] = {
   2772         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   2773         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   2774         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   2775         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   2776         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   2777         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   2778     };
   2779 
   2780     UErrorCode  status   = U_ZERO_ERROR;
   2781     UCollator  *coll;
   2782     UChar       rule[256] = {0};
   2783     uint32_t    rlen     = 0;
   2784     int         i;
   2785 
   2786     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2787         int j = 0;
   2788         log_verbose("Rule %s for testing\n", testrules[i]);
   2789         rlen = u_unescape(testrules[i], rule, 32);
   2790         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2791         if (U_FAILURE(status)) {
   2792             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2793             return;
   2794         }
   2795 
   2796         for (j = 0; j < 5; j ++) {
   2797             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   2798         }
   2799         ucol_close(coll);
   2800     }
   2801 }
   2802 
   2803 #if 0
   2804 /* this test tests the current limitations of the engine */
   2805 /* it always fail, so it is disabled by default */
   2806 static void TestLimitations(void) {
   2807   /* recursive expansions */
   2808   {
   2809     static const char *rule = "&a=b/c&d=c/e";
   2810     static const char *tlimit01[] = {"add","b","adf"};
   2811     static const char *tlimit02[] = {"aa","b","af"};
   2812     log_verbose("recursive expansions\n");
   2813     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2814     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2815   }
   2816   /* contractions spanning expansions */
   2817   {
   2818     static const char *rule = "&a<<<c/e&g<<<eh";
   2819     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   2820     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   2821     log_verbose("contractions spanning expansions\n");
   2822     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2823     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2824   }
   2825   /* normalization: nulls in contractions */
   2826   {
   2827     static const char *rule = "&a<<<\\u0000\\u0302";
   2828     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2829     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2830     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2831     static const UColAttributeValue valOn[] = { UCOL_ON };
   2832     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2833 
   2834     log_verbose("NULL in contractions\n");
   2835     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2836     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2837     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2838     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2839 
   2840   }
   2841   /* normalization: contractions spanning normalization */
   2842   {
   2843     static const char *rule = "&a<<<\\u0000\\u0302";
   2844     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2845     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2846     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2847     static const UColAttributeValue valOn[] = { UCOL_ON };
   2848     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2849 
   2850     log_verbose("contractions spanning normalization\n");
   2851     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2852     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2853     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2854     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2855 
   2856   }
   2857   /* variable top:  */
   2858   {
   2859     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   2860     static const char *rule = "&\\u2010<x<[variable top]=z";
   2861     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   2862     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   2863     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   2864     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   2865     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   2866     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   2867     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   2868 
   2869     log_verbose("variable top\n");
   2870     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2871     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2872     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2873     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2874     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2875 
   2876   }
   2877   /* case level */
   2878   {
   2879     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   2880     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   2881     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   2882     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   2883     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   2884     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   2885     log_verbose("case level\n");
   2886     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2887     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2888     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2889     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2890   }
   2891 
   2892 }
   2893 #endif
   2894 
   2895 static void TestBocsuCoverage(void) {
   2896   UErrorCode status = U_ZERO_ERROR;
   2897   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   2898   UChar       test[256] = {0};
   2899   uint32_t    tlen     = u_unescape(testString, test, 32);
   2900   uint8_t key[256]     = {0};
   2901   uint32_t klen         = 0;
   2902 
   2903   UCollator *coll = ucol_open("", &status);
   2904   if(U_SUCCESS(status)) {
   2905   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   2906 
   2907   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   2908 
   2909   ucol_close(coll);
   2910   } else {
   2911     log_data_err("Couldn't open UCA\n");
   2912   }
   2913 }
   2914 
   2915 static void TestVariableTopSetting(void) {
   2916   UErrorCode status = U_ZERO_ERROR;
   2917   const UChar *current = NULL;
   2918   uint32_t varTopOriginal = 0, varTop1, varTop2;
   2919   UCollator *coll = ucol_open("", &status);
   2920   if(U_SUCCESS(status)) {
   2921 
   2922   uint32_t strength = 0;
   2923   uint16_t specs = 0;
   2924   uint32_t chOffset = 0;
   2925   uint32_t chLen = 0;
   2926   uint32_t exOffset = 0;
   2927   uint32_t exLen = 0;
   2928   uint32_t oldChOffset = 0;
   2929   uint32_t oldChLen = 0;
   2930   uint32_t oldExOffset = 0;
   2931   uint32_t oldExLen = 0;
   2932   uint32_t prefixOffset = 0;
   2933   uint32_t prefixLen = 0;
   2934 
   2935   UBool startOfRules = TRUE;
   2936   UColTokenParser src;
   2937   UColOptionSet opts;
   2938 
   2939   UChar *rulesCopy = NULL;
   2940   uint32_t rulesLen;
   2941 
   2942   UCollationResult result;
   2943 
   2944   UChar first[256] = { 0 };
   2945   UChar second[256] = { 0 };
   2946   UParseError parseError;
   2947   int32_t myQ = QUICK;
   2948 
   2949   src.opts = &opts;
   2950 
   2951   if(QUICK <= 0) {
   2952     QUICK = 1;
   2953   }
   2954 
   2955   /* this test will fail when normalization is turned on */
   2956   /* therefore we always turn off exhaustive mode for it */
   2957   { /* QUICK > 0*/
   2958     log_verbose("Slide variable top over UCARules\n");
   2959     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
   2960     rulesCopy = (UChar *)malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   2961     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
   2962 
   2963     if(U_SUCCESS(status) && rulesLen > 0) {
   2964       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   2965       src.current = src.source = rulesCopy;
   2966       src.end = rulesCopy+rulesLen;
   2967       src.extraCurrent = src.end;
   2968       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   2969 
   2970       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   2971         strength = src.parsedToken.strength;
   2972         chOffset = src.parsedToken.charsOffset;
   2973         chLen = src.parsedToken.charsLen;
   2974         exOffset = src.parsedToken.extensionOffset;
   2975         exLen = src.parsedToken.extensionLen;
   2976         prefixOffset = src.parsedToken.prefixOffset;
   2977         prefixLen = src.parsedToken.prefixLen;
   2978         specs = src.parsedToken.flags;
   2979 
   2980         startOfRules = FALSE;
   2981         {
   2982           log_verbose("%04X %d ", *(rulesCopy+chOffset), chLen);
   2983         }
   2984         if(strength == UCOL_PRIMARY) {
   2985           status = U_ZERO_ERROR;
   2986           varTopOriginal = ucol_getVariableTop(coll, &status);
   2987           varTop1 = ucol_setVariableTop(coll, rulesCopy+oldChOffset, oldChLen, &status);
   2988           if(U_FAILURE(status)) {
   2989             char buffer[256];
   2990             char *buf = buffer;
   2991             uint32_t i = 0, j;
   2992             uint32_t CE = UCOL_NO_MORE_CES;
   2993 
   2994             /* before we start screaming, let's see if there is a problem with the rules */
   2995             collIterate s;
   2996             uprv_init_collIterate(coll, rulesCopy+oldChOffset, oldChLen, &s);
   2997 
   2998             CE = ucol_getNextCE(coll, &s, &status);
   2999 
   3000             for(i = 0; i < oldChLen; i++) {
   3001               j = sprintf(buf, "%04X ", *(rulesCopy+oldChOffset+i));
   3002               buf += j;
   3003             }
   3004             if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3005               log_verbose("= Expected failure for %s =", buffer);
   3006             } else {
   3007               if(s.pos == s.endp) {
   3008                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
   3009                   oldChOffset, u_errorName(status), buffer);
   3010               } else {
   3011                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
   3012                   buffer);
   3013               }
   3014             }
   3015           }
   3016           varTop2 = ucol_getVariableTop(coll, &status);
   3017           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
   3018             log_err("cannot retrieve set varTop value!\n");
   3019             continue;
   3020           }
   3021 
   3022           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
   3023 
   3024             u_strncpy(first, rulesCopy+oldChOffset, oldChLen);
   3025             u_strncpy(first+oldChLen, rulesCopy+chOffset, chLen);
   3026             u_strncpy(first+oldChLen+chLen, rulesCopy+oldChOffset, oldChLen);
   3027             first[2*oldChLen+chLen] = 0;
   3028 
   3029             if(oldExLen == 0) {
   3030               u_strncpy(second, rulesCopy+chOffset, chLen);
   3031               second[chLen] = 0;
   3032             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
   3033               u_strncpy(second, rulesCopy+oldExOffset, oldExLen);
   3034               u_strncpy(second+oldChLen, rulesCopy+chOffset, chLen);
   3035               u_strncpy(second+oldChLen+chLen, rulesCopy+oldExOffset, oldExLen);
   3036               second[2*oldExLen+chLen] = 0;
   3037             }
   3038             result = ucol_strcoll(coll, first, -1, second, -1);
   3039             if(result == UCOL_EQUAL) {
   3040               doTest(coll, first, second, UCOL_EQUAL);
   3041             } else {
   3042               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(rulesCopy+oldChOffset), *(rulesCopy+chOffset));
   3043             }
   3044           }
   3045         }
   3046         if(strength != UCOL_TOK_RESET) {
   3047           oldChOffset = chOffset;
   3048           oldChLen = chLen;
   3049           oldExOffset = exOffset;
   3050           oldExLen = exLen;
   3051         }
   3052       }
   3053       status = U_ZERO_ERROR;
   3054     }
   3055     else {
   3056       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
   3057       return;
   3058     }
   3059     if (U_FAILURE(status)) {
   3060         log_err("Error parsing rules %s\n", u_errorName(status));
   3061         return;
   3062     }
   3063     status = U_ZERO_ERROR;
   3064   }
   3065 
   3066   QUICK = myQ;
   3067 
   3068   log_verbose("Testing setting variable top to contractions\n");
   3069   {
   3070     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
   3071     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
   3072     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
   3073     while(*conts != 0) {
   3074       if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
   3075         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
   3076       } else {
   3077         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
   3078       }
   3079       if(U_FAILURE(status)) {
   3080         log_err("Couldn't set variable top to a contraction %04X %04X %04X\n",
   3081           *conts, *(conts+1), *(conts+2));
   3082         status = U_ZERO_ERROR;
   3083       }
   3084       conts+=3;
   3085     }
   3086 
   3087     status = U_ZERO_ERROR;
   3088 
   3089     first[0] = 0x0040;
   3090     first[1] = 0x0050;
   3091     first[2] = 0x0000;
   3092 
   3093     ucol_setVariableTop(coll, first, -1, &status);
   3094 
   3095     if(U_SUCCESS(status)) {
   3096       log_err("Invalid contraction succeded in setting variable top!\n");
   3097     }
   3098 
   3099   }
   3100 
   3101   log_verbose("Test restoring variable top\n");
   3102 
   3103   status = U_ZERO_ERROR;
   3104   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   3105   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   3106     log_err("Couldn't restore old variable top\n");
   3107   }
   3108 
   3109   log_verbose("Testing calling with error set\n");
   3110 
   3111   status = U_INTERNAL_PROGRAM_ERROR;
   3112   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
   3113   varTop2 = ucol_getVariableTop(coll, &status);
   3114   ucol_restoreVariableTop(coll, varTop2, &status);
   3115   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
   3116   varTop2 = ucol_getVariableTop(NULL, &status);
   3117   ucol_restoreVariableTop(NULL, varTop2, &status);
   3118   if(status != U_INTERNAL_PROGRAM_ERROR) {
   3119     log_err("Bad reaction to passed error!\n");
   3120   }
   3121   free(rulesCopy);
   3122   ucol_close(coll);
   3123   } else {
   3124     log_data_err("Couldn't open UCA collator\n");
   3125   }
   3126 
   3127 }
   3128 
   3129 static void TestNonChars(void) {
   3130   static const char *test[] = {
   3131     "\\u0000",
   3132     "\\uFFFE", "\\uFFFF",
   3133       "\\U0001FFFE", "\\U0001FFFF",
   3134       "\\U0002FFFE", "\\U0002FFFF",
   3135       "\\U0003FFFE", "\\U0003FFFF",
   3136       "\\U0004FFFE", "\\U0004FFFF",
   3137       "\\U0005FFFE", "\\U0005FFFF",
   3138       "\\U0006FFFE", "\\U0006FFFF",
   3139       "\\U0007FFFE", "\\U0007FFFF",
   3140       "\\U0008FFFE", "\\U0008FFFF",
   3141       "\\U0009FFFE", "\\U0009FFFF",
   3142       "\\U000AFFFE", "\\U000AFFFF",
   3143       "\\U000BFFFE", "\\U000BFFFF",
   3144       "\\U000CFFFE", "\\U000CFFFF",
   3145       "\\U000DFFFE", "\\U000DFFFF",
   3146       "\\U000EFFFE", "\\U000EFFFF",
   3147       "\\U000FFFFE", "\\U000FFFFF",
   3148       "\\U0010FFFE", "\\U0010FFFF"
   3149   };
   3150   UErrorCode status = U_ZERO_ERROR;
   3151   UCollator *coll = ucol_open("en_US", &status);
   3152 
   3153   log_verbose("Test non characters\n");
   3154 
   3155   if(U_SUCCESS(status)) {
   3156     genericOrderingTestWithResult(coll, test, 35, UCOL_EQUAL);
   3157   } else {
   3158     log_err_status(status, "Unable to open collator\n");
   3159   }
   3160 
   3161   ucol_close(coll);
   3162 }
   3163 
   3164 static void TestExtremeCompression(void) {
   3165   static char *test[4];
   3166   int32_t j = 0, i = 0;
   3167 
   3168   for(i = 0; i<4; i++) {
   3169     test[i] = (char *)malloc(2048*sizeof(char));
   3170   }
   3171 
   3172   for(j = 20; j < 500; j++) {
   3173     for(i = 0; i<4; i++) {
   3174       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3175       test[i][j-1] = (char)('a'+i);
   3176       test[i][j] = 0;
   3177     }
   3178     genericLocaleStarter("en_US", (const char **)test, 4);
   3179   }
   3180 
   3181 
   3182   for(i = 0; i<4; i++) {
   3183     free(test[i]);
   3184   }
   3185 }
   3186 
   3187 #if 0
   3188 static void TestExtremeCompression(void) {
   3189   static char *test[4];
   3190   int32_t j = 0, i = 0;
   3191   UErrorCode status = U_ZERO_ERROR;
   3192   UCollator *coll = ucol_open("en_US", status);
   3193   for(i = 0; i<4; i++) {
   3194     test[i] = (char *)malloc(2048*sizeof(char));
   3195   }
   3196   for(j = 10; j < 2048; j++) {
   3197     for(i = 0; i<4; i++) {
   3198       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
   3199       test[i][j-1] = (char)('a'+i);
   3200       test[i][j] = 0;
   3201     }
   3202   }
   3203   genericLocaleStarter("en_US", (const char **)test, 4);
   3204 
   3205   for(j = 10; j < 2048; j++) {
   3206     for(i = 0; i<1; i++) {
   3207       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3208       test[i][j] = 0;
   3209     }
   3210   }
   3211   for(i = 0; i<4; i++) {
   3212     free(test[i]);
   3213   }
   3214 }
   3215 #endif
   3216 
   3217 static void TestSurrogates(void) {
   3218   static const char *test[] = {
   3219     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   3220        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   3221        "\\ud800\\udc00f",  "\\ud800\\udc00",
   3222        "\\ud800\\udc00c", "\\ud800\\udc00b",
   3223        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   3224        "\\ud800\\udc00a",
   3225        "c", "b"
   3226   };
   3227 
   3228   static const char *rule =
   3229     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   3230        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   3231        "< \\ud800\\udc00f  << \\ud800\\udc00"
   3232        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   3233        "< \\ud800\\udc00a  < c < b" ;
   3234 
   3235   genericRulesStarter(rule, test, 14);
   3236 }
   3237 
   3238 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   3239 static void TestPrefix(void) {
   3240   uint32_t i;
   3241 
   3242   static const struct {
   3243     const char *rules;
   3244     const char *data[50];
   3245     const uint32_t len;
   3246   } tests[] = {
   3247     { "&z <<< z|a",
   3248       {"zz", "za"}, 2 },
   3249 
   3250     { "&z <<< z|   a",
   3251       {"zz", "za"}, 2 },
   3252     { "[strength I]"
   3253       "&a=\\ud900\\udc25"
   3254       "&z<<<\\ud900\\udc25|a",
   3255       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   3256   };
   3257 
   3258 
   3259   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3260     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3261   }
   3262 }
   3263 
   3264 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   3265 /* JIS X 4061 collation order implementation                                   */
   3266 static void TestNewJapanese(void) {
   3267 
   3268   static const char * const test1[] = {
   3269       "\\u30b7\\u30e3\\u30fc\\u30ec",
   3270       "\\u30b7\\u30e3\\u30a4",
   3271       "\\u30b7\\u30e4\\u30a3",
   3272       "\\u30b7\\u30e3\\u30ec",
   3273       "\\u3061\\u3087\\u3053",
   3274       "\\u3061\\u3088\\u3053",
   3275       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   3276       "\\u3066\\u30fc\\u305f",
   3277       "\\u30c6\\u30fc\\u30bf",
   3278       "\\u30c6\\u30a7\\u30bf",
   3279       "\\u3066\\u3048\\u305f",
   3280       "\\u3067\\u30fc\\u305f",
   3281       "\\u30c7\\u30fc\\u30bf",
   3282       "\\u30c7\\u30a7\\u30bf",
   3283       "\\u3067\\u3048\\u305f",
   3284       "\\u3066\\u30fc\\u305f\\u30fc",
   3285       "\\u30c6\\u30fc\\u30bf\\u30a1",
   3286       "\\u30c6\\u30a7\\u30bf\\u30fc",
   3287       "\\u3066\\u3047\\u305f\\u3041",
   3288       "\\u3066\\u3048\\u305f\\u30fc",
   3289       "\\u3067\\u30fc\\u305f\\u30fc",
   3290       "\\u30c7\\u30fc\\u30bf\\u30a1",
   3291       "\\u3067\\u30a7\\u305f\\u30a1",
   3292       "\\u30c7\\u3047\\u30bf\\u3041",
   3293       "\\u30c7\\u30a8\\u30bf\\u30a2",
   3294       "\\u3072\\u3086",
   3295       "\\u3073\\u3085\\u3042",
   3296       "\\u3074\\u3085\\u3042",
   3297       "\\u3073\\u3085\\u3042\\u30fc",
   3298       "\\u30d3\\u30e5\\u30a2\\u30fc",
   3299       "\\u3074\\u3085\\u3042\\u30fc",
   3300       "\\u30d4\\u30e5\\u30a2\\u30fc",
   3301       "\\u30d2\\u30e5\\u30a6",
   3302       "\\u30d2\\u30e6\\u30a6",
   3303       "\\u30d4\\u30e5\\u30a6\\u30a2",
   3304       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   3305       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   3306       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   3307       "\\u3072\\u3085\\u3093",
   3308       "\\u3074\\u3085\\u3093",
   3309       "\\u3075\\u30fc\\u308a",
   3310       "\\u30d5\\u30fc\\u30ea",
   3311       "\\u3075\\u3045\\u308a",
   3312       "\\u3075\\u30a5\\u308a",
   3313       "\\u3075\\u30a5\\u30ea",
   3314       "\\u30d5\\u30a6\\u30ea",
   3315       "\\u3076\\u30fc\\u308a",
   3316       "\\u30d6\\u30fc\\u30ea",
   3317       "\\u3076\\u3045\\u308a",
   3318       "\\u30d6\\u30a5\\u308a",
   3319       "\\u3077\\u3046\\u308a",
   3320       "\\u30d7\\u30a6\\u30ea",
   3321       "\\u3075\\u30fc\\u308a\\u30fc",
   3322       "\\u30d5\\u30a5\\u30ea\\u30fc",
   3323       "\\u3075\\u30a5\\u308a\\u30a3",
   3324       "\\u30d5\\u3045\\u308a\\u3043",
   3325       "\\u30d5\\u30a6\\u30ea\\u30fc",
   3326       "\\u3075\\u3046\\u308a\\u3043",
   3327       "\\u30d6\\u30a6\\u30ea\\u30a4",
   3328       "\\u3077\\u30fc\\u308a\\u30fc",
   3329       "\\u3077\\u30a5\\u308a\\u30a4",
   3330       "\\u3077\\u3046\\u308a\\u30fc",
   3331       "\\u30d7\\u30a6\\u30ea\\u30a4",
   3332       "\\u30d5\\u30fd",
   3333       "\\u3075\\u309e",
   3334       "\\u3076\\u309d",
   3335       "\\u3076\\u3075",
   3336       "\\u3076\\u30d5",
   3337       "\\u30d6\\u3075",
   3338       "\\u30d6\\u30d5",
   3339       "\\u3076\\u309e",
   3340       "\\u3076\\u3077",
   3341       "\\u30d6\\u3077",
   3342       "\\u3077\\u309d",
   3343       "\\u30d7\\u30fd",
   3344       "\\u3077\\u3075",
   3345 };
   3346 
   3347   static const char *test2[] = {
   3348     "\\u306f\\u309d", /* H\\u309d */
   3349     "\\u30cf\\u30fd", /* K\\u30fd */
   3350     "\\u306f\\u306f", /* HH */
   3351     "\\u306f\\u30cf", /* HK */
   3352     "\\u30cf\\u30cf", /* KK */
   3353     "\\u306f\\u309e", /* H\\u309e */
   3354     "\\u30cf\\u30fe", /* K\\u30fe */
   3355     "\\u306f\\u3070", /* HH\\u309b */
   3356     "\\u30cf\\u30d0", /* KK\\u309b */
   3357     "\\u306f\\u3071", /* HH\\u309c */
   3358     "\\u30cf\\u3071", /* KH\\u309c */
   3359     "\\u30cf\\u30d1", /* KK\\u309c */
   3360     "\\u3070\\u309d", /* H\\u309b\\u309d */
   3361     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   3362     "\\u3070\\u306f", /* H\\u309bH */
   3363     "\\u30d0\\u30cf", /* K\\u309bK */
   3364     "\\u3070\\u309e", /* H\\u309b\\u309e */
   3365     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   3366     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   3367     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   3368     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   3369     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   3370     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   3371     "\\u3071\\u309d", /* H\\u309c\\u309d */
   3372     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   3373     "\\u3071\\u306f", /* H\\u309cH */
   3374     "\\u30d1\\u30cf", /* K\\u309cK */
   3375     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   3376     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   3377     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   3378     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   3379     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   3380   };
   3381   /*
   3382   static const char *test3[] = {
   3383     "\\u221er\\u221e",
   3384     "\\u221eR#",
   3385     "\\u221et\\u221e",
   3386     "#r\\u221e",
   3387     "#R#",
   3388     "#t%",
   3389     "#T%",
   3390     "8t\\u221e",
   3391     "8T\\u221e",
   3392     "8t#",
   3393     "8T#",
   3394     "8t%",
   3395     "8T%",
   3396     "8t8",
   3397     "8T8",
   3398     "\\u03c9r\\u221e",
   3399     "\\u03a9R%",
   3400     "rr\\u221e",
   3401     "rR\\u221e",
   3402     "Rr\\u221e",
   3403     "RR\\u221e",
   3404     "RT%",
   3405     "rt8",
   3406     "tr\\u221e",
   3407     "tr8",
   3408     "TR8",
   3409     "tt8",
   3410     "\\u30b7\\u30e3\\u30fc\\u30ec",
   3411   };
   3412   */
   3413   static const UColAttribute att[] = { UCOL_STRENGTH };
   3414   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   3415 
   3416   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   3417   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   3418 
   3419   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   3420   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   3421   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   3422   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   3423   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   3424 }
   3425 
   3426 static void TestStrCollIdenticalPrefix(void) {
   3427   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   3428   const char* test[] = {
   3429     "ab\\ud9b0\\udc70",
   3430     "ab\\ud9b0\\udc71"
   3431   };
   3432   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   3433 }
   3434 /* Contractions should have all their canonically equivalent */
   3435 /* strings included */
   3436 static void TestContractionClosure(void) {
   3437   static const struct {
   3438     const char *rules;
   3439     const char *data[10];
   3440     const uint32_t len;
   3441   } tests[] = {
   3442     {   "&b=\\u00e4\\u00e4",
   3443       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   3444     {   "&b=\\u00C5",
   3445       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   3446   };
   3447   uint32_t i;
   3448 
   3449 
   3450   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3451     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   3452   }
   3453 }
   3454 
   3455 /* This tests also fails*/
   3456 static void TestBeforePrefixFailure(void) {
   3457   static const struct {
   3458     const char *rules;
   3459     const char *data[10];
   3460     const uint32_t len;
   3461   } tests[] = {
   3462     { "&g <<< a"
   3463       "&[before 3]\\uff41 <<< x",
   3464       {"x", "\\uff41"}, 2 },
   3465     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3466         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3467         "&[before 3]\\u30a7<<<\\u30a9",
   3468       {"\\u30a9", "\\u30a7"}, 2 },
   3469     {   "&[before 3]\\u30a7<<<\\u30a9"
   3470         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3471         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   3472       {"\\u30a9", "\\u30a7"}, 2 },
   3473   };
   3474   uint32_t i;
   3475 
   3476 
   3477   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3478     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3479   }
   3480 
   3481 #if 0
   3482   const char* rule1 =
   3483         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3484         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3485         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   3486   const char* rule2 =
   3487         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   3488         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3489         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   3490   const char* test[] = {
   3491       "\\u30c6\\u30fc\\u30bf",
   3492       "\\u30c6\\u30a7\\u30bf",
   3493   };
   3494   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   3495   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   3496 /* this piece of code should be in some sort of verbose mode     */
   3497 /* it gets the collation elements for elements and prints them   */
   3498 /* This is useful when trying to see whether the problem is      */
   3499   {
   3500     UErrorCode status = U_ZERO_ERROR;
   3501     uint32_t i = 0;
   3502     UCollationElements *it = NULL;
   3503     uint32_t CE;
   3504     UChar string[256];
   3505     uint32_t uStringLen;
   3506     UCollator *coll = NULL;
   3507 
   3508     uStringLen = u_unescape(rule1, string, 256);
   3509 
   3510     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3511 
   3512     /*coll = ucol_open("ja_JP_JIS", &status);*/
   3513     it = ucol_openElements(coll, string, 0, &status);
   3514 
   3515     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   3516       log_verbose("%s\n", test[i]);
   3517       uStringLen = u_unescape(test[i], string, 256);
   3518       ucol_setText(it, string, uStringLen, &status);
   3519 
   3520       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   3521         log_verbose("%08X\n", CE);
   3522       }
   3523       log_verbose("\n");
   3524 
   3525     }
   3526 
   3527     ucol_closeElements(it);
   3528     ucol_close(coll);
   3529   }
   3530 #endif
   3531 }
   3532 
   3533 static void TestPrefixCompose(void) {
   3534   const char* rule1 =
   3535         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   3536   /*
   3537   const char* test[] = {
   3538       "\\u30c6\\u30fc\\u30bf",
   3539       "\\u30c6\\u30a7\\u30bf",
   3540   };
   3541   */
   3542   {
   3543     UErrorCode status = U_ZERO_ERROR;
   3544     /*uint32_t i = 0;*/
   3545     /*UCollationElements *it = NULL;*/
   3546 /*    uint32_t CE;*/
   3547     UChar string[256];
   3548     uint32_t uStringLen;
   3549     UCollator *coll = NULL;
   3550 
   3551     uStringLen = u_unescape(rule1, string, 256);
   3552 
   3553     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3554     ucol_close(coll);
   3555   }
   3556 
   3557 
   3558 }
   3559 
   3560 /*
   3561 [last variable] last variable value
   3562 [last primary ignorable] largest CE for primary ignorable
   3563 [last secondary ignorable] largest CE for secondary ignorable
   3564 [last tertiary ignorable] largest CE for tertiary ignorable
   3565 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   3566 */
   3567 
   3568 static void TestRuleOptions(void) {
   3569   /* values here are hardcoded and are correct for the current UCA
   3570    * when the UCA changes, one might be forced to change these
   3571    * values. (\\u02d0, \\U00010FFFC etc...)
   3572    */
   3573   static const struct {
   3574     const char *rules;
   3575     const char *data[10];
   3576     const uint32_t len;
   3577   } tests[] = {
   3578     /* - all befores here amount to zero */
   3579     { "&[before 3][first tertiary ignorable]<<<a",
   3580         { "\\u0000", "a"}, 2
   3581     }, /* you cannot go before first tertiary ignorable */
   3582 
   3583     { "&[before 3][last tertiary ignorable]<<<a",
   3584         { "\\u0000", "a"}, 2
   3585     }, /* you cannot go before last tertiary ignorable */
   3586 
   3587     { "&[before 3][first secondary ignorable]<<<a",
   3588         { "\\u0000", "a"}, 2
   3589     }, /* you cannot go before first secondary ignorable */
   3590 
   3591     { "&[before 3][last secondary ignorable]<<<a",
   3592         { "\\u0000", "a"}, 2
   3593     }, /* you cannot go before first secondary ignorable */
   3594 
   3595     /* 'normal' befores */
   3596 
   3597     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
   3598         {  "c", "b", "\\u0332", "a" }, 4
   3599     },
   3600 
   3601     /* we don't have a code point that corresponds to
   3602      * the last primary ignorable
   3603      */
   3604     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
   3605         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   3606     },
   3607 
   3608     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   3609         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   3610     },
   3611 
   3612     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   3613         {  "c", "b", "\\uD834\\uDF71", "a", "\\u02d0" }, 5
   3614     },
   3615 
   3616     { "&[first regular]<a"
   3617       "&[before 1][first regular]<b",
   3618       { "b", "\\u02d0", "a", "\\u02d1"}, 4
   3619     },
   3620 
   3621     { "&[before 1][last regular]<b"
   3622       "&[last regular]<a",
   3623         { "b", "\\uD808\\uDF6E", "a", "\\u4e00" }, 4
   3624     },
   3625 
   3626     { "&[before 1][first implicit]<b"
   3627       "&[first implicit]<a",
   3628         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   3629     },
   3630 
   3631     { "&[before 1][last implicit]<b"
   3632       "&[last implicit]<a",
   3633         { "b", "\\U0010FFFD", "a" }, 3
   3634     },
   3635 
   3636     { "&[last variable]<z"
   3637       "&[last primary ignorable]<x"
   3638       "&[last secondary ignorable]<<y"
   3639       "&[last tertiary ignorable]<<<w"
   3640       "&[top]<u",
   3641       {"\\ufffb",  "w", "y", "\\u20e3", "x", "\\u137c", "z", "u"}, 7
   3642     }
   3643 
   3644   };
   3645   uint32_t i;
   3646 
   3647 
   3648   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3649     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3650   }
   3651 }
   3652 
   3653 
   3654 static void TestOptimize(void) {
   3655   /* this is not really a test - just trying out
   3656    * whether copying of UCA contents will fail
   3657    * Cannot really test, since the functionality
   3658    * remains the same.
   3659    */
   3660   static const struct {
   3661     const char *rules;
   3662     const char *data[10];
   3663     const uint32_t len;
   3664   } tests[] = {
   3665     /* - all befores here amount to zero */
   3666     { "[optimize [\\uAC00-\\uD7FF]]",
   3667     { "a", "b"}, 2}
   3668   };
   3669   uint32_t i;
   3670 
   3671   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3672     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3673   }
   3674 }
   3675 
   3676 /*
   3677 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   3678 weiv    ucol_strcollIter?
   3679 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   3680 weiv    these are the input strings?
   3681 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   3682 weiv    will check - could be a problem with utf-8 iterator
   3683 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   3684 weiv    hmmm
   3685 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   3686 weiv    that doesn't sound right
   3687 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   3688 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   3689 cycheng (at) ca.ibm.c... yes
   3690 weiv    and then do the comparison
   3691 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   3692 weiv    utf-16 strings look like a little endian ones in the example you sent me
   3693 weiv    It could be a bug - let me try to test it out
   3694 cycheng (at) ca.ibm.c... ok
   3695 cycheng (at) ca.ibm.c... we can wait till the conf. call
   3696 cycheng (at) ca.ibm.c... next weke
   3697 weiv    that would be great
   3698 weiv    hmmm
   3699 weiv    I might be wrong
   3700 weiv    let me play with it some more
   3701 cycheng (at) ca.ibm.c... ok
   3702 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   3703 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   3704 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   3705 weiv    ok
   3706 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   3707 weiv    thanks
   3708 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   3709 */
   3710 #if 0
   3711 static void Alexis(void) {
   3712   UErrorCode status = U_ZERO_ERROR;
   3713   UCollator *coll = ucol_open("", &status);
   3714 
   3715 
   3716   const char utf16be[2][4] = {
   3717     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   3718     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   3719   };
   3720 
   3721   const char utf8[2][4] = {
   3722     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   3723     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   3724   };
   3725 
   3726   UCharIterator iterU161, iterU162;
   3727   UCharIterator iterU81, iterU82;
   3728 
   3729   UCollationResult resU16, resU8;
   3730 
   3731   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   3732   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   3733 
   3734   uiter_setUTF8(&iterU81, utf8[0], 4);
   3735   uiter_setUTF8(&iterU82, utf8[1], 4);
   3736 
   3737   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3738 
   3739   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   3740   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   3741 
   3742 
   3743   if(resU16 != resU8) {
   3744     log_err("different results\n");
   3745   }
   3746 
   3747   ucol_close(coll);
   3748 }
   3749 #endif
   3750 
   3751 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   3752 static void Alexis2(void) {
   3753   UErrorCode status = U_ZERO_ERROR;
   3754   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3755   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3756   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3757   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   3758 
   3759   UConverter *conv = NULL;
   3760 
   3761   UCharIterator U16BEItS, U16BEItT;
   3762   UCharIterator U8ItS, U8ItT;
   3763 
   3764   UCollationResult resU16, resU16BE, resU8;
   3765 
   3766   static const char* const pairs[][2] = {
   3767     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   3768     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   3769     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   3770     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   3771     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   3772     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   3773     { "\\u0020", "\\u0020\\u0000"}
   3774 /*
   3775 5F20 (my result here)
   3776 5F204E008E3F
   3777 5F20 (your result here)
   3778 */
   3779   };
   3780 
   3781   int32_t i = 0;
   3782 
   3783   UCollator *coll = ucol_open("", &status);
   3784   if(status == U_FILE_ACCESS_ERROR) {
   3785     log_data_err("Is your data around?\n");
   3786     return;
   3787   } else if(U_FAILURE(status)) {
   3788     log_err("Error opening collator\n");
   3789     return;
   3790   }
   3791   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3792   conv = ucnv_open("UTF16BE", &status);
   3793   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   3794     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3795     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3796 
   3797     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   3798 
   3799     log_verbose("Result of strcoll is %i\n", resU16);
   3800 
   3801     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   3802     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   3803 
   3804     /* use the original sizes, as the result from converter is in bytes */
   3805     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   3806     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   3807 
   3808     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   3809 
   3810     log_verbose("Result of U16BE is %i\n", resU16BE);
   3811 
   3812     if(resU16 != resU16BE) {
   3813       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   3814     }
   3815 
   3816     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   3817     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   3818 
   3819     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   3820     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   3821 
   3822     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   3823 
   3824     if(resU16 != resU8) {
   3825       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   3826     }
   3827 
   3828   }
   3829 
   3830   ucol_close(coll);
   3831   ucnv_close(conv);
   3832 }
   3833 
   3834 static void TestHebrewUCA(void) {
   3835   UErrorCode status = U_ZERO_ERROR;
   3836   static const char *first[] = {
   3837     "d790d6b8d79cd795d6bcd7a9",
   3838     "d790d79cd79ed7a7d799d799d7a1",
   3839     "d790d6b4d79ed795d6bcd7a9",
   3840   };
   3841 
   3842   char utf8String[3][256];
   3843   UChar utf16String[3][256];
   3844 
   3845   int32_t i = 0, j = 0;
   3846   int32_t sizeUTF8[3];
   3847   int32_t sizeUTF16[3];
   3848 
   3849   UCollator *coll = ucol_open("", &status);
   3850   if (U_FAILURE(status)) {
   3851       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   3852       return;
   3853   }
   3854   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   3855 
   3856   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   3857     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   3858     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   3859     log_verbose("%i: ");
   3860     for(j = 0; j < sizeUTF16[i]; j++) {
   3861       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   3862       log_verbose("%04X", utf16String[i][j]);
   3863     }
   3864     log_verbose("\n");
   3865   }
   3866   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   3867     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   3868       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   3869     }
   3870   }
   3871 
   3872   ucol_close(coll);
   3873 
   3874 }
   3875 
   3876 static void TestPartialSortKeyTermination(void) {
   3877   static const char* cases[] = {
   3878     "\\u1234\\u1234\\udc00",
   3879     "\\udc00\\ud800\\ud800"
   3880   };
   3881 
   3882   int32_t i = sizeof(UCollator);
   3883 
   3884   UErrorCode status = U_ZERO_ERROR;
   3885 
   3886   UCollator *coll = ucol_open("", &status);
   3887 
   3888   UCharIterator iter;
   3889 
   3890   UChar currCase[256];
   3891   int32_t length = 0;
   3892   int32_t pKeyLen = 0;
   3893 
   3894   uint8_t key[256];
   3895 
   3896   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   3897     uint32_t state[2] = {0, 0};
   3898     length = u_unescape(cases[i], currCase, 256);
   3899     uiter_setString(&iter, currCase, length);
   3900     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   3901 
   3902     log_verbose("Done\n");
   3903 
   3904   }
   3905   ucol_close(coll);
   3906 }
   3907 
   3908 static void TestSettings(void) {
   3909   static const char* cases[] = {
   3910     "apple",
   3911       "Apple"
   3912   };
   3913 
   3914   static const char* locales[] = {
   3915     "",
   3916       "en"
   3917   };
   3918 
   3919   UErrorCode status = U_ZERO_ERROR;
   3920 
   3921   int32_t i = 0, j = 0;
   3922 
   3923   UChar source[256], target[256];
   3924   int32_t sLen = 0, tLen = 0;
   3925 
   3926   UCollator *collateObject = NULL;
   3927   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   3928     collateObject = ucol_open(locales[i], &status);
   3929     ucol_setStrength(collateObject, UCOL_PRIMARY);
   3930     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   3931     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   3932       sLen = u_unescape(cases[j-1], source, 256);
   3933       source[sLen] = 0;
   3934       tLen = u_unescape(cases[j], target, 256);
   3935       source[tLen] = 0;
   3936       doTest(collateObject, source, target, UCOL_EQUAL);
   3937     }
   3938     ucol_close(collateObject);
   3939   }
   3940 }
   3941 
   3942 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   3943     UErrorCode status = U_ZERO_ERROR;
   3944     int32_t errorNo = 0;
   3945     /*const UChar *sourceRules = NULL;*/
   3946     /*int32_t sourceRulesLen = 0;*/
   3947     UColAttributeValue french = UCOL_OFF;
   3948     int32_t cloneSize = 0;
   3949 
   3950     if(!ucol_equals(source, target)) {
   3951         log_err("Same collators, different address not equal\n");
   3952         errorNo++;
   3953     }
   3954     ucol_close(target);
   3955     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   3956         /* currently, safeClone is implemented through getRules/openRules
   3957         * so it is the same as the test below - I will comment that test out.
   3958         */
   3959         /* real thing */
   3960         target = ucol_safeClone(source, NULL, &cloneSize, &status);
   3961         if(U_FAILURE(status)) {
   3962             log_err("Error creating clone\n");
   3963             errorNo++;
   3964             return errorNo;
   3965         }
   3966         if(!ucol_equals(source, target)) {
   3967             log_err("Collator different from it's clone\n");
   3968             errorNo++;
   3969         }
   3970         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   3971         if(french == UCOL_ON) {
   3972             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   3973         } else {
   3974             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   3975         }
   3976         if(U_FAILURE(status)) {
   3977             log_err("Error setting attributes\n");
   3978             errorNo++;
   3979             return errorNo;
   3980         }
   3981         if(ucol_equals(source, target)) {
   3982             log_err("Collators same even when options changed\n");
   3983             errorNo++;
   3984         }
   3985         ucol_close(target);
   3986         /* commented out since safeClone uses exactly the same technique */
   3987         /*
   3988         sourceRules = ucol_getRules(source, &sourceRulesLen);
   3989         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   3990         if(U_FAILURE(status)) {
   3991         log_err("Error instantiating target from rules\n");
   3992         errorNo++;
   3993         return errorNo;
   3994         }
   3995         if(!ucol_equals(source, target)) {
   3996         log_err("Collator different from collator that was created from the same rules\n");
   3997         errorNo++;
   3998         }
   3999         ucol_close(target);
   4000         */
   4001     }
   4002     return errorNo;
   4003 }
   4004 
   4005 
   4006 static void TestEquals(void) {
   4007     /* ucol_equals is not currently a public API. There is a chance that it will become
   4008     * something like this, but currently it is only used by RuleBasedCollator::operator==
   4009     */
   4010     /* test whether the two collators instantiated from the same locale are equal */
   4011     UErrorCode status = U_ZERO_ERROR;
   4012     UParseError parseError;
   4013     int32_t noOfLoc = uloc_countAvailable();
   4014     const char *locName = NULL;
   4015     UCollator *source = NULL, *target = NULL;
   4016     int32_t i = 0;
   4017 
   4018     const char* rules[] = {
   4019         "&l < lj <<< Lj <<< LJ",
   4020         "&n < nj <<< Nj <<< NJ",
   4021         "&ae <<< \\u00e4",
   4022         "&AE <<< \\u00c4"
   4023     };
   4024     /*
   4025     const char* badRules[] = {
   4026     "&l <<< Lj",
   4027     "&n < nj <<< nJ <<< NJ",
   4028     "&a <<< \\u00e4",
   4029     "&AE <<< \\u00c4 <<< x"
   4030     };
   4031     */
   4032 
   4033     UChar sourceRules[1024], targetRules[1024];
   4034     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   4035     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   4036 
   4037     for(i = 0; i < rulesSize; i++) {
   4038         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   4039         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   4040     }
   4041 
   4042     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4043     if(status == U_FILE_ACCESS_ERROR) {
   4044         log_data_err("Is your data around?\n");
   4045         return;
   4046     } else if(U_FAILURE(status)) {
   4047         log_err("Error opening collator\n");
   4048         return;
   4049     }
   4050     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4051     if(!ucol_equals(source, target)) {
   4052         log_err("Equivalent collators not equal!\n");
   4053     }
   4054     ucol_close(source);
   4055     ucol_close(target);
   4056 
   4057     source = ucol_open("root", &status);
   4058     target = ucol_open("root", &status);
   4059     log_verbose("Testing root\n");
   4060     if(!ucol_equals(source, source)) {
   4061         log_err("Same collator not equal\n");
   4062     }
   4063     if(TestEqualsForCollator(locName, source, target)) {
   4064         log_err("Errors for root\n", locName);
   4065     }
   4066     ucol_close(source);
   4067 
   4068     for(i = 0; i<noOfLoc; i++) {
   4069         status = U_ZERO_ERROR;
   4070         locName = uloc_getAvailable(i);
   4071         /*if(hasCollationElements(locName)) {*/
   4072         log_verbose("Testing equality for locale %s\n", locName);
   4073         source = ucol_open(locName, &status);
   4074         target = ucol_open(locName, &status);
   4075         if (U_FAILURE(status)) {
   4076             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   4077             continue;
   4078         }
   4079         if(TestEqualsForCollator(locName, source, target)) {
   4080             log_err("Errors for locale %s\n", locName);
   4081         }
   4082         ucol_close(source);
   4083         /*}*/
   4084     }
   4085 }
   4086 
   4087 static void TestJ2726(void) {
   4088     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   4089     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   4090     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   4091     UErrorCode status = U_ZERO_ERROR;
   4092     UCollator *coll = ucol_open("en", &status);
   4093     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   4094     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4095     doTest(coll, a, aSpace, UCOL_EQUAL);
   4096     doTest(coll, aSpace, a, UCOL_EQUAL);
   4097     doTest(coll, a, spaceA, UCOL_EQUAL);
   4098     doTest(coll, spaceA, a, UCOL_EQUAL);
   4099     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   4100     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   4101     ucol_close(coll);
   4102 }
   4103 
   4104 static void NullRule(void) {
   4105     UChar r[3] = {0};
   4106     UErrorCode status = U_ZERO_ERROR;
   4107     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4108     if(U_SUCCESS(status)) {
   4109         log_err("This should have been an error!\n");
   4110         ucol_close(coll);
   4111     } else {
   4112         status = U_ZERO_ERROR;
   4113     }
   4114     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4115     if(U_FAILURE(status)) {
   4116         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   4117     } else {
   4118         ucol_close(coll);
   4119     }
   4120 }
   4121 
   4122 /**
   4123  * Test for CollationElementIterator previous and next for the whole set of
   4124  * unicode characters with normalization on.
   4125  */
   4126 static void TestNumericCollation(void)
   4127 {
   4128     UErrorCode status = U_ZERO_ERROR;
   4129 
   4130     const static char *basicTestStrings[]={
   4131     "hello1",
   4132     "hello2",
   4133     "hello2002",
   4134     "hello2003",
   4135     "hello123456",
   4136     "hello1234567",
   4137     "hello10000000",
   4138     "hello100000000",
   4139     "hello1000000000",
   4140     "hello10000000000",
   4141     };
   4142 
   4143     const static char *preZeroTestStrings[]={
   4144     "avery10000",
   4145     "avery010000",
   4146     "avery0010000",
   4147     "avery00010000",
   4148     "avery000010000",
   4149     "avery0000010000",
   4150     "avery00000010000",
   4151     "avery000000010000",
   4152     };
   4153 
   4154     const static char *thirtyTwoBitNumericStrings[]={
   4155     "avery42949672960",
   4156     "avery42949672961",
   4157     "avery42949672962",
   4158     "avery429496729610"
   4159     };
   4160 
   4161      const static char *longNumericStrings[]={
   4162      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   4163         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   4164         are treated as multiple collation elements. */
   4165     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   4166     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   4167     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   4168     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   4169     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   4170     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   4171     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   4172     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   4173     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   4174     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   4175     };
   4176 
   4177     const static char *supplementaryDigits[] = {
   4178       "\\uD835\\uDFCE", /* 0 */
   4179       "\\uD835\\uDFCF", /* 1 */
   4180       "\\uD835\\uDFD0", /* 2 */
   4181       "\\uD835\\uDFD1", /* 3 */
   4182       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   4183       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   4184       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   4185       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   4186       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   4187       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   4188     };
   4189 
   4190     const static char *foreignDigits[] = {
   4191       "\\u0661",
   4192         "\\u0662",
   4193         "\\u0663",
   4194       "\\u0661\\u0660",
   4195       "\\u0661\\u0662",
   4196       "\\u0661\\u0663",
   4197       "\\u0662\\u0660",
   4198       "\\u0662\\u0662",
   4199       "\\u0662\\u0663",
   4200       "\\u0663\\u0660",
   4201       "\\u0663\\u0662",
   4202       "\\u0663\\u0663"
   4203     };
   4204 
   4205     const static char *evenZeroes[] = {
   4206       "2000",
   4207       "2001",
   4208         "2002",
   4209         "2003"
   4210     };
   4211 
   4212     UColAttribute att = UCOL_NUMERIC_COLLATION;
   4213     UColAttributeValue val = UCOL_ON;
   4214 
   4215     /* Open our collator. */
   4216     UCollator* coll = ucol_open("root", &status);
   4217     if (U_FAILURE(status)){
   4218         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   4219               myErrorName(status));
   4220         return;
   4221     }
   4222     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   4223     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   4224     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   4225     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   4226     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   4227     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   4228 
   4229     /* Setting up our collator to do digits. */
   4230     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   4231     if (U_FAILURE(status)){
   4232         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   4233               myErrorName(status));
   4234         return;
   4235     }
   4236 
   4237     /*
   4238        Testing that prepended zeroes still yield the correct collation behavior.
   4239        We expect that every element in our strings array will be equal.
   4240     */
   4241     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   4242 
   4243     ucol_close(coll);
   4244 }
   4245 
   4246 static void TestTibetanConformance(void)
   4247 {
   4248     const char* test[] = {
   4249         "\\u0FB2\\u0591\\u0F71\\u0061",
   4250         "\\u0FB2\\u0F71\\u0061"
   4251     };
   4252 
   4253     UErrorCode status = U_ZERO_ERROR;
   4254     UCollator *coll = ucol_open("", &status);
   4255     UChar source[100];
   4256     UChar target[100];
   4257     int result;
   4258     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4259     if (U_SUCCESS(status)) {
   4260         u_unescape(test[0], source, 100);
   4261         u_unescape(test[1], target, 100);
   4262         doTest(coll, source, target, UCOL_EQUAL);
   4263         result = ucol_strcoll(coll, source, -1,   target, -1);
   4264         log_verbose("result %d\n", result);
   4265         if (UCOL_EQUAL != result) {
   4266             log_err("Tibetan comparison error\n");
   4267         }
   4268     }
   4269     ucol_close(coll);
   4270 
   4271     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   4272 }
   4273 
   /**
    * Passes a two-string Han sample to genericLocaleStarter() for the
    * "zh__PINYIN" locale.
    */
   static void TestPinyinProblem(void) {
       static const char *hanPair[] = {
           "\\u4E56\\u4E56\\u7761",
           "\\u4E56\\u5B69\\u5B50"
       };
       const int32_t pairCount = (int32_t)(sizeof(hanPair)/sizeof(hanPair[0]));

       genericLocaleStarter("zh__PINYIN", hanPair, pairCount);
   }
   4278 
   /* Upper bound (inclusive) of the raw values walked by the implicit-weight tests. */
   #define TST_UCOL_MAX_INPUT 0x220001
   /* Bit masks for a 32-bit value. They carry no trailing semicolon, so they
    * can be used anywhere inside an expression, not only at the end of a
    * statement. */
   #define topByte 0xFF000000
   #define bottomByte 0xFF
   #define fourBytes 0xFFFFFFFF
   4283 
   4284 
   4285 static void showImplicit(UChar32 i) {
   4286     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
   4287         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
   4288     }
   4289 }
   4290 
   4291 static void TestImplicitGeneration(void) {
   4292     UErrorCode status = U_ZERO_ERROR;
   4293     UChar32 last = 0;
   4294     UChar32 current;
   4295     UChar32 i = 0, j = 0;
   4296     UChar32 roundtrip = 0;
   4297     UChar32 lastBottom = 0;
   4298     UChar32 currentBottom = 0;
   4299     UChar32 lastTop = 0;
   4300     UChar32 currentTop = 0;
   4301 
   4302     UCollator *coll = ucol_open("root", &status);
   4303     if(U_FAILURE(status)) {
   4304         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4305         return;
   4306     }
   4307 
   4308     uprv_uca_getRawFromImplicit(0xE20303E7);
   4309 
   4310     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
   4311         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
   4312 
   4313         /* check that it round-trips AND that all intervening ones are illegal*/
   4314         roundtrip = uprv_uca_getRawFromImplicit(current);
   4315         if (roundtrip != i) {
   4316             log_err("No roundtrip %08X\n", i);
   4317         }
   4318         if (last != 0) {
   4319             for (j = last + 1; j < current; ++j) {
   4320                 roundtrip = uprv_uca_getRawFromImplicit(j);
   4321                 /* raise an error if it *doesn't* find an error*/
   4322                 if (roundtrip != -1) {
   4323                     log_err("Fails to recognize illegal %08X\n", j);
   4324                 }
   4325             }
   4326         }
   4327         /* now do other consistency checks*/
   4328         lastBottom = last & bottomByte;
   4329         currentBottom = current & bottomByte;
   4330         lastTop = last & topByte;
   4331         currentTop = current & topByte;
   4332 
   4333         /* print out some values for spot-checking*/
   4334         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
   4335             showImplicit(i-3);
   4336             showImplicit(i-2);
   4337             showImplicit(i-1);
   4338             showImplicit(i);
   4339             showImplicit(i+1);
   4340             showImplicit(i+2);
   4341         }
   4342         last = current;
   4343 
   4344         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
   4345             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
   4346         }
   4347     }
   4348     showImplicit(TST_UCOL_MAX_INPUT-2);
   4349     showImplicit(TST_UCOL_MAX_INPUT-1);
   4350     showImplicit(TST_UCOL_MAX_INPUT);
   4351     ucol_close(coll);
   4352 }
   4353 
   4354 /**
   4355  * Iterate through the given iterator, checking to see that all the strings
   4356  * in the expected array are present.
   4357  * @param expected array of strings we expect to see, or NULL
   4358  * @param expectedCount number of elements of expected, or 0
   4359  */
   4360 static int32_t checkUEnumeration(const char* msg,
   4361                                  UEnumeration* iter,
   4362                                  const char** expected,
   4363                                  int32_t expectedCount) {
   4364     UErrorCode ec = U_ZERO_ERROR;
   4365     int32_t i = 0, n, j, bit;
   4366     int32_t seenMask = 0;
   4367 
   4368     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   4369     n = uenum_count(iter, &ec);
   4370     if (!assertSuccess("count", &ec)) return -1;
   4371     log_verbose("%s = [", msg);
   4372     for (;; ++i) {
   4373         const char* s = uenum_next(iter, NULL, &ec);
   4374         if (!assertSuccess("snext", &ec) || s == NULL) break;
   4375         if (i != 0) log_verbose(",");
   4376         log_verbose("%s", s);
   4377         /* check expected list */
   4378         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4379             if ((seenMask&bit) == 0 &&
   4380                 uprv_strcmp(s, expected[j]) == 0) {
   4381                 seenMask |= bit;
   4382                 break;
   4383             }
   4384         }
   4385     }
   4386     log_verbose("] (%d)\n", i);
   4387     assertTrue("count verified", i==n);
   4388     /* did we see all expected strings? */
   4389     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4390         if ((seenMask&bit)!=0) {
   4391             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   4392         } else {
   4393             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   4394         }
   4395     }
   4396     return n;
   4397 }
   4398 
   4399 /**
   4400  * Test new API added for separate collation tree.
   4401  */
   4402 static void TestSeparateTrees(void) {
   4403     UErrorCode ec = U_ZERO_ERROR;
   4404     UEnumeration *e = NULL;
   4405     int32_t n = -1;
   4406     UBool isAvailable;
   4407     char loc[256];
   4408 
   4409     static const char* AVAIL[] = { "en", "de" };
   4410 
   4411     static const char* KW[] = { "collation" };
   4412 
   4413     static const char* KWVAL[] = { "phonebook", "stroke" };
   4414 
   4415 #if !UCONFIG_NO_SERVICE
   4416     e = ucol_openAvailableLocales(&ec);
   4417     if (e != NULL) {
   4418         assertSuccess("ucol_openAvailableLocales", &ec);
   4419         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   4420         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   4421         /* Don't need to check n because we check list */
   4422         uenum_close(e);
   4423     } else {
   4424         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   4425     }
   4426 #endif
   4427 
   4428     e = ucol_getKeywords(&ec);
   4429     if (e != NULL) {
   4430         assertSuccess("ucol_getKeywords", &ec);
   4431         assertTrue("ucol_getKeywords!=0", e!=0);
   4432         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   4433         /* Don't need to check n because we check list */
   4434         uenum_close(e);
   4435     } else {
   4436         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   4437     }
   4438 
   4439     e = ucol_getKeywordValues(KW[0], &ec);
   4440     if (e != NULL) {
   4441         assertSuccess("ucol_getKeywordValues", &ec);
   4442         assertTrue("ucol_getKeywordValues!=0", e!=0);
   4443         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   4444         /* Don't need to check n because we check list */
   4445         uenum_close(e);
   4446     } else {
   4447         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   4448     }
   4449 
   4450     /* Try setting a warning before calling ucol_getKeywordValues */
   4451     ec = U_USING_FALLBACK_WARNING;
   4452     e = ucol_getKeywordValues(KW[0], &ec);
   4453     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   4454         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   4455         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   4456         /* Don't need to check n because we check list */
   4457         uenum_close(e);
   4458     }
   4459 
   4460     /*
   4461 U_DRAFT int32_t U_EXPORT2
   4462 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   4463                              const char* locale, UBool* isAvailable,
   4464                              UErrorCode* status);
   4465 }
   4466 */
   4467     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr",
   4468                                      &isAvailable, &ec);
   4469     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4470         assertEquals("getFunctionalEquivalent(fr)", "fr", loc);
   4471         assertTrue("getFunctionalEquivalent(fr).isAvailable==TRUE",
   4472                    isAvailable == TRUE);
   4473     }
   4474 
   4475     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "fr_FR",
   4476                                      &isAvailable, &ec);
   4477     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4478         assertEquals("getFunctionalEquivalent(fr_FR)", "fr", loc);
   4479         assertTrue("getFunctionalEquivalent(fr_FR).isAvailable==TRUE",
   4480                    isAvailable == TRUE);
   4481     }
   4482 }
   4483 
   /**
    * Supersedes TestJ784.
    *
    * Runs two string tables through genericRulesStarter() with a tailoring
    * made of "[before 2]" rules for tone-marked vowels, and through
    * genericLocaleStarter() for the "zh" locale.
    */
   static void TestBeforePinyin(void) {
       static const char toneRules[] =
           "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
           "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
           "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
           "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
           "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
           "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

       static const char *syllables[] = {
           "l\\u0101",
           "la",
           "l\\u0101n",
           "lan ",
           "l\\u0113",
           "le",
           "l\\u0113n",
           "len"
       };

       static const char *caseVariants[] = {
           "x\\u0101",
           "x\\u0100",
           "X\\u0101",
           "X\\u0100",
           "x\\u00E1",
           "x\\u00C1",
           "X\\u00E1",
           "X\\u00C1",
           "x\\u01CE",
           "x\\u01CD",
           "X\\u01CE",
           "X\\u01CD",
           "x\\u00E0",
           "x\\u00C0",
           "X\\u00E0",
           "X\\u00C0",
           "xa",
           "xA",
           "Xa",
           "XA",
           "x\\u0101x",
           "x\\u0100x",
           "x\\u00E1x",
           "x\\u00C1x",
           "x\\u01CEx",
           "x\\u01CDx",
           "x\\u00E0x",
           "x\\u00C0x",
           "xax",
           "xAx"
       };

       const int32_t syllableCount = (int32_t)(sizeof(syllables)/sizeof(syllables[0]));
       const int32_t variantCount = (int32_t)(sizeof(caseVariants)/sizeof(caseVariants[0]));

       /* first table: custom rules, then the zh locale */
       genericRulesStarter(toneRules, syllables, syllableCount);
       genericLocaleStarter("zh", syllables, syllableCount);

       /* second table: same two runs */
       genericRulesStarter(toneRules, caseVariants, variantCount);
       genericLocaleStarter("zh", caseVariants, variantCount);
   }
   4544 
   4545 static void TestBeforeTightening(void) {
   4546     static const struct {
   4547         const char *rules;
   4548         UErrorCode expectedStatus;
   4549     } tests[] = {
   4550         { "&[before 1]a<x", U_ZERO_ERROR },
   4551         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   4552         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   4553         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   4554         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   4555         { "&[before 2]a<<x",U_ZERO_ERROR },
   4556         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   4557         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   4558         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   4559         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   4560         { "&[before 3]a<<<x",U_ZERO_ERROR },
   4561         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   4562         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   4563     };
   4564 
   4565     int32_t i = 0;
   4566 
   4567     UErrorCode status = U_ZERO_ERROR;
   4568     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4569     uint32_t rlen = 0;
   4570 
   4571     UCollator *coll = NULL;
   4572 
   4573 
   4574     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4575         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   4576         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4577         if(status != tests[i].expectedStatus) {
   4578             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   4579                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   4580         }
   4581         ucol_close(coll);
   4582         status = U_ZERO_ERROR;
   4583     }
   4584 
   4585 }
   4586 
   4587 #if 0
   4588 &m < a
   4589 &[before 1] a < x <<< X << q <<< Q < z
   4590 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   4591 
   4592 &m < a
   4593 &[before 2] a << x <<< X << q <<< Q < z
   4594 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   4595 
   4596 &m < a
   4597 &[before 3] a <<< x <<< X << q <<< Q < z
   4598 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   4599 
   4600 
   4601 &m << a
   4602 &[before 1] a < x <<< X << q <<< Q < z
   4603 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   4604 
   4605 &m << a
   4606 &[before 2] a << x <<< X << q <<< Q < z
   4607 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   4608 
   4609 &m << a
   4610 &[before 3] a <<< x <<< X << q <<< Q < z
   4611 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   4612 
   4613 
   4614 &m <<< a
   4615 &[before 1] a < x <<< X << q <<< Q < z
   4616 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   4617 
   4618 &m <<< a
   4619 &[before 2] a << x <<< X << q <<< Q < z
   4620 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   4621 
   4622 &m <<< a
   4623 &[before 3] a <<< x <<< X << q <<< Q < z
   4624 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   4625 
   4626 
   4627 &[before 1] s < x <<< X << q <<< Q < z
   4628 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   4629 
   4630 &[before 2] s << x <<< X << q <<< Q < z
   4631 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   4632 
   4633 &[before 3] s <<< x <<< X << q <<< Q < z
   4634 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   4635 
   4636 
   4637 &[before 1] \u24DC < x <<< X << q <<< Q < z
   4638 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   4639 
   4640 &[before 2] \u24DC << x <<< X << q <<< Q < z
   4641 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   4642 
   4643 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   4644 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   4645 #endif
   4646 
   4647 
   #if 0
   /* requires features not yet supported */
   /**
    * Disabled test: for each tailoring that combines a reset with a
    * "[before N]" rule, hands the listed strings to genericRulesStarter().
    */
   static void TestMoreBefore(void) {
       static const struct {
           const char* tailoring;
           const char* expected[16];
           int32_t count;
       } cases[] = {
           { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
             { "m","M","x","X","q","Q","z","a","n" }, 9},
           { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
             { "m","M","x","X","q","Q","a","z","n" }, 9},
           { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
             { "m","M","x","X","a","q","Q","z","n" }, 9},
           { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
             { "x","X","q","Q","z","m","M","a","n" }, 9},
           { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
             { "m","M","x","X","q","Q","a","z","n" }, 9},
           { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
             { "m","M","x","X","a","q","Q","z","n" }, 9},
           { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
             { "x","X","q","Q","z","n","m","a","M" }, 9},
           { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
             { "x","X","q","Q","m","a","M","z","n" }, 9},
           { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
             { "m","x","X","a","M","q","Q","z","n" }, 9},
           { "&[before 1] s < x <<< X << q <<< Q < z",
             { "r","R","x","X","q","Q","z","s","n" }, 9},
           { "&[before 2] s << x <<< X << q <<< Q < z",
             { "r","R","x","X","q","Q","s","z","n" }, 9},
           { "&[before 3] s <<< x <<< X << q <<< Q < z",
             { "r","R","x","X","s","q","Q","z","n" }, 9},
           { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
             { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
           { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
             { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
           { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
             { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
       };

       const int32_t caseCount = (int32_t)(sizeof(cases)/sizeof(cases[0]));
       int32_t idx;

       for(idx = 0; idx < caseCount; ++idx) {
           genericRulesStarter(cases[idx].tailoring, cases[idx].expected, cases[idx].count);
       }
   }
   #endif
   4695 
   4696 static void TestTailorNULL( void ) {
   4697     const static char* rule = "&a <<< '\\u0000'";
   4698     UErrorCode status = U_ZERO_ERROR;
   4699     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4700     uint32_t rlen = 0;
   4701     UChar a = 1, null = 0;
   4702     UCollationResult res = UCOL_EQUAL;
   4703 
   4704     UCollator *coll = NULL;
   4705 
   4706 
   4707     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   4708     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4709 
   4710     if(U_FAILURE(status)) {
   4711         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   4712     } else {
   4713         res = ucol_strcoll(coll, &a, 1, &null, 1);
   4714 
   4715         if(res != UCOL_LESS) {
   4716             log_err("NULL was not tailored properly!\n");
   4717         }
   4718     }
   4719 
   4720     ucol_close(coll);
   4721 }
   4722 
   4723 static void
   4724 TestThaiSortKey(void)
   4725 {
   4726   UChar yamakan = 0x0E4E;
   4727   UErrorCode status = U_ZERO_ERROR;
   4728   uint8_t key[256];
   4729   int32_t keyLen = 0;
   4730   /* NOTE: there is a Thai tailoring that moves Yammakan. It should not move it, */
   4731   /* since it stays in the same relative position. This should be addressed in CLDR */
   4732   /* UCA 4.0 uint8_t expectedKey[256] = { 0x01, 0xd9, 0xb2, 0x01, 0x05, 0x00 }; */
   4733   /* UCA 4.1 uint8_t expectedKey[256] = { 0x01, 0xdb, 0x3a, 0x01, 0x05, 0x00 }; */
   4734   /* UCA 5.0 uint8_t expectedKey[256] = { 0x01, 0xdc, 0xce, 0x01, 0x05, 0x00 }; */
   4735   /* UCA 5.1 moves Yammakan */
   4736   uint8_t expectedKey[256] = { 0x01, 0xe0, 0x4e, 0x01, 0x05, 0x00 };
   4737   UCollator *coll = ucol_open("th", &status);
   4738   if(U_FAILURE(status)) {
   4739     log_err_status(status, "Could not open a collator, exiting (%s)\n", u_errorName(status));
   4740     return;
   4741   }
   4742 
   4743   keyLen = ucol_getSortKey(coll, &yamakan, 1, key, 256);
   4744   if(strcmp((char *)key, (char *)expectedKey)) {
   4745     log_err("Yammakan key is different from ICU 4.0!\n");
   4746   }
   4747 
   4748   ucol_close(coll);
   4749 }
   4750 
   4751 static void
   4752 TestUpperFirstQuaternary(void)
   4753 {
   4754   const char* tests[] = { "B", "b", "Bb", "bB" };
   4755   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   4756   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   4757   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4758 }
   4759 
   4760 static void
   4761 TestJ4960(void)
   4762 {
   4763   const char* tests[] = { "\\u00e2T", "aT" };
   4764   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   4765   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   4766   const char* tests2[] = { "a", "A" };
   4767   const char* rule = "&[first tertiary ignorable]=A=a";
   4768   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   4769   UColAttributeValue attVals2[] = { UCOL_ON };
   4770   /* Test whether we correctly ignore primary ignorables on case level when */
   4771   /* we have only primary & case level */
   4772   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   4773   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   4774   /* and case level */
   4775   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4776   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   4777   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   4778 }
   4779 
   4780 static void
   4781 TestJ5223(void)
   4782 {
   4783   static const char *test = "this is a test string";
   4784   UChar ustr[256];
   4785   int32_t ustr_length = u_unescape(test, ustr, 256);
   4786   unsigned char sortkey[256];
   4787   int32_t sortkey_length;
   4788   UErrorCode status = U_ZERO_ERROR;
   4789   static UCollator *coll = NULL;
   4790   coll = ucol_open("root", &status);
   4791   if(U_FAILURE(status)) {
   4792     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4793     return;
   4794   }
   4795   ucol_setStrength(coll, UCOL_PRIMARY);
   4796   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4797   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4798   if (U_FAILURE(status)) {
   4799     log_err("Failed setting atributes\n");
   4800     return;
   4801   }
   4802   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   4803   if (sortkey_length > 256) return;
   4804 
   4805   /* we mark the position where the null byte should be written in advance */
   4806   sortkey[sortkey_length-1] = 0xAA;
   4807 
   4808   /* we set the buffer size one byte higher than needed */
   4809   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4810     sortkey_length+1);
   4811 
   4812   /* no error occurs (for me) */
   4813   if (sortkey[sortkey_length-1] == 0xAA) {
   4814     log_err("Hit bug at first try\n");
   4815   }
   4816 
   4817   /* we mark the position where the null byte should be written again */
   4818   sortkey[sortkey_length-1] = 0xAA;
   4819 
   4820   /* this time we set the buffer size to the exact amount needed */
   4821   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4822     sortkey_length);
   4823 
   4824   /* now the trailing null byte is not written */
   4825   if (sortkey[sortkey_length-1] == 0xAA) {
   4826     log_err("Hit bug at second try\n");
   4827   }
   4828 
   4829   ucol_close(coll);
   4830 }
   4831 
   4832 /* Regression test for Thai partial sort key problem */
   4833 static void
   4834 TestJ5232(void)
   4835 {
   4836     const static char *test[] = {
   4837         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   4838         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   4839     };
   4840 
   4841     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   4842 }
   4843 
   4844 static void
   4845 TestJ5367(void)
   4846 {
   4847     const static char *test[] = { "a", "y" };
   4848     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   4849     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4850 }
   4851 
   4852 static void
   4853 TestVI5913(void)
   4854 {
   4855     UErrorCode status = U_ZERO_ERROR;
   4856     int32_t i, j;
   4857     UCollator *coll =NULL;
   4858     uint8_t  resColl[100], expColl[100];
   4859     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   4860     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
   4861     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   4862     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
   4863     static const UChar tData[][20]={
   4864         {0x1EAC, 0},
   4865         {0x0041, 0x0323, 0x0302, 0},
   4866         {0x1EA0, 0x0302, 0},
   4867         {0x00C2, 0x0323, 0},
   4868         {0x1ED8, 0},  /* O with dot and circumflex */
   4869         {0x1ECC, 0x0302, 0},
   4870         {0x1EB7, 0},
   4871         {0x1EA1, 0x0306, 0},
   4872     };
   4873     static const UChar tailorData[][20]={
   4874         {0x1FA2, 0},  /* Omega with 3 combining marks */
   4875         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   4876         {0x1FF3, 0x0313, 0x0300, 0},
   4877         {0x1F60, 0x0300, 0x0345, 0},
   4878         {0x1F62, 0x0345, 0},
   4879         {0x1FA0, 0x0300, 0},
   4880     };
   4881     static const UChar tailorData2[][20]={
   4882         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   4883         {0x0073, 0x0323, 0x030C, 0},
   4884         {0x0073, 0x030C, 0x0323, 0},
   4885     };
   4886     static const UChar tailorData3[][20]={
   4887         {0x007a, 0},  /*  z */
   4888         {0x0061, 0x0065, 0},  /*  a + e */
   4889         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   4890         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   4891         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   4892         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   4893         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   4894         {0x00EA, 0},  /* e with circumflex  */
   4895     };
   4896 
   4897     /* Test Vietnamese sort. */
   4898     coll = ucol_open("vi", &status);
   4899     if(U_FAILURE(status)) {
   4900         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   4901         return;
   4902     }
   4903     log_verbose("\n\nVI collation:");
   4904     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   4905         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4906     }
   4907     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   4908         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   4909     }
   4910     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   4911         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   4912     }
   4913     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   4914         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   4915     }
   4916 
   4917     for (j=0; j<8; j++) {
   4918         tLen = u_strlen(tData[j]);
   4919         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   4920         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   4921         for(i = 0; i<rLen; i++) {
   4922             log_verbose(" %02X", resColl[i]);
   4923         }
   4924     }
   4925 
   4926     ucol_close(coll);
   4927 
   4928     /* Test Romanian sort. */
   4929     coll = ucol_open("ro", &status);
   4930     log_verbose("\n\nRO collation:");
   4931     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   4932         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4933     }
   4934     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   4935         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   4936     }
   4937     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   4938         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   4939     }
   4940 
   4941     for (j=4; j<8; j++) {
   4942         tLen = u_strlen(tData[j]);
   4943         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   4944         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   4945         for(i = 0; i<rLen; i++) {
   4946             log_verbose(" %02X", resColl[i]);
   4947         }
   4948     }
   4949     ucol_close(coll);
   4950 
   4951     /* Test the precomposed Greek character with 3 combining marks. */
   4952     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   4953     ruleLen = u_strlen(rule);
   4954     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   4955     if (U_FAILURE(status)) {
   4956         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   4957         return;
   4958     }
   4959     sLen = u_strlen(tailorData[0]);
   4960     for (j=1; j<6; j++) {
   4961         tLen = u_strlen(tailorData[j]);
   4962         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   4963             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   4964         }
   4965     }
   4966     /* Test getSortKey. */
   4967     tLen = u_strlen(tailorData[0]);
   4968     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   4969     for (j=0; j<6; j++) {
   4970         tLen = u_strlen(tailorData[j]);
   4971         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   4972         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   4973             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   4974             for(i = 0; i<rLen; i++) {
   4975                 log_err(" %02X", resColl[i]);
   4976             }
   4977         }
   4978     }
   4979     ucol_close(coll);
   4980 
   4981     log_verbose("\n\nTailoring test for s with caron:");
   4982     ruleLen = u_strlen(rule2);
   4983     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   4984     tLen = u_strlen(tailorData2[0]);
   4985     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   4986     for (j=1; j<3; j++) {
   4987         tLen = u_strlen(tailorData2[j]);
   4988         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   4989         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   4990             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   4991             for(i = 0; i<rLen; i++) {
   4992                 log_err(" %02X", resColl[i]);
   4993             }
   4994         }
   4995     }
   4996     ucol_close(coll);
   4997 
   4998     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   4999     ruleLen = u_strlen(rule3);
   5000     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5001     tLen = u_strlen(tailorData3[3]);
   5002     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   5003     for (j=4; j<6; j++) {
   5004         tLen = u_strlen(tailorData3[j]);
   5005         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   5006 
   5007         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5008             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5009             for(i = 0; i<rLen; i++) {
   5010                 log_err(" %02X", resColl[i]);
   5011             }
   5012         }
   5013 
   5014         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5015          for(i = 0; i<rLen; i++) {
   5016              log_verbose(" %02X", resColl[i]);
   5017          }
   5018     }
   5019     ucol_close(coll);
   5020 }
   5021 
   5022 static void
   5023 TestTailor6179(void)
   5024 {
   5025     UErrorCode status = U_ZERO_ERROR;
   5026     int32_t i;
   5027     UCollator *coll =NULL;
   5028     uint8_t  resColl[100];
   5029     int32_t  rLen, tLen, ruleLen;
   5030     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   5031     UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   5032             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   5033             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   5034             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   5035     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   5036     UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   5037             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   5038             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   5039             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   5040             0x3C,0x3C,0x20,0x62,0};
   5041 
   5042     UChar tData1[][20]={
   5043         {0x61, 0},
   5044         {0x62, 0},
   5045         { 0xFDD0,0x009E, 0}
   5046     };
   5047     UChar tData2[][20]={
   5048             {0x61, 0},
   5049             {0x62, 0},
   5050             { 0xFDD0,0x009E, 0}
   5051      };
   5052 
   5053     /* UCA5.1, the value may increase in later version. */
   5054     uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
   5055     uint8_t lastPrimaryIgnCE[6]={1, 0xE7, 0xB9, 1, 5, 0};
   5056     uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
   5057     uint8_t lastSecondaryIgnCE[6]={1, 1, 0x05, 0};
   5058 
   5059     /* Test [Last Primary ignorable] */
   5060 
   5061     log_verbose("\n\nTailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
   5062     ruleLen = u_strlen(rule1);
   5063     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5064     if (U_FAILURE(status)) {
   5065         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   5066         return;
   5067     }
   5068     tLen = u_strlen(tData1[0]);
   5069     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   5070     if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
   5071         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   5072         for(i = 0; i<rLen; i++) {
   5073             log_err(" %02X", resColl[i]);
   5074         }
   5075     }
   5076     tLen = u_strlen(tData1[1]);
   5077     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   5078     if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
   5079         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   5080         for(i = 0; i<rLen; i++) {
   5081             log_err(" %02X", resColl[i]);
   5082         }
   5083     }
   5084     ucol_close(coll);
   5085 
   5086 
   5087     /* Test [Last Secondary ignorable] */
   5088     log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
   5089     ruleLen = u_strlen(rule1);
   5090     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5091     if (U_FAILURE(status)) {
   5092         log_err("Tailoring test: &[last primary ignorable] failed!");
   5093         return;
   5094     }
   5095     tLen = u_strlen(tData2[0]);
   5096     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   5097     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5098     for(i = 0; i<rLen; i++) {
   5099         log_verbose(" %02X", resColl[i]);
   5100     }
   5101     if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
   5102         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5103         for(i = 0; i<rLen; i++) {
   5104             log_err(" %02X", resColl[i]);
   5105         }
   5106     }
   5107     tLen = u_strlen(tData2[1]);
   5108     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   5109     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5110     for(i = 0; i<rLen; i++) {
   5111         log_verbose(" %02X", resColl[i]);
   5112     }
   5113     if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
   5114         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5115         for(i = 0; i<rLen; i++) {
   5116             log_err(" %02X", resColl[i]);
   5117         }
   5118     }
   5119     ucol_close(coll);
   5120 }
   5121 
   5122 static void
   5123 TestUCAPrecontext(void)
   5124 {
   5125     UErrorCode status = U_ZERO_ERROR;
   5126     int32_t i, j;
   5127     UCollator *coll =NULL;
   5128     uint8_t  resColl[100], prevColl[100];
   5129     int32_t  rLen, tLen, ruleLen;
   5130     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   5131     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   5132     /* & l middle-dot << a  a is an expansion. */
   5133 
   5134     UChar tData1[][20]={
   5135             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   5136             { 0x387, 0}, /* standalone middle dot(0x387) */
   5137             { 0x61, 0},  /* a */
   5138             { 0x6C, 0},  /* l */
   5139             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   5140             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   5141             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   5142             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   5143             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   5144             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   5145             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   5146      };
   5147 
   5148     log_verbose("\n\nEN collation:");
   5149     coll = ucol_open("en", &status);
   5150     if (U_FAILURE(status)) {
   5151         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   5152         return;
   5153     }
   5154     for (j=0; j<11; j++) {
   5155         tLen = u_strlen(tData1[j]);
   5156         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5157         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5158             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5159                     j, tData1[j]);
   5160         }
   5161         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5162         for(i = 0; i<rLen; i++) {
   5163             log_verbose(" %02X", resColl[i]);
   5164         }
   5165         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5166      }
   5167      ucol_close(coll);
   5168 
   5169 
   5170      log_verbose("\n\nJA collation:");
   5171      coll = ucol_open("ja", &status);
   5172      if (U_FAILURE(status)) {
   5173          log_err("Tailoring test: &z <<a|- failed!");
   5174          return;
   5175      }
   5176      for (j=0; j<11; j++) {
   5177          tLen = u_strlen(tData1[j]);
   5178          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5179          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5180              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5181                      j, tData1[j]);
   5182          }
   5183          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5184          for(i = 0; i<rLen; i++) {
   5185              log_verbose(" %02X", resColl[i]);
   5186          }
   5187          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5188       }
   5189       ucol_close(coll);
   5190 
   5191 
   5192       log_verbose("\n\nTailoring test: & middle dot < a ");
   5193       ruleLen = u_strlen(rule1);
   5194       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5195       if (U_FAILURE(status)) {
   5196           log_err("Tailoring test: & middle dot < a failed!");
   5197           return;
   5198       }
   5199       for (j=0; j<11; j++) {
   5200           tLen = u_strlen(tData1[j]);
   5201           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5202           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5203               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5204                       j, tData1[j]);
   5205           }
   5206           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5207           for(i = 0; i<rLen; i++) {
   5208               log_verbose(" %02X", resColl[i]);
   5209           }
   5210           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5211        }
   5212        ucol_close(coll);
   5213 
   5214 
   5215        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   5216        ruleLen = u_strlen(rule2);
   5217        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5218        if (U_FAILURE(status)) {
   5219            log_err("Tailoring test: & l middle-dot << a failed!");
   5220            return;
   5221        }
   5222        for (j=0; j<11; j++) {
   5223            tLen = u_strlen(tData1[j]);
   5224            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5225            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5226                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5227                        j, tData1[j]);
   5228            }
   5229            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   5230                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   5231                        j, tData1[j]);
   5232            }
   5233            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5234            for(i = 0; i<rLen; i++) {
   5235                log_verbose(" %02X", resColl[i]);
   5236            }
   5237            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5238         }
   5239         ucol_close(coll);
   5240 }
   5241 
   5242 static void
   5243 TestOutOfBuffer5468(void)
   5244 {
   5245     static const char *test = "\\u4e00";
   5246     UChar ustr[256];
   5247     int32_t ustr_length = u_unescape(test, ustr, 256);
   5248     unsigned char shortKeyBuf[1];
   5249     int32_t sortkey_length;
   5250     UErrorCode status = U_ZERO_ERROR;
   5251     static UCollator *coll = NULL;
   5252 
   5253     coll = ucol_open("root", &status);
   5254     if(U_FAILURE(status)) {
   5255       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   5256       return;
   5257     }
   5258     ucol_setStrength(coll, UCOL_PRIMARY);
   5259     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   5260     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5261     if (U_FAILURE(status)) {
   5262       log_err("Failed setting atributes\n");
   5263       return;
   5264     }
   5265 
   5266     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   5267     if (sortkey_length != 4) {
   5268         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   5269     }
   5270     log_verbose("length of sortKey is %d", sortkey_length);
   5271     ucol_close(coll);
   5272 }
   5273 
   5274 #define TSKC_DATA_SIZE 5
   5275 #define TSKC_BUF_SIZE  50
   5276 static void
   5277 TestSortKeyConsistency(void)
   5278 {
   5279     UErrorCode icuRC = U_ZERO_ERROR;
   5280     UCollator* ucol;
   5281     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   5282 
   5283     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5284     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5285     int32_t i, j, i2;
   5286 
   5287     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   5288     if (U_FAILURE(icuRC))
   5289     {
   5290         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   5291         return;
   5292     }
   5293 
   5294     for (i = 0; i < TSKC_DATA_SIZE; i++)
   5295     {
   5296         UCharIterator uiter;
   5297         uint32_t state[2] = { 0, 0 };
   5298         int32_t dataLen = i+1;
   5299         for (j=0; j<TSKC_BUF_SIZE; j++)
   5300             bufFull[i][j] = bufPart[i][j] = 0;
   5301 
   5302         /* Full sort key */
   5303         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   5304 
   5305         /* Partial sort key */
   5306         uiter_setString(&uiter, data, dataLen);
   5307         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   5308         if (U_FAILURE(icuRC))
   5309         {
   5310             log_err("ucol_nextSortKeyPart failed\n");
   5311             ucol_close(ucol);
   5312             return;
   5313         }
   5314 
   5315         for (i2=0; i2<i; i2++)
   5316         {
   5317             UBool fullMatch = TRUE;
   5318             UBool partMatch = TRUE;
   5319             for (j=0; j<TSKC_BUF_SIZE; j++)
   5320             {
   5321                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   5322                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   5323             }
   5324             if (fullMatch != partMatch) {
   5325                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   5326                                   : "partial key was consistent, but full key changed\n");
   5327                 ucol_close(ucol);
   5328                 return;
   5329             }
   5330         }
   5331     }
   5332 
   5333     /*=============================================*/
   5334    ucol_close(ucol);
   5335 }
   5336 
   5337 /* ticket: 6101 */
   5338 static void TestCroatianSortKey(void) {
   5339     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   5340     UErrorCode status = U_ZERO_ERROR;
   5341     UCollator *ucol;
   5342     UCharIterator iter;
   5343 
   5344     static const UChar text[] = { 0x0044, 0xD81A };
   5345 
   5346     size_t length = sizeof(text)/sizeof(*text);
   5347 
   5348     uint8_t textSortKey[32];
   5349     size_t lenSortKey = 32;
   5350     size_t actualSortKeyLen;
   5351     uint32_t uStateInfo[2] = { 0, 0 };
   5352 
   5353     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   5354     if (U_FAILURE(status)) {
   5355         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   5356         return;
   5357     }
   5358 
   5359     uiter_setString(&iter, text, length);
   5360 
   5361     actualSortKeyLen = ucol_nextSortKeyPart(
   5362         ucol, &iter, (uint32_t*)uStateInfo,
   5363         textSortKey, lenSortKey, &status
   5364         );
   5365 
   5366     if (actualSortKeyLen == lenSortKey) {
   5367         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   5368     }
   5369 
   5370     ucol_close(ucol);
   5371 }
   5372 
   5373 /* ticket: 6140 */
   5374 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   5375  * they are both Hiragana and Katakana
   5376  */
   5377 #define SORTKEYLEN 50
   5378 static void TestHiragana(void) {
   5379     UErrorCode status = U_ZERO_ERROR;
   5380     UCollator* ucol;
   5381     UCollationResult strcollresult;
   5382     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   5383     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   5384     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   5385     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   5386     int32_t i, j;
   5387     uint8_t sortKey1[SORTKEYLEN];
   5388     uint8_t sortKey2[SORTKEYLEN];
   5389 
   5390     UCharIterator uiter1;
   5391     UCharIterator uiter2;
   5392     uint32_t state1[2] = { 0, 0 };
   5393     uint32_t state2[2] = { 0, 0 };
   5394     int32_t keySize1;
   5395     int32_t keySize2;
   5396 
   5397     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   5398             &status);
   5399     if (U_FAILURE(status)) {
   5400         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   5401         return;
   5402     }
   5403 
   5404     /* Start of full sort keys */
   5405     /* Full sort key1 */
   5406     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   5407     /* Full sort key2 */
   5408     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   5409     if (keySize1 == keySize2) {
   5410         for (i = 0; i < keySize1; i++) {
   5411             if (sortKey1[i] != sortKey2[i]) {
   5412                 log_err("Full sort keys are different. Should be equal.");
   5413             }
   5414         }
   5415     } else {
   5416         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   5417     }
   5418     /* End of full sort keys */
   5419 
   5420     /* Start of partial sort keys */
   5421     /* Partial sort key1 */
   5422     uiter_setString(&uiter1, data1, data1Len);
   5423     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   5424     /* Partial sort key2 */
   5425     uiter_setString(&uiter2, data2, data2Len);
   5426     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   5427     if (U_SUCCESS(status) && keySize1 == keySize2) {
   5428         for (j = 0; j < keySize1; j++) {
   5429             if (sortKey1[j] != sortKey2[j]) {
   5430                 log_err("Partial sort keys are different. Should be equal");
   5431             }
   5432         }
   5433     } else {
   5434         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   5435     }
   5436     /* End of partial sort keys */
   5437 
   5438     /* Start of strcoll */
   5439     /* Use ucol_strcoll() to determine ordering */
   5440     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   5441     if (strcollresult != UCOL_EQUAL) {
   5442         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   5443     }
   5444 
   5445     ucol_close(ucol);
   5446 }
   5447 
   5448 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
   5449 
   5450 void addMiscCollTest(TestNode** root)
   5451 {
   5452     TEST(TestRuleOptions);
   5453     TEST(TestBeforePrefixFailure);
   5454     TEST(TestContractionClosure);
   5455     TEST(TestPrefixCompose);
   5456     TEST(TestStrCollIdenticalPrefix);
   5457     TEST(TestPrefix);
   5458     TEST(TestNewJapanese);
   5459     /*TEST(TestLimitations);*/
   5460     TEST(TestNonChars);
   5461     TEST(TestExtremeCompression);
   5462     TEST(TestSurrogates);
   5463     TEST(TestVariableTopSetting);
   5464     TEST(TestBocsuCoverage);
   5465     TEST(TestCyrillicTailoring);
   5466     TEST(TestCase);
   5467     TEST(IncompleteCntTest);
   5468     TEST(BlackBirdTest);
   5469     TEST(FunkyATest);
   5470     TEST(BillFairmanTest);
   5471     TEST(RamsRulesTest);
   5472     TEST(IsTailoredTest);
   5473     TEST(TestCollations);
   5474     TEST(TestChMove);
   5475     TEST(TestImplicitTailoring);
   5476     TEST(TestFCDProblem);
   5477     TEST(TestEmptyRule);
   5478     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
   5479     TEST(TestJ815);
   5480     /*TEST(TestJ831);*/ /* we changed lv locale */
   5481     TEST(TestBefore);
   5482     TEST(TestRedundantRules);
   5483     TEST(TestExpansionSyntax);
   5484     TEST(TestHangulTailoring);
   5485     TEST(TestUCARules);
   5486     TEST(TestIncrementalNormalize);
   5487     TEST(TestComposeDecompose);
   5488     TEST(TestCompressOverlap);
   5489     TEST(TestContraction);
   5490     TEST(TestExpansion);
   5491     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
   5492     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
   5493     TEST(TestOptimize);
   5494     TEST(TestSuppressContractions);
   5495     TEST(Alexis2);
   5496     TEST(TestHebrewUCA);
   5497     TEST(TestPartialSortKeyTermination);
   5498     TEST(TestSettings);
   5499     TEST(TestEquals);
   5500     TEST(TestJ2726);
   5501     TEST(NullRule);
   5502     TEST(TestNumericCollation);
   5503     TEST(TestTibetanConformance);
   5504     TEST(TestPinyinProblem);
   5505     TEST(TestImplicitGeneration);
   5506     TEST(TestSeparateTrees);
   5507     TEST(TestBeforePinyin);
   5508     TEST(TestBeforeTightening);
   5509     /*TEST(TestMoreBefore);*/
   5510     TEST(TestTailorNULL);
   5511     TEST(TestThaiSortKey);
   5512     TEST(TestUpperFirstQuaternary);
   5513     TEST(TestJ4960);
   5514     TEST(TestJ5223);
   5515     TEST(TestJ5232);
   5516     TEST(TestJ5367);
   5517     TEST(TestHiragana);
   5518     TEST(TestSortKeyConsistency);
   5519     TEST(TestVI5913);  /* VI, RO tailored rules */
   5520     TEST(TestCroatianSortKey);
   5521     TEST(TestTailor6179);
   5522     TEST(TestUCAPrecontext);
   5523     TEST(TestOutOfBuffer5468);
   5524 }
   5525 
   5526 #endif /* #if !UCONFIG_NO_COLLATION */
   5527