Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2012, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "ucol_tok.h"
     33 #include "cmemory.h"
     34 #include "cstring.h"
     35 #include "uassert.h"
     36 #include "unicode/parseerr.h"
     37 #include "unicode/ucnv.h"
     38 #include "unicode/ures.h"
     39 #include "unicode/uscript.h"
     40 #include "unicode/utf16.h"
     41 #include "uparse.h"
     42 #include "putilimp.h"
     43 
     44 
     45 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     46 
     47 #define MAX_TOKEN_LEN 16
     48 
/*
 * Signature of the pluggable string-comparison callbacks used by the
 * swampEarlier/swampLater/probeStrength/testSwitch helpers in this file.
 *   collator - opaque comparison context (ucaTest casts it to UCollator *).
 *   object   - extra integer option forwarded unchanged by the helpers;
 *              ucaTest ignores it. NOTE(review): intended meaning not visible here.
 *   source/sLen, target/tLen - the two strings and their lengths in UChars.
 * Returns the ordering of source relative to target.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
     52 
     53 
     54 
     55 const static char cnt1[][10] = {
     56 
     57   "AA",
     58   "AC",
     59   "AZ",
     60   "AQ",
     61   "AB",
     62   "ABZ",
     63   "ABQ",
     64   "Z",
     65   "ABC",
     66   "Q",
     67   "B"
     68 };
     69 
     70 const static char cnt2[][10] = {
     71   "DA",
     72   "DAD",
     73   "DAZ",
     74   "MAR",
     75   "Z",
     76   "DAVIS",
     77   "MARK",
     78   "DAV",
     79   "DAVI"
     80 };
     81 
     82 static void IncompleteCntTest(void)
     83 {
     84   UErrorCode status = U_ZERO_ERROR;
     85   UChar temp[90];
     86   UChar t1[90];
     87   UChar t2[90];
     88 
     89   UCollator *coll =  NULL;
     90   uint32_t i = 0, j = 0;
     91   uint32_t size = 0;
     92 
     93   u_uastrcpy(temp, " & Z < ABC < Q < B");
     94 
     95   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     96 
     97   if(U_SUCCESS(status)) {
     98     size = sizeof(cnt1)/sizeof(cnt1[0]);
     99     for(i = 0; i < size-1; i++) {
    100       for(j = i+1; j < size; j++) {
    101         UCollationElements *iter;
    102         u_uastrcpy(t1, cnt1[i]);
    103         u_uastrcpy(t2, cnt1[j]);
    104         doTest(coll, t1, t2, UCOL_LESS);
    105         /* synwee : added collation element iterator test */
    106         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    107         if (U_FAILURE(status)) {
    108           log_err("Creation of iterator failed\n");
    109           break;
    110         }
    111         backAndForth(iter);
    112         ucol_closeElements(iter);
    113       }
    114     }
    115   }
    116 
    117   ucol_close(coll);
    118 
    119 
    120   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    121   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    122 
    123   if(U_SUCCESS(status)) {
    124     size = sizeof(cnt2)/sizeof(cnt2[0]);
    125     for(i = 0; i < size-1; i++) {
    126       for(j = i+1; j < size; j++) {
    127         UCollationElements *iter;
    128         u_uastrcpy(t1, cnt2[i]);
    129         u_uastrcpy(t2, cnt2[j]);
    130         doTest(coll, t1, t2, UCOL_LESS);
    131 
    132         /* synwee : added collation element iterator test */
    133         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    134         if (U_FAILURE(status)) {
    135           log_err("Creation of iterator failed\n");
    136           break;
    137         }
    138         backAndForth(iter);
    139         ucol_closeElements(iter);
    140       }
    141     }
    142   }
    143 
    144   ucol_close(coll);
    145 
    146 
    147 }
    148 
    149 const static char shifted[][20] = {
    150   "black bird",
    151   "black-bird",
    152   "blackbird",
    153   "black Bird",
    154   "black-Bird",
    155   "blackBird",
    156   "black birds",
    157   "black-birds",
    158   "blackbirds"
    159 };
    160 
/* Expected tertiary-strength results for adjacent entries of shifted[]:
   BlackBirdTest compares shifted[i-1] with shifted[i] and expects
   shiftedTert[i], for i starting at 1, so element 0 is never read. */
const static UCollationResult shiftedTert[] = {
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_LESS,
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_LESS,
  UCOL_EQUAL,
  UCOL_EQUAL
};
    172 
    173 const static char nonignorable[][20] = {
    174   "black bird",
    175   "black Bird",
    176   "black birds",
    177   "black-bird",
    178   "black-Bird",
    179   "black-birds",
    180   "blackbird",
    181   "blackBird",
    182   "blackbirds"
    183 };
    184 
    185 static void BlackBirdTest(void) {
    186   UErrorCode status = U_ZERO_ERROR;
    187   UChar t1[90];
    188   UChar t2[90];
    189 
    190   uint32_t i = 0, j = 0;
    191   uint32_t size = 0;
    192   UCollator *coll = ucol_open("en_US", &status);
    193 
    194   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    195   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    196 
    197   if(U_SUCCESS(status)) {
    198     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    199     for(i = 0; i < size-1; i++) {
    200       for(j = i+1; j < size; j++) {
    201         u_uastrcpy(t1, nonignorable[i]);
    202         u_uastrcpy(t2, nonignorable[j]);
    203         doTest(coll, t1, t2, UCOL_LESS);
    204       }
    205     }
    206   }
    207 
    208   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    209   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    210 
    211   if(U_SUCCESS(status)) {
    212     size = sizeof(shifted)/sizeof(shifted[0]);
    213     for(i = 0; i < size-1; i++) {
    214       for(j = i+1; j < size; j++) {
    215         u_uastrcpy(t1, shifted[i]);
    216         u_uastrcpy(t2, shifted[j]);
    217         doTest(coll, t1, t2, UCOL_LESS);
    218       }
    219     }
    220   }
    221 
    222   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    223   if(U_SUCCESS(status)) {
    224     size = sizeof(shifted)/sizeof(shifted[0]);
    225     for(i = 1; i < size; i++) {
    226       u_uastrcpy(t1, shifted[i-1]);
    227       u_uastrcpy(t2, shifted[i]);
    228       doTest(coll, t1, t2, shiftedTert[i]);
    229     }
    230   }
    231 
    232   ucol_close(coll);
    233 }
    234 
/* Left-hand strings for FunkyATest: 'A' (or precomposed U+00C0) followed by
   combining marks. Row i is compared with testTargetCases[i] and must yield
   results[i]. */
const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    {0x0041/*'A'*/, 0x0300, 0x0000},
    {0x00C0, 0x0301, 0x0000},
    /* this would work with forced normalization */
    {0x00C0, 0x0316, 0x0000}
};
    243 
/* Right-hand strings for FunkyATest; row i is compared against
   testSourceCases[i]. */
const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    {0x00C0, 0},
    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    /* this would work with forced normalization */
    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
};
    252 
/* Expected outcome of comparing testSourceCases[i] with testTargetCases[i]
   in FunkyATest. */
const static UCollationResult results[] = {
    UCOL_GREATER,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_GREATER,
    UCOL_EQUAL
};
    260 
    261 static void FunkyATest(void)
    262 {
    263 
    264     int32_t i;
    265     UErrorCode status = U_ZERO_ERROR;
    266     UCollator  *myCollation;
    267     myCollation = ucol_open("en_US", &status);
    268     if(U_FAILURE(status)){
    269         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    270         return;
    271     }
    272     log_verbose("Testing some A letters, for some reason\n");
    273     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    274     ucol_setStrength(myCollation, UCOL_TERTIARY);
    275     for (i = 0; i < 4 ; i++)
    276     {
    277         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    278     }
    279     ucol_close(myCollation);
    280 }
    281 
/*
 * Attribute-value tables enumerating the settings of four collator attributes.
 * Within the visible part of this file they are only iterated by the disabled
 * PrintMarkDavis routine; they have external linkage, so other code may
 * reference them as well.
 */

/* Values for UCOL_CASE_FIRST. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values for UCOL_ALTERNATE_HANDLING. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values for UCOL_CASE_LEVEL. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values for UCOL_STRENGTH, weakest to strongest. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
    306 
    307 #if 0
    308 static const char * strengthsC[] = {
    309     "UCOL_PRIMARY",
    310     "UCOL_SECONDARY",
    311     "UCOL_TERTIARY",
    312     "UCOL_QUATERNARY",
    313     "UCOL_IDENTICAL"
    314 };
    315 
    316 static const char * caseFirstC[] = {
    317     "UCOL_OFF",
    318     "UCOL_LOWER_FIRST",
    319     "UCOL_UPPER_FIRST"
    320 };
    321 
    322 
    323 static const char * alternateHandlingC[] = {
    324     "UCOL_NON_IGNORABLE",
    325     "UCOL_SHIFTED"
    326 };
    327 
    328 static const char * caseLevelC[] = {
    329     "UCOL_OFF",
    330     "UCOL_ON"
    331 };
    332 
    333 /* not used currently - does not test only prints */
    334 static void PrintMarkDavis(void)
    335 {
    336   UErrorCode status = U_ZERO_ERROR;
    337   UChar m[256];
    338   uint8_t sortkey[256];
    339   UCollator *coll = ucol_open("en_US", &status);
    340   uint32_t h,i,j,k, sortkeysize;
    341   uint32_t sizem = 0;
    342   char buffer[512];
    343   uint32_t len = 512;
    344 
    345   log_verbose("PrintMarkDavis");
    346 
    347   u_uastrcpy(m, "Mark Davis");
    348   sizem = u_strlen(m);
    349 
    350 
    351   m[1] = 0xe4;
    352 
    353   for(i = 0; i<sizem; i++) {
    354     fprintf(stderr, "\\u%04X ", m[i]);
    355   }
    356   fprintf(stderr, "\n");
    357 
    358   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    359     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    360     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    361 
    362     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    363       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    364       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    365 
    366       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    367         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    368         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    369 
    370         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    371           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    372           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    373           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    374           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    375         }
    376 
    377       }
    378 
    379     }
    380 
    381   }
    382 }
    383 #endif
    384 
    385 static void BillFairmanTest(void) {
    386 /*
    387 ** check for actual locale via ICU resource bundles
    388 **
    389 ** lp points to the original locale ("fr_FR_....")
    390 */
    391 
    392     UResourceBundle *lr,*cr;
    393     UErrorCode              lec = U_ZERO_ERROR;
    394     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    395 
    396     log_verbose("BillFairmanTest\n");
    397 
    398     lr = ures_open(NULL,lp,&lec);
    399     if (lr) {
    400         cr = ures_getByKey(lr,"collations",0,&lec);
    401         if (cr) {
    402             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    403             if (lp) {
    404                 if (U_SUCCESS(lec)) {
    405                     if(strcmp(lp, "fr") != 0) {
    406                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    407                     }
    408                 }
    409             }
    410             ures_close(cr);
    411         }
    412         ures_close(lr);
    413     }
    414 }
    415 
    416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    417     UChar source[256] = { '\0'};
    418     UChar target[256] = { '\0'};
    419     UChar preP = 0x31a3;
    420     UChar preQ = 0x310d;
    421 /*
    422     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
    423     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
    424 */
    425     /*log_verbose("Testing primary\n");*/
    426 
    427     doTest(col, p, q, UCOL_LESS);
    428 /*
    429     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
    430 
    431     if(result!=UCOL_LESS){
    432        aescstrdup(p,utfSource,256);
    433        aescstrdup(q,utfTarget,256);
    434        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    435     }
    436 */
    437     source[0] = preP;
    438     u_strcpy(source+1,p);
    439     target[0] = preQ;
    440     u_strcpy(target+1,q);
    441     doTest(col, source, target, UCOL_LESS);
    442 /*
    443     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
    444 */
    445 }
    446 
    447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    448     UChar source[256] = { '\0'};
    449     UChar target[256] = { '\0'};
    450 
    451     /*log_verbose("Testing secondary\n");*/
    452 
    453     doTest(col, p, q, UCOL_LESS);
    454 /*
    455     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
    456 */
    457     source[0] = 0x0053;
    458     u_strcpy(source+1,p);
    459     target[0]= 0x0073;
    460     u_strcpy(target+1,q);
    461 
    462     doTest(col, source, target, UCOL_LESS);
    463 /*
    464     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    465 */
    466 
    467 
    468     u_strcpy(source,p);
    469     source[u_strlen(p)] = 0x62;
    470     source[u_strlen(p)+1] = 0;
    471 
    472 
    473     u_strcpy(target,q);
    474     target[u_strlen(q)] = 0x61;
    475     target[u_strlen(q)+1] = 0;
    476 
    477     doTest(col, source, target, UCOL_GREATER);
    478 
    479 /*
    480     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
    481 */
    482 }
    483 
    484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    485     UChar source[256] = { '\0'};
    486     UChar target[256] = { '\0'};
    487 
    488     /*log_verbose("Testing tertiary\n");*/
    489 
    490     doTest(col, p, q, UCOL_LESS);
    491 /*
    492     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
    493 */
    494     source[0] = 0x0020;
    495     u_strcpy(source+1,p);
    496     target[0]= 0x002D;
    497     u_strcpy(target+1,q);
    498 
    499     doTest(col, source, target, UCOL_LESS);
    500 /*
    501     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
    502 */
    503 
    504     u_strcpy(source,p);
    505     source[u_strlen(p)] = 0xE0;
    506     source[u_strlen(p)+1] = 0;
    507 
    508     u_strcpy(target,q);
    509     target[u_strlen(q)] = 0x61;
    510     target[u_strlen(q)+1] = 0;
    511 
    512     doTest(col, source, target, UCOL_GREATER);
    513 
    514 /*
    515     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    516 */
    517 }
    518 
    519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    520 /*
    521     UChar source[256] = { '\0'};
    522     UChar target[256] = { '\0'};
    523 */
    524 
    525     doTest(col, p, q, UCOL_EQUAL);
    526 /*
    527     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    528 */
    529 }
    530 
    531 static void testCollator(UCollator *coll, UErrorCode *status) {
    532   const UChar *rules = NULL, *current = NULL;
    533   int32_t ruleLen = 0;
    534   uint32_t strength = 0;
    535   uint32_t chOffset = 0; uint32_t chLen = 0;
    536   uint32_t exOffset = 0; uint32_t exLen = 0;
    537   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    538   uint32_t firstEx = 0;
    539 /*  uint32_t rExpsLen = 0; */
    540   uint32_t firstLen = 0;
    541   UBool varT = FALSE; UBool top_ = TRUE;
    542   uint16_t specs = 0;
    543   UBool startOfRules = TRUE;
    544   UBool lastReset = FALSE;
    545   UBool before = FALSE;
    546   uint32_t beforeStrength = 0;
    547   UColTokenParser src;
    548   UColOptionSet opts;
    549 
    550   UChar first[256];
    551   UChar second[256];
    552   UChar tempB[256];
    553   uint32_t tempLen;
    554   UChar *rulesCopy = NULL;
    555   UParseError parseError;
    556 
    557   uprv_memset(&src, 0, sizeof(UColTokenParser));
    558 
    559   src.opts = &opts;
    560 
    561   rules = ucol_getRules(coll, &ruleLen);
    562   if(U_SUCCESS(*status) && ruleLen > 0) {
    563     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    564     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    565     src.current = src.source = rulesCopy;
    566     src.end = rulesCopy+ruleLen;
    567     src.extraCurrent = src.end;
    568     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    569     *first = *second = 0;
    570 
    571 	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    572 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    573     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
    574       strength = src.parsedToken.strength;
    575       chOffset = src.parsedToken.charsOffset;
    576       chLen = src.parsedToken.charsLen;
    577       exOffset = src.parsedToken.extensionOffset;
    578       exLen = src.parsedToken.extensionLen;
    579       prefixOffset = src.parsedToken.prefixOffset;
    580       prefixLen = src.parsedToken.prefixLen;
    581       specs = src.parsedToken.flags;
    582 
    583       startOfRules = FALSE;
    584       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    585       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    586       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
    587         second[0] = 0;
    588       } else {
    589         u_strncpy(second,src.source+chOffset, chLen);
    590         second[chLen] = 0;
    591 
    592         if(exLen > 0 && firstEx == 0) {
    593           u_strncat(first, src.source+exOffset, exLen);
    594           first[firstLen+exLen] = 0;
    595         }
    596 
    597         if(lastReset == TRUE && prefixLen != 0) {
    598           u_strncpy(first+prefixLen, first, firstLen);
    599           u_strncpy(first, src.source+prefixOffset, prefixLen);
    600           first[firstLen+prefixLen] = 0;
    601           firstLen = firstLen+prefixLen;
    602         }
    603 
    604         if(before == TRUE) { /* swap first and second */
    605           u_strcpy(tempB, first);
    606           u_strcpy(first, second);
    607           u_strcpy(second, tempB);
    608 
    609           tempLen = firstLen;
    610           firstLen = chLen;
    611           chLen = tempLen;
    612 
    613           tempLen = firstEx;
    614           firstEx = exLen;
    615           exLen = tempLen;
    616           if(beforeStrength < strength) {
    617             strength = beforeStrength;
    618           }
    619         }
    620       }
    621       lastReset = FALSE;
    622 
    623       switch(strength){
    624       case UCOL_IDENTICAL:
    625           testEquality(coll,first,second);
    626           break;
    627       case UCOL_PRIMARY:
    628           testPrimary(coll,first,second);
    629           break;
    630       case UCOL_SECONDARY:
    631           testSecondary(coll,first,second);
    632           break;
    633       case UCOL_TERTIARY:
    634           testTertiary(coll,first,second);
    635           break;
    636       case UCOL_TOK_RESET:
    637         lastReset = TRUE;
    638         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
    639         if(before) {
    640           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
    641         }
    642         break;
    643       default:
    644           break;
    645       }
    646 
    647       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
    648         before = FALSE;
    649       } else {
    650         firstLen = chLen;
    651         firstEx = exLen;
    652         u_strcpy(first, second);
    653       }
    654     }
    655     uprv_free(src.source);
    656   }
    657 }
    658 
    659 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    660   UCollator *UCA = (UCollator *)collator;
    661   return ucol_strcoll(UCA, source, sLen, target, tLen);
    662 }
    663 
    664 /*
    665 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    666 #if U_PLATFORM_HAS_WIN32_API
    667   LCID lcid = (LCID)collator;
    668   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
    669 #else
    670   return 0;
    671 #endif
    672 }
    673 */
    674 
    675 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
    676                                      UChar s1, UChar s2,
    677                                      const UChar *s, const uint32_t sLen,
    678                                      const UChar *t, const uint32_t tLen) {
    679   UChar source[256] = {0};
    680   UChar target[256] = {0};
    681 
    682   source[0] = s1;
    683   u_strcpy(source+1, s);
    684   target[0] = s2;
    685   u_strcpy(target+1, t);
    686 
    687   return func(collator, opts, source, sLen+1, target, tLen+1);
    688 }
    689 
    690 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
    691                                    UChar s1, UChar s2,
    692                                    const UChar *s, const uint32_t sLen,
    693                                    const UChar *t, const uint32_t tLen) {
    694   UChar source[256] = {0};
    695   UChar target[256] = {0};
    696 
    697   u_strcpy(source, s);
    698   source[sLen] = s1;
    699   u_strcpy(target, t);
    700   target[tLen] = s2;
    701 
    702   return func(collator, opts, source, sLen+1, target, tLen+1);
    703 }
    704 
    705 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
    706                               const UChar *s, const uint32_t sLen,
    707                               const UChar *t, const uint32_t tLen,
    708                               UCollationResult result) {
    709   /*UChar fPrimary = 0x6d;*/
    710   /*UChar sPrimary = 0x6e;*/
    711   UChar fSecondary = 0x310d;
    712   UChar sSecondary = 0x31a3;
    713   UChar fTertiary = 0x310f;
    714   UChar sTertiary = 0x31b7;
    715 
    716   UCollationResult oposite;
    717   if(result == UCOL_EQUAL) {
    718     return UCOL_IDENTICAL;
    719   } else if(result == UCOL_GREATER) {
    720     oposite = UCOL_LESS;
    721   } else {
    722     oposite = UCOL_GREATER;
    723   }
    724 
    725   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    726     return UCOL_PRIMARY;
    727   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
    728     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
    729     return UCOL_SECONDARY;
    730   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
    731     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    732     return UCOL_TERTIARY;
    733   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
    734     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
    735     return UCOL_QUATERNARY;
    736   } else {
    737     return UCOL_IDENTICAL;
    738   }
    739 }
    740 
    741 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    742   uint32_t i = 0;
    743 
    744   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    745     buffer[0] = '=';
    746     buffer[1] = '=';
    747     buffer[2] = '\0';
    748   } else if(res == UCOL_GREATER) {
    749     for(i = 0; i<strength+1; i++) {
    750       buffer[i] = '>';
    751     }
    752     buffer[strength+1] = '\0';
    753   } else {
    754     for(i = 0; i<strength+1; i++) {
    755       buffer[i] = '<';
    756     }
    757     buffer[strength+1] = '\0';
    758   }
    759 
    760   return buffer;
    761 }
    762 
    763 
    764 
    765 static void logFailure (const char *platform, const char *test,
    766                         const UChar *source, const uint32_t sLen,
    767                         const UChar *target, const uint32_t tLen,
    768                         UCollationResult realRes, uint32_t realStrength,
    769                         UCollationResult expRes, uint32_t expStrength, UBool error) {
    770 
    771   uint32_t i = 0;
    772 
    773   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
    774   static int32_t maxOutputLength = 0;
    775   int32_t outputLength;
    776 
    777   *sEsc = *tEsc = *s = *t = 0;
    778   if(error == TRUE) {
    779     log_err("Difference between expected and generated order. Run test with -v for more info\n");
    780   } else if(getTestOption(VERBOSITY_OPTION) == 0) {
    781     return;
    782   }
    783   for(i = 0; i<sLen; i++) {
    784     sprintf(b, "%04X", source[i]);
    785     strcat(sEsc, "\\u");
    786     strcat(sEsc, b);
    787     strcat(s, b);
    788     strcat(s, " ");
    789     if(source[i] < 0x80) {
    790       sprintf(b, "(%c)", source[i]);
    791       strcat(sEsc, b);
    792     }
    793   }
    794   for(i = 0; i<tLen; i++) {
    795     sprintf(b, "%04X", target[i]);
    796     strcat(tEsc, "\\u");
    797     strcat(tEsc, b);
    798     strcat(t, b);
    799     strcat(t, " ");
    800     if(target[i] < 0x80) {
    801       sprintf(b, "(%c)", target[i]);
    802       strcat(tEsc, b);
    803     }
    804   }
    805 /*
    806   strcpy(output, "[[ ");
    807   strcat(output, sEsc);
    808   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    809   strcat(output, tEsc);
    810 
    811   strcat(output, " : ");
    812 
    813   strcat(output, sEsc);
    814   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    815   strcat(output, tEsc);
    816   strcat(output, " ]] ");
    817 
    818   log_verbose("%s", output);
    819 */
    820 
    821 
    822   strcpy(output, "DIFF: ");
    823 
    824   strcat(output, s);
    825   strcat(output, " : ");
    826   strcat(output, t);
    827 
    828   strcat(output, test);
    829   strcat(output, ": ");
    830 
    831   strcat(output, sEsc);
    832   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    833   strcat(output, tEsc);
    834 
    835   strcat(output, " ");
    836 
    837   strcat(output, platform);
    838   strcat(output, ": ");
    839 
    840   strcat(output, sEsc);
    841   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    842   strcat(output, tEsc);
    843 
    844   outputLength = (int32_t)strlen(output);
    845   if(outputLength > maxOutputLength) {
    846     maxOutputLength = outputLength;
    847     U_ASSERT(outputLength < sizeof(output));
    848   }
    849 
    850   log_verbose("%s\n", output);
    851 
    852 }
    853 
    854 /*
    855 static void printOutRules(const UChar *rules) {
    856   uint32_t len = u_strlen(rules);
    857   uint32_t i = 0;
    858   char toPrint;
    859   uint32_t line = 0;
    860 
    861   fprintf(stdout, "Rules:");
    862 
    863   for(i = 0; i<len; i++) {
    864     if(rules[i]<0x7f && rules[i]>=0x20) {
    865       toPrint = (char)rules[i];
    866       if(toPrint == '&') {
    867         line = 1;
    868         fprintf(stdout, "\n&");
    869       } else if(toPrint == ';') {
    870         fprintf(stdout, "<<");
    871         line+=2;
    872       } else if(toPrint == ',') {
    873         fprintf(stdout, "<<<");
    874         line+=3;
    875       } else {
    876         fprintf(stdout, "%c", toPrint);
    877         line++;
    878       }
    879     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
    880       fprintf(stdout, "\\u%04X", rules[i]);
    881       line+=6;
    882     }
    883     if(line>72) {
    884       fprintf(stdout, "\n");
    885       line = 0;
    886     }
    887   }
    888 
    889   log_verbose("\n");
    890 
    891 }
    892 */
    893 
    894 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
    895   uint32_t diffs = 0;
    896   UCollationResult realResult;
    897   uint32_t realStrength;
    898 
    899   uint32_t sLen = u_strlen(first);
    900   uint32_t tLen = u_strlen(second);
    901 
    902   realResult = func(collator, opts, first, sLen, second, tLen);
    903   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
    904 
    905   if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
    906     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
    907     diffs++;
    908   } else if(realResult != UCOL_LESS || realStrength != strength) {
    909     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
    910     diffs++;
    911   }
    912   return diffs;
    913 }
    914 
    915 
    916 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
    917   const UChar *rules = NULL, *current = NULL;
    918   int32_t ruleLen = 0;
    919   uint32_t strength = 0;
    920   uint32_t chOffset = 0; uint32_t chLen = 0;
    921   uint32_t exOffset = 0; uint32_t exLen = 0;
    922   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    923 /*  uint32_t rExpsLen = 0; */
    924   uint32_t firstLen = 0, secondLen = 0;
    925   UBool varT = FALSE; UBool top_ = TRUE;
    926   uint16_t specs = 0;
    927   UBool startOfRules = TRUE;
    928   UColTokenParser src;
    929   UColOptionSet opts;
    930 
    931   UChar first[256];
    932   UChar second[256];
    933   UChar *rulesCopy = NULL;
    934 
    935   uint32_t UCAdiff = 0;
    936   uint32_t Windiff = 1;
    937   UParseError parseError;
    938 
    939   uprv_memset(&src, 0, sizeof(UColTokenParser));
    940   src.opts = &opts;
    941 
    942   rules = ucol_getRules(coll, &ruleLen);
    943 
    944   /*printOutRules(rules);*/
    945 
    946   if(U_SUCCESS(*status) && ruleLen > 0) {
    947     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    948     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    949     src.current = src.source = rulesCopy;
    950     src.end = rulesCopy+ruleLen;
    951     src.extraCurrent = src.end;
    952     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    953     *first = *second = 0;
    954 
    955     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    956        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    957     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
    958       strength = src.parsedToken.strength;
    959       chOffset = src.parsedToken.charsOffset;
    960       chLen = src.parsedToken.charsLen;
    961       exOffset = src.parsedToken.extensionOffset;
    962       exLen = src.parsedToken.extensionLen;
    963       prefixOffset = src.parsedToken.prefixOffset;
    964       prefixLen = src.parsedToken.prefixLen;
    965       specs = src.parsedToken.flags;
    966 
    967       startOfRules = FALSE;
    968       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    969       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    970 
    971       u_strncpy(second,src.source+chOffset, chLen);
    972       second[chLen] = 0;
    973       secondLen = chLen;
    974 
    975       if(exLen > 0) {
    976         u_strncat(first, src.source+exOffset, exLen);
    977         first[firstLen+exLen] = 0;
    978         firstLen += exLen;
    979       }
    980 
    981       if(strength != UCOL_TOK_RESET) {
    982         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
    983           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
    984           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
    985         }
    986       }
    987 
    988 
    989       firstLen = chLen;
    990       u_strcpy(first, second);
    991 
    992     }
    993     if(UCAdiff != 0 && Windiff != 0) {
    994       log_verbose("\n");
    995     }
    996     if(UCAdiff == 0) {
    997       log_verbose("No immediate difference with %s!\n", refName);
    998     }
    999     if(Windiff == 0) {
   1000       log_verbose("No immediate difference with Win32!\n");
   1001     }
   1002     uprv_free(src.source);
   1003   }
   1004 }
   1005 
   1006 /*
   1007  * Takes two CEs (lead and continuation) and
   1008  * compares them as CEs should be compared:
   1009  * primary vs. primary, secondary vs. secondary
   1010  * tertiary vs. tertiary
   1011  */
   1012 static int32_t compareCEs(uint32_t s1, uint32_t s2,
   1013                    uint32_t t1, uint32_t t2) {
   1014   uint32_t s = 0, t = 0;
   1015   if(s1 == t1 && s2 == t2) {
   1016     return 0;
   1017   }
   1018   s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
   1019   t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
   1020   if(s < t) {
   1021     return -1;
   1022   } else if(s > t) {
   1023     return 1;
   1024   } else {
   1025     s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
   1026     t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
   1027     if(s < t) {
   1028       return -1;
   1029     } else if(s > t) {
   1030       return 1;
   1031     } else {
   1032       s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
   1033       t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
   1034       if(s < t) {
   1035         return -1;
   1036       } else {
   1037         return 1;
   1038       }
   1039     }
   1040   }
   1041 }
   1042 
/* One entry of the indirect-positioning table filled by setIndirectBoundaries. */
typedef struct {
  uint32_t startCE;      /* first CE of the start pair (start[0]) */
  uint32_t startContCE;  /* second CE of the start pair (start[1]) */
  uint32_t limitCE;      /* first CE of the end pair (end[0]); 0 when no end was given */
  uint32_t limitContCE;  /* second CE of the end pair (end[1]); 0 when no end was given */
} indirectBoundaries;
   1049 
   1050 /* these values are used for finding CE values for indirect positioning. */
   1051 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
   1052 /* values. It only works for resets and you cannot tailor indirect names */
   1053 /* An indirect name can define either an anchor point or a range. An     */
   1054 /* anchor point behaves in exactly the same way as a code point in reset */
   1055 /* would, except that it cannot be tailored. A range (we currently only  */
   1056 /* know for the [top] range will explicitly set the upper bound for      */
   1057 /* generated CEs, thus allowing for better control over how many CEs can */
   1058 /* be squeezed between in the range without performance penalty.         */
   1059 /* In that respect, we use [top] for tailoring of locales that use CJK   */
   1060 /* characters. Other indirect values are currently a pure convenience,   */
   1061 /* they can be used to assure that the CEs will be always positioned in  */
   1062 /* the same place relative to a point with known properties (e.g. first  */
   1063 /* primary ignorable). */
   1064 static indirectBoundaries ucolIndirectBoundaries[15];
   1065 static UBool indirectBoundariesSet = FALSE;
   1066 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
   1067     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
   1068     /* to initalize here. */
   1069     ucolIndirectBoundaries[indexR].startCE = start[0];
   1070     ucolIndirectBoundaries[indexR].startContCE = start[1];
   1071     if(end) {
   1072         ucolIndirectBoundaries[indexR].limitCE = end[0];
   1073         ucolIndirectBoundaries[indexR].limitContCE = end[1];
   1074     } else {
   1075         ucolIndirectBoundaries[indexR].limitCE = 0;
   1076         ucolIndirectBoundaries[indexR].limitContCE = 0;
   1077     }
   1078 }
   1079 
   1080 static void testCEs(UCollator *coll, UErrorCode *status) {
   1081     const UChar *rules = NULL, *current = NULL;
   1082     int32_t ruleLen = 0;
   1083 
   1084     uint32_t strength = 0;
   1085     uint32_t maxStrength = UCOL_IDENTICAL;
   1086     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
   1087     uint32_t lastCE;
   1088     uint32_t lastContCE;
   1089 
   1090     int32_t result = 0;
   1091     uint32_t chOffset = 0; uint32_t chLen = 0;
   1092     uint32_t exOffset = 0; uint32_t exLen = 0;
   1093     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
   1094     uint32_t oldOffset = 0;
   1095 
   1096     /* uint32_t rExpsLen = 0; */
   1097     /* uint32_t firstLen = 0; */
   1098     uint16_t specs = 0;
   1099     UBool varT = FALSE; UBool top_ = TRUE;
   1100     UBool startOfRules = TRUE;
   1101     UBool before = FALSE;
   1102     UColTokenParser src;
   1103     UColOptionSet opts;
   1104     UParseError parseError;
   1105     UChar *rulesCopy = NULL;
   1106     collIterate *c = uprv_new_collIterate(status);
   1107     UCAConstants *consts = NULL;
   1108     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
   1109         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
   1110     const char *colLoc;
   1111     UCollator *UCA = ucol_open("root", status);
   1112 
   1113     if (U_FAILURE(*status)) {
   1114         log_err("Could not open root collator %s\n", u_errorName(*status));
   1115         uprv_delete_collIterate(c);
   1116         return;
   1117     }
   1118 
   1119     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
   1120     if (U_FAILURE(*status)) {
   1121         log_err("Could not get collator name: %s\n", u_errorName(*status));
   1122         ucol_close(UCA);
   1123         uprv_delete_collIterate(c);
   1124         return;
   1125     }
   1126 
   1127     uprv_memset(&src, 0, sizeof(UColTokenParser));
   1128 
   1129     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
   1130     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
   1131     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
   1132     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
   1133     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
   1134 
   1135     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
   1136 
   1137     src.opts = &opts;
   1138 
   1139     rules = ucol_getRules(coll, &ruleLen);
   1140 
   1141     src.invUCA = ucol_initInverseUCA(status);
   1142 
   1143     if(indirectBoundariesSet == FALSE) {
   1144         /* UCOL_RESET_TOP_VALUE */
   1145         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1146         /* UCOL_FIRST_PRIMARY_IGNORABLE */
   1147         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
   1148         /* UCOL_LAST_PRIMARY_IGNORABLE */
   1149         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
   1150         /* UCOL_FIRST_SECONDARY_IGNORABLE */
   1151         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
   1152         /* UCOL_LAST_SECONDARY_IGNORABLE */
   1153         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
   1154         /* UCOL_FIRST_TERTIARY_IGNORABLE */
   1155         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
   1156         /* UCOL_LAST_TERTIARY_IGNORABLE */
   1157         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
   1158         /* UCOL_FIRST_VARIABLE */
   1159         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
   1160         /* UCOL_LAST_VARIABLE */
   1161         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
   1162         /* UCOL_FIRST_NON_VARIABLE */
   1163         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
   1164         /* UCOL_LAST_NON_VARIABLE */
   1165         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1166         /* UCOL_FIRST_IMPLICIT */
   1167         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
   1168         /* UCOL_LAST_IMPLICIT */
   1169         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
   1170         /* UCOL_FIRST_TRAILING */
   1171         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
   1172         /* UCOL_LAST_TRAILING */
   1173         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
   1174         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
   1175         indirectBoundariesSet = TRUE;
   1176     }
   1177 
   1178 
   1179     if(U_SUCCESS(*status) && ruleLen > 0) {
   1180         rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   1181         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
   1182         src.current = src.source = rulesCopy;
   1183         src.end = rulesCopy+ruleLen;
   1184         src.extraCurrent = src.end;
   1185         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1186 
   1187 	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1188 	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1189         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
   1190             strength = src.parsedToken.strength;
   1191             chOffset = src.parsedToken.charsOffset;
   1192             chLen = src.parsedToken.charsLen;
   1193             exOffset = src.parsedToken.extensionOffset;
   1194             exLen = src.parsedToken.extensionLen;
   1195             prefixOffset = src.parsedToken.prefixOffset;
   1196             prefixLen = src.parsedToken.prefixLen;
   1197             specs = src.parsedToken.flags;
   1198 
   1199             startOfRules = FALSE;
   1200             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
   1201             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
   1202 
   1203             uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
   1204 
   1205             currCE = ucol_getNextCE(coll, c, status);
   1206             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
   1207                 log_verbose("Thai prevowel detected. Will pick next CE\n");
   1208                 currCE = ucol_getNextCE(coll, c, status);
   1209             }
   1210 
   1211             currContCE = ucol_getNextCE(coll, c, status);
   1212             if(!isContinuation(currContCE)) {
   1213                 currContCE = 0;
   1214             }
   1215 
   1216             /* we need to repack CEs here */
   1217 
   1218             if(strength == UCOL_TOK_RESET) {
   1219                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
   1220                 if(top_ == TRUE) {
   1221                     int32_t tokenIndex = src.parsedToken.indirectIndex;
   1222 
   1223                     nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
   1224                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
   1225                 } else {
   1226                     nextCE = baseCE = currCE;
   1227                     nextContCE = baseContCE = currContCE;
   1228                 }
   1229                 maxStrength = UCOL_IDENTICAL;
   1230             } else {
   1231                 if(strength < maxStrength) {
   1232                     maxStrength = strength;
   1233                     if(baseCE == UCOL_RESET_TOP_VALUE) {
   1234                         log_verbose("Resetting to [top]\n");
   1235                         nextCE = UCOL_NEXT_TOP_VALUE;
   1236                         nextContCE = UCOL_NEXT_TOP_CONT;
   1237                     } else {
   1238                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
   1239                     }
   1240                     if(result < 0) {
   1241                         if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
   1242                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
   1243                             return;
   1244                         } else {
   1245                             log_err("%s: couldn't find the CE\n", colLoc);
   1246                             return;
   1247                         }
   1248                     }
   1249                 }
   1250 
   1251                 currCE &= 0xFFFFFF3F;
   1252                 currContCE &= 0xFFFFFFBF;
   1253 
   1254                 if(maxStrength == UCOL_IDENTICAL) {
   1255                     if(baseCE != currCE || baseContCE != currContCE) {
   1256                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1257                     }
   1258                 } else {
   1259                     if(strength == UCOL_IDENTICAL) {
   1260                         if(lastCE != currCE || lastContCE != currContCE) {
   1261                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1262                         }
   1263                     } else {
   1264                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
   1265                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
   1266                             log_err("%s: current CE is not less than base CE\n", colLoc);
   1267                         }
   1268                         if(!before) {
   1269                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
   1270                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1271                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1272                             }
   1273                         } else {
   1274                             before = FALSE;
   1275                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
   1276                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1277                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1278                             }
   1279                         }
   1280                     }
   1281                 }
   1282 
   1283             }
   1284 
   1285             oldOffset = chOffset;
   1286             lastCE = currCE & 0xFFFFFF3F;
   1287             lastContCE = currContCE & 0xFFFFFFBF;
   1288         }
   1289         uprv_free(src.source);
   1290     }
   1291     ucol_close(UCA);
   1292     uprv_delete_collIterate(c);
   1293 }
   1294 
#if 0
/* Disabled: this hard-coded locale list is compiled out. */
/* these locales are now picked from index RB */
/* (the tests in this file enumerate locales with uloc_countAvailable() and */
/* uloc_getAvailable() instead) */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
   1309 
/*
 * Extra tailoring rules exercised by RamsRulesTest. Each entry is passed
 * through u_unescape, opened with ucol_openRules and then handed to
 * testCollator and testCEs. The trailing comment on each "[top]" rule is a
 * commented-out alternative left by the original author.
 */
static const char* rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
  /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
   1323 
   1324 
   1325 static void TestCollations(void) {
   1326     int32_t noOfLoc = uloc_countAvailable();
   1327     int32_t i = 0, j = 0;
   1328 
   1329     UErrorCode status = U_ZERO_ERROR;
   1330     char cName[256];
   1331     UChar name[256];
   1332     int32_t nameSize;
   1333 
   1334 
   1335     const char *locName = NULL;
   1336     UCollator *coll = NULL;
   1337     UCollator *UCA = ucol_open("", &status);
   1338     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
   1339     if (U_FAILURE(status)) {
   1340         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
   1341         return;
   1342     }
   1343     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
   1344 
   1345     for(i = 0; i<noOfLoc; i++) {
   1346         status = U_ZERO_ERROR;
   1347         locName = uloc_getAvailable(i);
   1348         if(uprv_strcmp("ja", locName) == 0) {
   1349             log_verbose("Don't know how to test prefixes\n");
   1350             continue;
   1351         }
   1352         if(hasCollationElements(locName)) {
   1353             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
   1354             for(j = 0; j<nameSize; j++) {
   1355                 cName[j] = (char)name[j];
   1356             }
   1357             cName[nameSize] = 0;
   1358             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1359             coll = ucol_open(locName, &status);
   1360             if(U_SUCCESS(status)) {
   1361                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
   1362                 ucol_close(coll);
   1363             } else {
   1364                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
   1365                 status = U_ZERO_ERROR;
   1366             }
   1367         }
   1368     }
   1369     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
   1370     ucol_close(UCA);
   1371 }
   1372 
   1373 static void RamsRulesTest(void) {
   1374     UErrorCode status = U_ZERO_ERROR;
   1375     int32_t i = 0;
   1376     UCollator *coll = NULL;
   1377     UChar rule[2048];
   1378     uint32_t ruleLen;
   1379     int32_t noOfLoc = uloc_countAvailable();
   1380     const char *locName = NULL;
   1381 
   1382     log_verbose("RamsRulesTest\n");
   1383 
   1384     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
   1385         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
   1386         return;
   1387     }
   1388 
   1389     for(i = 0; i<noOfLoc; i++) {
   1390         locName = uloc_getAvailable(i);
   1391         if(hasCollationElements(locName)) {
   1392             if (uprv_strcmp("ja", locName)==0) {
   1393                 log_verbose("Don't know how to test Japanese because of prefixes\n");
   1394                 continue;
   1395             }
   1396             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
   1397                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
   1398                 continue;
   1399             }
   1400             if (uprv_strcmp("bn", locName)==0 ||
   1401                 uprv_strcmp("en_US_POSIX", locName)==0 ||
   1402                 uprv_strcmp("km", locName)==0 ||
   1403                 uprv_strcmp("km_KH", locName)==0 ||
   1404                 uprv_strcmp("my", locName)==0 ||
   1405                 uprv_strcmp("si", locName)==0 ||
   1406                 uprv_strcmp("si_LK", locName)==0 ||
   1407                 uprv_strcmp("th", locName)==0 ||
   1408                 uprv_strcmp("th_TH", locName)==0 ||
   1409                 uprv_strcmp("zh", locName)==0 ||
   1410                 uprv_strcmp("zh_Hant", locName)==0
   1411             ) {
   1412                 log_verbose("Don't know how to test %s. "
   1413                             "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
   1414                 continue;
   1415             }
   1416             log_verbose("Testing locale %s\n", locName);
   1417             status = U_ZERO_ERROR;
   1418             coll = ucol_open(locName, &status);
   1419             if(U_SUCCESS(status)) {
   1420               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
   1421                 if(coll->image->jamoSpecial == TRUE) {
   1422                   log_err("%s has special JAMOs\n", locName);
   1423                 }
   1424                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
   1425                 testCollator(coll, &status);
   1426                 testCEs(coll, &status);
   1427               } else {
   1428                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
   1429               }
   1430               ucol_close(coll);
   1431             } else {
   1432               log_err("Could not open %s: %s\n", locName, u_errorName(status));
   1433             }
   1434         }
   1435     }
   1436 
   1437     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
   1438         log_verbose("Testing rule: %s\n", rulesToTest[i]);
   1439         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
   1440         status = U_ZERO_ERROR;
   1441         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1442         if(U_SUCCESS(status)) {
   1443             testCollator(coll, &status);
   1444             testCEs(coll, &status);
   1445             ucol_close(coll);
   1446         } else {
   1447           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
   1448         }
   1449     }
   1450 
   1451 }
   1452 
   1453 static void IsTailoredTest(void) {
   1454     UErrorCode status = U_ZERO_ERROR;
   1455     uint32_t i = 0;
   1456     UCollator *coll = NULL;
   1457     UChar rule[2048];
   1458     UChar tailored[2048];
   1459     UChar notTailored[2048];
   1460     uint32_t ruleLen, tailoredLen, notTailoredLen;
   1461 
   1462     log_verbose("IsTailoredTest\n");
   1463 
   1464     u_uastrcpy(rule, "&Z < A, B, C;c < d");
   1465     ruleLen = u_strlen(rule);
   1466 
   1467     u_uastrcpy(tailored, "ABCcd");
   1468     tailoredLen = u_strlen(tailored);
   1469 
   1470     u_uastrcpy(notTailored, "ZabD");
   1471     notTailoredLen = u_strlen(notTailored);
   1472 
   1473     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1474     if(U_SUCCESS(status)) {
   1475         for(i = 0; i<tailoredLen; i++) {
   1476             if(!ucol_isTailored(coll, tailored[i], &status)) {
   1477                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
   1478             }
   1479         }
   1480         for(i = 0; i<notTailoredLen; i++) {
   1481             if(ucol_isTailored(coll, notTailored[i], &status)) {
   1482                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
   1483             }
   1484         }
   1485         ucol_close(coll);
   1486     }
   1487     else {
   1488         log_err_status(status, "Can't tailor rules\n");
   1489     }
   1490     /* Code coverage */
   1491     status = U_ZERO_ERROR;
   1492     coll = ucol_open("ja", &status);
   1493     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
   1494         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
   1495     }
   1496     ucol_close(coll);
   1497 }
   1498 
   1499 
/*
 * Escaped test strings for TestChMove, listed in the order the "cs" collator
 * is expected to sort them: TestChMove requires every earlier entry to
 * compare UCOL_LESS than every later one.
 */
const static char chTest[][20] = {
  "c",
  "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h",
  "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s",
  "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
   1519 
   1520 static void TestChMove(void) {
   1521     UChar t1[256] = {0};
   1522     UChar t2[256] = {0};
   1523 
   1524     uint32_t i = 0, j = 0;
   1525     uint32_t size = 0;
   1526     UErrorCode status = U_ZERO_ERROR;
   1527 
   1528     UCollator *coll = ucol_open("cs", &status);
   1529 
   1530     if(U_SUCCESS(status)) {
   1531         size = sizeof(chTest)/sizeof(chTest[0]);
   1532         for(i = 0; i < size-1; i++) {
   1533             for(j = i+1; j < size; j++) {
   1534                 u_unescape(chTest[i], t1, 256);
   1535                 u_unescape(chTest[j], t2, 256);
   1536                 doTest(coll, t1, t2, UCOL_LESS);
   1537             }
   1538         }
   1539     }
   1540     else {
   1541         log_data_err("Can't open collator");
   1542     }
   1543     ucol_close(coll);
   1544 }
   1545 
   1546 
   1547 
   1548 
/*
 * Escaped strings in the order expected under the rule
 * "&\\u4e00 < a <<< A < b <<< B". Within the visible part of this file the
 * table is referenced only from the commented-out code at the end of
 * TestImplicitTailoring.
 */
const static char impTest[][20] = {
  "\\u4e00",
    "a",
    "A",
    "b",
    "B",
    "\\u4e01"
};
   1557 
   1558 
   1559 static void TestImplicitTailoring(void) {
   1560   static const struct {
   1561     const char *rules;
   1562     const char *data[10];
   1563     const uint32_t len;
   1564   } tests[] = {
   1565       { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
   1566       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
   1567       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
   1568       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
   1569   };
   1570 
   1571   int32_t i = 0;
   1572 
   1573   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   1574       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   1575   }
   1576 
   1577 /*
   1578   UChar t1[256] = {0};
   1579   UChar t2[256] = {0};
   1580 
   1581   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
   1582 
   1583   uint32_t i = 0, j = 0;
   1584   uint32_t size = 0;
   1585   uint32_t ruleLen = 0;
   1586   UErrorCode status = U_ZERO_ERROR;
   1587   UCollator *coll = NULL;
   1588   ruleLen = u_unescape(rule, t1, 256);
   1589 
   1590   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1591 
   1592   if(U_SUCCESS(status)) {
   1593     size = sizeof(impTest)/sizeof(impTest[0]);
   1594     for(i = 0; i < size-1; i++) {
   1595       for(j = i+1; j < size; j++) {
   1596         u_unescape(impTest[i], t1, 256);
   1597         u_unescape(impTest[j], t2, 256);
   1598         doTest(coll, t1, t2, UCOL_LESS);
   1599       }
   1600     }
   1601   }
   1602   else {
   1603     log_err("Can't open collator");
   1604   }
   1605   ucol_close(coll);
   1606   */
   1607 }
   1608 
   1609 static void TestFCDProblem(void) {
   1610   UChar t1[256] = {0};
   1611   UChar t2[256] = {0};
   1612 
   1613   const char *s1 = "\\u0430\\u0306\\u0325";
   1614   const char *s2 = "\\u04D1\\u0325";
   1615 
   1616   UErrorCode status = U_ZERO_ERROR;
   1617   UCollator *coll = ucol_open("", &status);
   1618   u_unescape(s1, t1, 256);
   1619   u_unescape(s2, t2, 256);
   1620 
   1621   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
   1622   doTest(coll, t1, t2, UCOL_EQUAL);
   1623 
   1624   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   1625   doTest(coll, t1, t2, UCOL_EQUAL);
   1626 
   1627   ucol_close(coll);
   1628 }
   1629 
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
/* Capacity, in UChars, of each normalization buffer in tester. */
#define NORM_BUFFER_TEST_LEN 18
/* One test case of TestComposeDecompose: a code point and its normalized forms. */
typedef struct {
  UChar32 u;                        /* the code point under test */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* receives the unorm_normalize(UNORM_NFC) output */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* receives the unorm_normalize(UNORM_NFD) output */
} tester;
   1640 
   1641 static void TestComposeDecompose(void) {
   1642     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
   1643     static const UChar UNICODESET_STR[] = {
   1644         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
   1645         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
   1646         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
   1647     };
   1648     int32_t noOfLoc;
   1649     int32_t i = 0, j = 0;
   1650 
   1651     UErrorCode status = U_ZERO_ERROR;
   1652     const char *locName = NULL;
   1653     uint32_t nfcSize;
   1654     uint32_t nfdSize;
   1655     tester **t;
   1656     uint32_t noCases = 0;
   1657     UCollator *coll = NULL;
   1658     UChar32 u = 0;
   1659     UChar comp[NORM_BUFFER_TEST_LEN];
   1660     uint32_t len = 0;
   1661     UCollationElements *iter;
   1662     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
   1663     int32_t charsToTestSize;
   1664 
   1665     noOfLoc = uloc_countAvailable();
   1666 
   1667     coll = ucol_open("", &status);
   1668     if (U_FAILURE(status)) {
   1669         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
   1670         return;
   1671     }
   1672     charsToTestSize = uset_size(charsToTest);
   1673     if (charsToTestSize <= 0) {
   1674         log_err("Set was zero. Missing data?\n");
   1675         return;
   1676     }
   1677     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
   1678     t[0] = (tester *)malloc(sizeof(tester));
   1679     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
   1680 
   1681     for(u = 0; u < charsToTestSize; u++) {
   1682         UChar32 ch = uset_charAt(charsToTest, u);
   1683         len = 0;
   1684         U16_APPEND_UNSAFE(comp, len, ch);
   1685         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1686         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1687 
   1688         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
   1689           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
   1690             t[noCases]->u = ch;
   1691             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
   1692                 u_strncpy(t[noCases]->NFC, comp, len);
   1693                 t[noCases]->NFC[len] = 0;
   1694             }
   1695             noCases++;
   1696             t[noCases] = (tester *)malloc(sizeof(tester));
   1697             uprv_memset(t[noCases], 0, sizeof(tester));
   1698         }
   1699     }
   1700     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
   1701     uset_close(charsToTest);
   1702     charsToTest = NULL;
   1703 
   1704     for(u=0; u<(UChar32)noCases; u++) {
   1705         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1706             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
   1707             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1708         }
   1709     }
   1710     /*
   1711     for(u = 0; u < charsToTestSize; u++) {
   1712       if(!(u&0xFFFF)) {
   1713         log_verbose("%08X ", u);
   1714       }
   1715       uprv_memset(t[noCases], 0, sizeof(tester));
   1716       t[noCases]->u = u;
   1717       len = 0;
   1718       U16_APPEND_UNSAFE(comp, len, u);
   1719       comp[len] = 0;
   1720       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1721       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1722       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
   1723       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
   1724     }
   1725     */
   1726 
   1727     ucol_close(coll);
   1728 
   1729     log_verbose("Testing locales, number of cases = %i\n", noCases);
   1730     for(i = 0; i<noOfLoc; i++) {
   1731         status = U_ZERO_ERROR;
   1732         locName = uloc_getAvailable(i);
   1733         if(hasCollationElements(locName)) {
   1734             char cName[256];
   1735             UChar name[256];
   1736             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
   1737 
   1738             for(j = 0; j<nameSize; j++) {
   1739                 cName[j] = (char)name[j];
   1740             }
   1741             cName[nameSize] = 0;
   1742             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1743 
   1744             coll = ucol_open(locName, &status);
   1745             ucol_setStrength(coll, UCOL_IDENTICAL);
   1746             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1747 
   1748             for(u=0; u<(UChar32)noCases; u++) {
   1749                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1750                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
   1751                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1752                     log_verbose("Testing NFC\n");
   1753                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
   1754                     backAndForth(iter);
   1755                     log_verbose("Testing NFD\n");
   1756                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1757                     backAndForth(iter);
   1758                 }
   1759             }
   1760             ucol_closeElements(iter);
   1761             ucol_close(coll);
   1762         }
   1763     }
   1764     for(u = 0; u <= (UChar32)noCases; u++) {
   1765         free(t[u]);
   1766     }
   1767     free(t);
   1768 }
   1769 
   1770 static void TestEmptyRule(void) {
   1771   UErrorCode status = U_ZERO_ERROR;
   1772   UChar rulez[] = { 0 };
   1773   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1774 
   1775   ucol_close(coll);
   1776 }
   1777 
   1778 static void TestUCARules(void) {
   1779   UErrorCode status = U_ZERO_ERROR;
   1780   UChar b[256];
   1781   UChar *rules = b;
   1782   uint32_t ruleLen = 0;
   1783   UCollator *UCAfromRules = NULL;
   1784   UCollator *coll = ucol_open("", &status);
   1785   if(status == U_FILE_ACCESS_ERROR) {
   1786     log_data_err("Is your data around?\n");
   1787     return;
   1788   } else if(U_FAILURE(status)) {
   1789     log_err("Error opening collator\n");
   1790     return;
   1791   }
   1792   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
   1793 
   1794   log_verbose("TestUCARules\n");
   1795   if(ruleLen > 256) {
   1796     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
   1797     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
   1798   }
   1799   log_verbose("Rules length is %d\n", ruleLen);
   1800   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1801   if(U_SUCCESS(status)) {
   1802     ucol_close(UCAfromRules);
   1803   } else {
   1804     log_verbose("Unable to create a collator from UCARules!\n");
   1805   }
   1806 /*
   1807   u_unescape(blah, b, 256);
   1808   ucol_getSortKey(coll, b, 1, res, 256);
   1809 */
   1810   ucol_close(coll);
   1811   if(rules != b) {
   1812     free(rules);
   1813   }
   1814 }
   1815 
   1816 
   1817 /* Pinyin tonal order */
   1818 /*
   1819     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
   1820           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
   1821     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
   1822     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
   1823     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
   1824     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
   1825       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
   1826 .. (\u00fc)
   1827 
   1828 However, in testing we got the following order:
   1829     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
   1830           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
   1831     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
   1832 .. (\u0113)
   1833     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
   1834     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
   1835     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
   1836 .. (\u01d8)
   1837       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
   1838 */
   1839 
   1840 static void TestBefore(void) {
   1841   const static char *data[] = {
   1842       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
   1843       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
   1844       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
   1845       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
   1846       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
   1847       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
   1848   };
   1849   genericRulesStarter(
   1850     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
   1851     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
   1852     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
   1853     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
   1854     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
   1855     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
   1856     data, sizeof(data)/sizeof(data[0]));
   1857 }
   1858 
   1859 #if 0
   1860 /* superceded by TestBeforePinyin */
   1861 static void TestJ784(void) {
   1862   const static char *data[] = {
   1863       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
   1864       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
   1865       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
   1866       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
   1867       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
   1868       "\\u00fc",
   1869            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
   1870   };
   1871   genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
   1872 }
   1873 #endif
   1874 
   1875 #if 0
   1876 /* superceded by the changes to the lv locale */
   1877 static void TestJ831(void) {
   1878   const static char *data[] = {
   1879     "I",
   1880       "i",
   1881       "Y",
   1882       "y"
   1883   };
   1884   genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
   1885 }
   1886 #endif
   1887 
   1888 static void TestJ815(void) {
   1889   const static char *data[] = {
   1890     "aa",
   1891       "Aa",
   1892       "ab",
   1893       "Ab",
   1894       "ad",
   1895       "Ad",
   1896       "ae",
   1897       "Ae",
   1898       "\\u00e6",
   1899       "\\u00c6",
   1900       "af",
   1901       "Af",
   1902       "b",
   1903       "B"
   1904   };
   1905   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
   1906   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
   1907 }
   1908 
   1909 
   1910 /*
   1911 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
   1912 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
   1913 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
   1914 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
   1915 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
   1916 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
   1917 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
   1918 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
   1919 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
   1920 */
   1921 static void TestRedundantRules(void) {
   1922   int32_t i;
   1923 
   1924   static const struct {
   1925       const char *rules;
   1926       const char *expectedRules;
   1927       const char *testdata[8];
   1928       uint32_t testdatalen;
   1929   } tests[] = {
   1930     /* this test conflicts with positioning of CODAN placeholder */
   1931        /*{
   1932         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
   1933         "&\\u2089<<<x",
   1934         {"\\u2089", "x"}, 2
   1935        }, */
   1936     /* this test conflicts with the [before x] syntax tightening */
   1937       /*{
   1938         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
   1939         "&\\u0252<<<x",
   1940         {"\\u0252", "x"}, 2
   1941       }, */
   1942     /* this test conflicts with the [before x] syntax tightening */
   1943       /*{
   1944          "& a < b <<< c << d <<< e& [before 1] e <<< x",
   1945          "& a <<< x < b <<< c << d <<< e",
   1946         {"a", "x", "b", "c", "d", "e"}, 6
   1947       }, */
   1948       {
   1949         "& a < b < c < d& [before 1] c < m",
   1950         "& a < b < m < c < d",
   1951         {"a", "b", "m", "c", "d"}, 5
   1952       },
   1953       {
   1954         "& a < b <<< c << d <<< e& [before 3] e <<< x",
   1955         "& a < b <<< c << d <<< x <<< e",
   1956         {"a", "b", "c", "d", "x", "e"}, 6
   1957       },
   1958     /* this test conflicts with the [before x] syntax tightening */
   1959       /* {
   1960         "& a < b <<< c << d <<< e& [before 2] e <<< x",
   1961         "& a < b <<< c <<< x << d <<< e",
   1962         {"a", "b", "c", "x", "d", "e"},, 6
   1963       }, */
   1964       {
   1965         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
   1966         "& a < b <<< c << d <<< e <<< f < x < g",
   1967         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
   1968       },
   1969       {
   1970         "& a <<< b << c < d& a < m",
   1971         "& a <<< b << c < m < d",
   1972         {"a", "b", "c", "m", "d"}, 5
   1973       },
   1974       {
   1975         "&a<b<<b\\u0301 &z<b",
   1976         "&a<b\\u0301 &z<b",
   1977         {"a", "b\\u0301", "z", "b"}, 4
   1978       },
   1979       {
   1980         "&z<m<<<q<<<m",
   1981         "&z<q<<<m",
   1982         {"z", "q", "m"},3
   1983       },
   1984       {
   1985         "&z<<<m<q<<<m",
   1986         "&z<q<<<m",
   1987         {"z", "q", "m"}, 3
   1988       },
   1989       {
   1990         "& a < b < c < d& r < c",
   1991         "& a < b < d& r < c",
   1992         {"a", "b", "d"}, 3
   1993       },
   1994       {
   1995         "& a < b < c < d& r < c",
   1996         "& a < b < d& r < c",
   1997         {"r", "c"}, 2
   1998       },
   1999       {
   2000         "& a < b < c < d& c < m",
   2001         "& a < b < c < m < d",
   2002         {"a", "b", "c", "m", "d"}, 5
   2003       },
   2004       {
   2005         "& a < b < c < d& a < m",
   2006         "& a < m < b < c < d",
   2007         {"a", "m", "b", "c", "d"}, 5
   2008       }
   2009   };
   2010 
   2011 
   2012   UCollator *credundant = NULL;
   2013   UCollator *cresulting = NULL;
   2014   UErrorCode status = U_ZERO_ERROR;
   2015   UChar rlz[2048] = { 0 };
   2016   uint32_t rlen = 0;
   2017 
   2018   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
   2019     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
   2020     rlen = u_unescape(tests[i].rules, rlz, 2048);
   2021 
   2022     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2023     if(status == U_FILE_ACCESS_ERROR) {
   2024       log_data_err("Is your data around?\n");
   2025       return;
   2026     } else if(U_FAILURE(status)) {
   2027       log_err("Error opening collator\n");
   2028       return;
   2029     }
   2030 
   2031     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
   2032     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2033 
   2034     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
   2035 
   2036     ucol_close(credundant);
   2037     ucol_close(cresulting);
   2038 
   2039     log_verbose("testing using data\n");
   2040 
   2041     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
   2042   }
   2043 
   2044 }
   2045 
   2046 static void TestExpansionSyntax(void) {
   2047   int32_t i;
   2048 
   2049   const static char *rules[] = {
   2050     "&AE <<< a << b <<< c &d <<< f",
   2051     "&AE <<< a <<< b << c << d < e < f <<< g",
   2052     "&AE <<< B <<< C / D <<< F"
   2053   };
   2054 
   2055   const static char *expectedRules[] = {
   2056     "&A <<< a / E << b / E <<< c /E  &d <<< f",
   2057     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
   2058     "&A <<< B / E <<< C / ED <<< F / E"
   2059   };
   2060 
   2061   const static char *testdata[][8] = {
   2062     {"AE", "a", "b", "c"},
   2063     {"AE", "a", "b", "c", "d", "e", "f", "g"},
   2064     {"AE", "B", "C"} /* / ED <<< F / E"},*/
   2065   };
   2066 
   2067   const static uint32_t testdatalen[] = {
   2068       4,
   2069       8,
   2070       3
   2071   };
   2072 
   2073 
   2074 
   2075   UCollator *credundant = NULL;
   2076   UCollator *cresulting = NULL;
   2077   UErrorCode status = U_ZERO_ERROR;
   2078   UChar rlz[2048] = { 0 };
   2079   uint32_t rlen = 0;
   2080 
   2081   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
   2082     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
   2083     rlen = u_unescape(rules[i], rlz, 2048);
   2084 
   2085     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2086     if(status == U_FILE_ACCESS_ERROR) {
   2087       log_data_err("Is your data around?\n");
   2088       return;
   2089     } else if(U_FAILURE(status)) {
   2090       log_err("Error opening collator\n");
   2091       return;
   2092     }
   2093     rlen = u_unescape(expectedRules[i], rlz, 2048);
   2094     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2095 
   2096     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
   2097     /* as a hard error test, but only in information mode */
   2098     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
   2099 
   2100     ucol_close(credundant);
   2101     ucol_close(cresulting);
   2102 
   2103     log_verbose("testing using data\n");
   2104 
   2105     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
   2106   }
   2107 }
   2108 
   2109 static void TestCase(void)
   2110 {
   2111     const static UChar gRules[MAX_TOKEN_LEN] =
   2112     /*" & 0 < 1,\u2461<a,A"*/
   2113     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
   2114 
   2115     const static UChar testCase[][MAX_TOKEN_LEN] =
   2116     {
   2117         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
   2118         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
   2119         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
   2120         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
   2121     };
   2122 
   2123     const static UCollationResult caseTestResults[][9] =
   2124     {
   2125         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2126         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
   2127         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2128         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
   2129     };
   2130 
   2131     const static UColAttributeValue caseTestAttributes[][2] =
   2132     {
   2133         { UCOL_LOWER_FIRST, UCOL_OFF},
   2134         { UCOL_UPPER_FIRST, UCOL_OFF},
   2135         { UCOL_LOWER_FIRST, UCOL_ON},
   2136         { UCOL_UPPER_FIRST, UCOL_ON}
   2137     };
   2138     int32_t i,j,k;
   2139     UErrorCode status = U_ZERO_ERROR;
   2140     UCollationElements *iter;
   2141     UCollator  *myCollation;
   2142     myCollation = ucol_open("en_US", &status);
   2143 
   2144     if(U_FAILURE(status)){
   2145         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2146         return;
   2147     }
   2148     log_verbose("Testing different case settings\n");
   2149     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2150 
   2151     for(k = 0; k<4; k++) {
   2152       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2153       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2154       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
   2155       for (i = 0; i < 3 ; i++) {
   2156         for(j = i+1; j<4; j++) {
   2157           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2158         }
   2159       }
   2160     }
   2161     ucol_close(myCollation);
   2162 
   2163     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   2164     if(U_FAILURE(status)){
   2165         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2166         return;
   2167     }
   2168     log_verbose("Testing different case settings with custom rules\n");
   2169     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2170 
   2171     for(k = 0; k<4; k++) {
   2172       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2173       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2174       for (i = 0; i < 3 ; i++) {
   2175         for(j = i+1; j<4; j++) {
   2176           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
   2177           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2178           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
   2179           backAndForth(iter);
   2180           ucol_closeElements(iter);
   2181           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
   2182           backAndForth(iter);
   2183           ucol_closeElements(iter);
   2184         }
   2185       }
   2186     }
   2187     ucol_close(myCollation);
   2188     {
   2189       const static char *lowerFirst[] = {
   2190         "h",
   2191         "H",
   2192         "ch",
   2193         "Ch",
   2194         "CH",
   2195         "cha",
   2196         "chA",
   2197         "Cha",
   2198         "ChA",
   2199         "CHa",
   2200         "CHA",
   2201         "i",
   2202         "I"
   2203       };
   2204 
   2205       const static char *upperFirst[] = {
   2206         "H",
   2207         "h",
   2208         "CH",
   2209         "Ch",
   2210         "ch",
   2211         "CHA",
   2212         "CHa",
   2213         "ChA",
   2214         "Cha",
   2215         "chA",
   2216         "cha",
   2217         "I",
   2218         "i"
   2219       };
   2220       log_verbose("mixed case test\n");
   2221       log_verbose("lower first, case level off\n");
   2222       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2223       log_verbose("upper first, case level off\n");
   2224       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2225       log_verbose("lower first, case level on\n");
   2226       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2227       log_verbose("upper first, case level on\n");
   2228       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2229     }
   2230 
   2231 }
   2232 
   2233 static void TestIncrementalNormalize(void) {
   2234 
   2235     /*UChar baseA     =0x61;*/
   2236     UChar baseA     =0x41;
   2237 /*    UChar baseB     = 0x42;*/
   2238     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
   2239     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
   2240     /*
   2241         0x316 is combining grave accent below, cc=220
   2242         0x321 is combining palatalized hook below, cc=202
   2243         0x300 is combining grave accent, cc=230
   2244     */
   2245 
   2246 #define MAXSLEN 2000
   2247     /*int          maxSLen   = 64000;*/
   2248     int          sLen;
   2249     int          i;
   2250 
   2251     UCollator        *coll;
   2252     UErrorCode       status = U_ZERO_ERROR;
   2253     UCollationResult result;
   2254 
   2255     int32_t myQ = getTestOption(QUICK_OPTION);
   2256 
   2257     if(getTestOption(QUICK_OPTION) < 0) {
   2258         setTestOption(QUICK_OPTION, 1);
   2259     }
   2260 
   2261     {
   2262         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
   2263         /*          most buffers along the way.*/
   2264         UChar            strA[MAXSLEN+1];
   2265         UChar            strB[MAXSLEN+1];
   2266 
   2267         coll = ucol_open("en_US", &status);
   2268         if(status == U_FILE_ACCESS_ERROR) {
   2269           log_data_err("Is your data around?\n");
   2270           return;
   2271         } else if(U_FAILURE(status)) {
   2272           log_err("Error opening collator\n");
   2273           return;
   2274         }
   2275         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2276 
   2277         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   2278         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   2279         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   2280         for (sLen = 500; sLen<501; sLen++) {
   2281         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   2282             strA[0] = baseA;
   2283             strB[0] = baseA;
   2284             for (i=1; i<=sLen-1; i++) {
   2285                 strA[i] = ccMix[i % 3];
   2286                 strB[sLen-i] = ccMix[i % 3];
   2287             }
   2288             strA[sLen]   = 0;
   2289             strB[sLen]   = 0;
   2290 
   2291             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   2292             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   2293             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   2294             doTest(coll, strA, strB, UCOL_EQUAL);
   2295         }
   2296     }
   2297 
   2298     setTestOption(QUICK_OPTION, myQ);
   2299 
   2300 
   2301     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   2302     /*         of the string.  Checks a couple of edge cases.*/
   2303 
   2304     {
   2305         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   2306         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   2307         ucol_setStrength(coll, UCOL_TERTIARY);
   2308         doTest(coll, strA, strB, UCOL_EQUAL);
   2309     }
   2310 
   2311     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   2312 
   2313     {
   2314       /* New UCA  3.1.1.
   2315        * test below used a code point from Desseret, which sorts differently
   2316        * than d800 dc00
   2317        */
   2318         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   2319         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   2320         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   2321         ucol_setStrength(coll, UCOL_TERTIARY);
   2322         doTest(coll, strA, strB, UCOL_GREATER);
   2323     }
   2324 
   2325     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   2326 
   2327     {
   2328         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   2329         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   2330         char  sortKeyA[50];
   2331         char  sortKeyAz[50];
   2332         char  sortKeyB[50];
   2333         char  sortKeyBz[50];
   2334         int   r;
   2335 
   2336         /* there used to be -3 here. Hmmmm.... */
   2337         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   2338         result = ucol_strcoll(coll, strA, 3, strB, 3);
   2339         if (result != UCOL_GREATER) {
   2340             log_err("ERROR 1 in test 4\n");
   2341         }
   2342         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2343         if (result != UCOL_EQUAL) {
   2344             log_err("ERROR 2 in test 4\n");
   2345         }
   2346 
   2347         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2348         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2349         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2350         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2351 
   2352         r = strcmp(sortKeyA, sortKeyAz);
   2353         if (r <= 0) {
   2354             log_err("Error 3 in test 4\n");
   2355         }
   2356         r = strcmp(sortKeyA, sortKeyB);
   2357         if (r <= 0) {
   2358             log_err("Error 4 in test 4\n");
   2359         }
   2360         r = strcmp(sortKeyAz, sortKeyBz);
   2361         if (r != 0) {
   2362             log_err("Error 5 in test 4\n");
   2363         }
   2364 
   2365         ucol_setStrength(coll, UCOL_IDENTICAL);
   2366         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2367         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2368         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2369         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2370 
   2371         r = strcmp(sortKeyA, sortKeyAz);
   2372         if (r <= 0) {
   2373             log_err("Error 6 in test 4\n");
   2374         }
   2375         r = strcmp(sortKeyA, sortKeyB);
   2376         if (r <= 0) {
   2377             log_err("Error 7 in test 4\n");
   2378         }
   2379         r = strcmp(sortKeyAz, sortKeyBz);
   2380         if (r != 0) {
   2381             log_err("Error 8 in test 4\n");
   2382         }
   2383         ucol_setStrength(coll, UCOL_TERTIARY);
   2384     }
   2385 
   2386 
   2387     /*  Test 5:  Null characters in non-normal source strings.*/
   2388 
   2389     {
   2390         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   2391         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   2392         char  sortKeyA[50];
   2393         char  sortKeyAz[50];
   2394         char  sortKeyB[50];
   2395         char  sortKeyBz[50];
   2396         int   r;
   2397 
   2398         result = ucol_strcoll(coll, strA, 6, strB, 6);
   2399         if (result != UCOL_GREATER) {
   2400             log_err("ERROR 1 in test 5\n");
   2401         }
   2402         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2403         if (result != UCOL_EQUAL) {
   2404             log_err("ERROR 2 in test 5\n");
   2405         }
   2406 
   2407         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2408         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2409         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2410         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2411 
   2412         r = strcmp(sortKeyA, sortKeyAz);
   2413         if (r <= 0) {
   2414             log_err("Error 3 in test 5\n");
   2415         }
   2416         r = strcmp(sortKeyA, sortKeyB);
   2417         if (r <= 0) {
   2418             log_err("Error 4 in test 5\n");
   2419         }
   2420         r = strcmp(sortKeyAz, sortKeyBz);
   2421         if (r != 0) {
   2422             log_err("Error 5 in test 5\n");
   2423         }
   2424 
   2425         ucol_setStrength(coll, UCOL_IDENTICAL);
   2426         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2427         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2428         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2429         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2430 
   2431         r = strcmp(sortKeyA, sortKeyAz);
   2432         if (r <= 0) {
   2433             log_err("Error 6 in test 5\n");
   2434         }
   2435         r = strcmp(sortKeyA, sortKeyB);
   2436         if (r <= 0) {
   2437             log_err("Error 7 in test 5\n");
   2438         }
   2439         r = strcmp(sortKeyAz, sortKeyBz);
   2440         if (r != 0) {
   2441             log_err("Error 8 in test 5\n");
   2442         }
   2443         ucol_setStrength(coll, UCOL_TERTIARY);
   2444     }
   2445 
   2446 
   2447     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   2448 
   2449     {
   2450         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   2451         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   2452 
   2453         result = ucol_strcoll(coll, strA, 5, strB, 5);
   2454         if (result != UCOL_LESS) {
   2455             log_err("Error 1 in test 6\n");
   2456         }
   2457         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2458         if (result != UCOL_EQUAL) {
   2459             log_err("Error 2 in test 6\n");
   2460         }
   2461     }
   2462 
   2463     ucol_close(coll);
   2464 }
   2465 
   2466 
   2467 
   2468 #if 0
   2469 static void TestGetCaseBit(void) {
   2470   static const char *caseBitData[] = {
   2471     "a", "A", "ch", "Ch", "CH",
   2472       "\\uFF9E", "\\u0009"
   2473   };
   2474 
   2475   static const uint8_t results[] = {
   2476     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   2477       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   2478   };
   2479 
   2480   uint32_t i, blen = 0;
   2481   UChar b[256] = {0};
   2482   UErrorCode status = U_ZERO_ERROR;
   2483   UCollator *UCA = ucol_open("", &status);
   2484   uint8_t res = 0;
   2485 
   2486   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   2487     blen = u_unescape(caseBitData[i], b, 256);
   2488     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   2489     if(results[i] != res) {
   2490       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   2491     }
   2492   }
   2493 }
   2494 #endif
   2495 
   2496 static void TestHangulTailoring(void) {
   2497     static const char *koreanData[] = {
   2498         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   2499             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   2500             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   2501             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   2502             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   2503             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   2504     };
   2505 
   2506     const char *rules =
   2507         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   2508         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   2509         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   2510         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   2511         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   2512         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   2513 
   2514 
   2515   UErrorCode status = U_ZERO_ERROR;
   2516   UChar rlz[2048] = { 0 };
   2517   uint32_t rlen = u_unescape(rules, rlz, 2048);
   2518 
   2519   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   2520   if(status == U_FILE_ACCESS_ERROR) {
   2521     log_data_err("Is your data around?\n");
   2522     return;
   2523   } else if(U_FAILURE(status)) {
   2524     log_err("Error opening collator\n");
   2525     return;
   2526   }
   2527 
   2528   log_verbose("Using start of korean rules\n");
   2529 
   2530   if(U_SUCCESS(status)) {
   2531     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2532   } else {
   2533     log_err("Unable to open collator with rules %s\n", rules);
   2534   }
   2535 
   2536   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
   2537   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
   2538   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2539 
   2540   ucol_close(coll);
   2541 
   2542   log_verbose("Using ko__LOTUS locale\n");
   2543   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2544 }
   2545 
   2546 static void TestCompressOverlap(void) {
   2547     UChar       secstr[150];
   2548     UChar       tertstr[150];
   2549     UErrorCode  status = U_ZERO_ERROR;
   2550     UCollator  *coll;
   2551     char        result[200];
   2552     uint32_t    resultlen;
   2553     int         count = 0;
   2554     char       *tempptr;
   2555 
   2556     coll = ucol_open("", &status);
   2557 
   2558     if (U_FAILURE(status)) {
   2559         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   2560         return;
   2561     }
   2562     while (count < 149) {
   2563         secstr[count] = 0x0020; /* [06, 05, 05] */
   2564         tertstr[count] = 0x0020;
   2565         count ++;
   2566     }
   2567 
   2568     /* top down compression ----------------------------------- */
   2569     secstr[count] = 0x0332; /* [, 87, 05] */
   2570     tertstr[count] = 0x3000; /* [06, 05, 07] */
   2571 
   2572     /* no compression secstr should have 150 secondary bytes, tertstr should
   2573     have 150 tertiary bytes.
   2574     with correct overlapping compression, secstr should have 4 secondary
   2575     bytes, tertstr should have > 2 tertiary bytes */
   2576     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2577     tempptr = uprv_strchr(result, 1) + 1;
   2578     while (*(tempptr + 1) != 1) {
   2579         /* the last secondary collation element is not checked since it is not
   2580         part of the compression */
   2581         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
   2582             log_err("Secondary compression overlapped\n");
   2583         }
   2584         tempptr ++;
   2585     }
   2586 
   2587     /* tertiary top/bottom/common for en_US is similar to the secondary
   2588     top/bottom/common */
   2589     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2590     tempptr = uprv_strrchr(result, 1) + 1;
   2591     while (*(tempptr + 1) != 0) {
   2592         /* the last secondary collation element is not checked since it is not
   2593         part of the compression */
   2594         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
   2595             log_err("Tertiary compression overlapped\n");
   2596         }
   2597         tempptr ++;
   2598     }
   2599 
   2600     /* bottom up compression ------------------------------------- */
   2601     secstr[count] = 0;
   2602     tertstr[count] = 0;
   2603     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2604     tempptr = uprv_strchr(result, 1) + 1;
   2605     while (*(tempptr + 1) != 1) {
   2606         /* the last secondary collation element is not checked since it is not
   2607         part of the compression */
   2608         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
   2609             log_err("Secondary compression overlapped\n");
   2610         }
   2611         tempptr ++;
   2612     }
   2613 
   2614     /* tertiary top/bottom/common for en_US is similar to the secondary
   2615     top/bottom/common */
   2616     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2617     tempptr = uprv_strrchr(result, 1) + 1;
   2618     while (*(tempptr + 1) != 0) {
   2619         /* the last secondary collation element is not checked since it is not
   2620         part of the compression */
   2621         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
   2622             log_err("Tertiary compression overlapped\n");
   2623         }
   2624         tempptr ++;
   2625     }
   2626 
   2627     ucol_close(coll);
   2628 }
   2629 
   2630 static void TestCyrillicTailoring(void) {
   2631   static const char *test[] = {
   2632     "\\u0410b",
   2633       "\\u0410\\u0306a",
   2634       "\\u04d0A"
   2635   };
   2636 
   2637     /* Russian overrides contractions, so this test is not valid anymore */
   2638     /*genericLocaleStarter("ru", test, 3);*/
   2639 
   2640     genericLocaleStarter("root", test, 3);
   2641     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   2642     genericRulesStarter("&Z < \\u0410", test, 3);
   2643     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   2644     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   2645     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   2646     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   2647 }
   2648 
   2649 static void TestSuppressContractions(void) {
   2650 
   2651   static const char *testNoCont2[] = {
   2652       "\\u0410\\u0302a",
   2653       "\\u0410\\u0306b",
   2654       "\\u0410c"
   2655   };
   2656   static const char *testNoCont[] = {
   2657       "a\\u0410",
   2658       "A\\u0410\\u0306",
   2659       "\\uFF21\\u0410\\u0302"
   2660   };
   2661 
   2662   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   2663   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   2664 }
   2665 
   2666 static void TestContraction(void) {
   2667     const static char *testrules[] = {
   2668         "&A = AB / B",
   2669         "&A = A\\u0306/\\u0306",
   2670         "&c = ch / h"
   2671     };
   2672     const static UChar testdata[][2] = {
   2673         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   2674         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   2675         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   2676     };
   2677     const static UChar testdata2[][2] = {
   2678         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   2679         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   2680         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   2681     };
   2682     const static char *testrules3[] = {
   2683         "&z < xyz &xyzw << B",
   2684         "&z < xyz &xyz << B / w",
   2685         "&z < ch &achm << B",
   2686         "&z < ch &a << B / chm",
   2687         "&\\ud800\\udc00w << B",
   2688         "&\\ud800\\udc00 << B / w",
   2689         "&a\\ud800\\udc00m << B",
   2690         "&a << B / \\ud800\\udc00m",
   2691     };
   2692 
   2693     UErrorCode  status   = U_ZERO_ERROR;
   2694     UCollator  *coll;
   2695     UChar       rule[256] = {0};
   2696     uint32_t    rlen     = 0;
   2697     int         i;
   2698 
   2699     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2700         UCollationElements *iter1;
   2701         int j = 0;
   2702         log_verbose("Rule %s for testing\n", testrules[i]);
   2703         rlen = u_unescape(testrules[i], rule, 32);
   2704         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2705         if (U_FAILURE(status)) {
   2706             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2707             return;
   2708         }
   2709         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   2710         if (U_FAILURE(status)) {
   2711             log_err("Collation iterator creation failed\n");
   2712             return;
   2713         }
   2714         while (j < 2) {
   2715             UCollationElements *iter2 = ucol_openElements(coll,
   2716                                                          &(testdata[i][j]),
   2717                                                          1, &status);
   2718             uint32_t ce;
   2719             if (U_FAILURE(status)) {
   2720                 log_err("Collation iterator creation failed\n");
   2721                 return;
   2722             }
   2723             ce = ucol_next(iter2, &status);
   2724             while (ce != UCOL_NULLORDER) {
   2725                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   2726                     log_err("Collation elements in contraction split does not match\n");
   2727                     return;
   2728                 }
   2729                 ce = ucol_next(iter2, &status);
   2730             }
   2731             j ++;
   2732             ucol_closeElements(iter2);
   2733         }
   2734         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   2735             log_err("Collation elements not exhausted\n");
   2736             return;
   2737         }
   2738         ucol_closeElements(iter1);
   2739         ucol_close(coll);
   2740     }
   2741 
   2742     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   2743     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2744     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   2745         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2746                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   2747                 testdata2[1][1]);
   2748         return;
   2749     }
   2750     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   2751         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2752                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   2753                 testdata2[2][1]);
   2754         return;
   2755     }
   2756     ucol_close(coll);
   2757 
   2758     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   2759         UCollator          *coll1,
   2760                            *coll2;
   2761         UCollationElements *iter1,
   2762                            *iter2;
   2763         UChar               ch = 0x0042 /* 'B' */;
   2764         uint32_t            ce;
   2765         rlen = u_unescape(testrules3[i], rule, 32);
   2766         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2767         rlen = u_unescape(testrules3[i + 1], rule, 32);
   2768         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2769         if (U_FAILURE(status)) {
   2770             log_err("Collator creation failed %s\n", testrules[i]);
   2771             return;
   2772         }
   2773         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   2774         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   2775         if (U_FAILURE(status)) {
   2776             log_err("Collation iterator creation failed\n");
   2777             return;
   2778         }
   2779         ce = ucol_next(iter1, &status);
   2780         if (U_FAILURE(status)) {
   2781             log_err("Retrieving ces failed\n");
   2782             return;
   2783         }
   2784         while (ce != UCOL_NULLORDER) {
   2785             if (ce != (uint32_t)ucol_next(iter2, &status)) {
   2786                 log_err("CEs does not match\n");
   2787                 return;
   2788             }
   2789             ce = ucol_next(iter1, &status);
   2790             if (U_FAILURE(status)) {
   2791                 log_err("Retrieving ces failed\n");
   2792                 return;
   2793             }
   2794         }
   2795         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   2796             log_err("CEs not exhausted\n");
   2797             return;
   2798         }
   2799         ucol_closeElements(iter1);
   2800         ucol_closeElements(iter2);
   2801         ucol_close(coll1);
   2802         ucol_close(coll2);
   2803     }
   2804 }
   2805 
   2806 static void TestExpansion(void) {
   2807     const static char *testrules[] = {
   2808         "&J << K / B & K << M",
   2809         "&J << K / B << M"
   2810     };
   2811     const static UChar testdata[][3] = {
   2812         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   2813         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   2814         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   2815         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   2816         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   2817         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   2818     };
   2819 
   2820     UErrorCode  status   = U_ZERO_ERROR;
   2821     UCollator  *coll;
   2822     UChar       rule[256] = {0};
   2823     uint32_t    rlen     = 0;
   2824     int         i;
   2825 
   2826     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2827         int j = 0;
   2828         log_verbose("Rule %s for testing\n", testrules[i]);
   2829         rlen = u_unescape(testrules[i], rule, 32);
   2830         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2831         if (U_FAILURE(status)) {
   2832             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2833             return;
   2834         }
   2835 
   2836         for (j = 0; j < 5; j ++) {
   2837             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   2838         }
   2839         ucol_close(coll);
   2840     }
   2841 }
   2842 
   2843 #if 0
   2844 /* this test tests the current limitations of the engine */
   2845 /* it always fail, so it is disabled by default */
   2846 static void TestLimitations(void) {
   2847   /* recursive expansions */
   2848   {
   2849     static const char *rule = "&a=b/c&d=c/e";
   2850     static const char *tlimit01[] = {"add","b","adf"};
   2851     static const char *tlimit02[] = {"aa","b","af"};
   2852     log_verbose("recursive expansions\n");
   2853     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2854     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2855   }
   2856   /* contractions spanning expansions */
   2857   {
   2858     static const char *rule = "&a<<<c/e&g<<<eh";
   2859     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   2860     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   2861     log_verbose("contractions spanning expansions\n");
   2862     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2863     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2864   }
   2865   /* normalization: nulls in contractions */
   2866   {
   2867     static const char *rule = "&a<<<\\u0000\\u0302";
   2868     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2869     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2870     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2871     static const UColAttributeValue valOn[] = { UCOL_ON };
   2872     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2873 
   2874     log_verbose("NULL in contractions\n");
   2875     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2876     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2877     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2878     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2879 
   2880   }
   2881   /* normalization: contractions spanning normalization */
   2882   {
   2883     static const char *rule = "&a<<<\\u0000\\u0302";
   2884     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2885     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2886     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2887     static const UColAttributeValue valOn[] = { UCOL_ON };
   2888     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2889 
   2890     log_verbose("contractions spanning normalization\n");
   2891     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2892     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2893     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2894     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2895 
   2896   }
   2897   /* variable top:  */
   2898   {
   2899     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   2900     static const char *rule = "&\\u2010<x<[variable top]=z";
   2901     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   2902     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   2903     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   2904     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   2905     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   2906     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   2907     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   2908 
   2909     log_verbose("variable top\n");
   2910     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2911     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2912     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2913     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2914     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2915 
   2916   }
   2917   /* case level */
   2918   {
   2919     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   2920     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   2921     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   2922     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   2923     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   2924     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   2925     log_verbose("case level\n");
   2926     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2927     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2928     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2929     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2930   }
   2931 
   2932 }
   2933 #endif
   2934 
   2935 static void TestBocsuCoverage(void) {
   2936   UErrorCode status = U_ZERO_ERROR;
   2937   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   2938   UChar       test[256] = {0};
   2939   uint32_t    tlen     = u_unescape(testString, test, 32);
   2940   uint8_t key[256]     = {0};
   2941   uint32_t klen         = 0;
   2942 
   2943   UCollator *coll = ucol_open("", &status);
   2944   if(U_SUCCESS(status)) {
   2945   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   2946 
   2947   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   2948 
   2949   ucol_close(coll);
   2950   } else {
   2951     log_data_err("Couldn't open UCA\n");
   2952   }
   2953 }
   2954 
   2955 static void TestVariableTopSetting(void) {
   2956   UErrorCode status = U_ZERO_ERROR;
   2957   const UChar *current = NULL;
   2958   uint32_t varTopOriginal = 0, varTop1, varTop2;
   2959   UCollator *coll = ucol_open("", &status);
   2960   if(U_SUCCESS(status)) {
   2961 
   2962   uint32_t strength = 0;
   2963   uint16_t specs = 0;
   2964   uint32_t chOffset = 0;
   2965   uint32_t chLen = 0;
   2966   uint32_t exOffset = 0;
   2967   uint32_t exLen = 0;
   2968   uint32_t oldChOffset = 0;
   2969   uint32_t oldChLen = 0;
   2970   uint32_t oldExOffset = 0;
   2971   uint32_t oldExLen = 0;
   2972   uint32_t prefixOffset = 0;
   2973   uint32_t prefixLen = 0;
   2974 
   2975   UBool startOfRules = TRUE;
   2976   UColTokenParser src;
   2977   UColOptionSet opts;
   2978 
   2979   UChar *rulesCopy = NULL;
   2980   uint32_t rulesLen;
   2981 
   2982   UCollationResult result;
   2983 
   2984   UChar first[256] = { 0 };
   2985   UChar second[256] = { 0 };
   2986   UParseError parseError;
   2987   int32_t myQ = getTestOption(QUICK_OPTION);
   2988 
   2989   uprv_memset(&src, 0, sizeof(UColTokenParser));
   2990 
   2991   src.opts = &opts;
   2992 
   2993   if(getTestOption(QUICK_OPTION) <= 0) {
   2994     setTestOption(QUICK_OPTION, 1);
   2995   }
   2996 
   2997   /* this test will fail when normalization is turned on */
   2998   /* therefore we always turn off exhaustive mode for it */
   2999   { /* QUICK > 0*/
   3000     log_verbose("Slide variable top over UCARules\n");
   3001     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
   3002     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   3003     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
   3004 
   3005     if(U_SUCCESS(status) && rulesLen > 0) {
   3006       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   3007       src.current = src.source = rulesCopy;
   3008       src.end = rulesCopy+rulesLen;
   3009       src.extraCurrent = src.end;
   3010       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   3011 
   3012 	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   3013 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   3014       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   3015         strength = src.parsedToken.strength;
   3016         chOffset = src.parsedToken.charsOffset;
   3017         chLen = src.parsedToken.charsLen;
   3018         exOffset = src.parsedToken.extensionOffset;
   3019         exLen = src.parsedToken.extensionLen;
   3020         prefixOffset = src.parsedToken.prefixOffset;
   3021         prefixLen = src.parsedToken.prefixLen;
   3022         specs = src.parsedToken.flags;
   3023 
   3024         startOfRules = FALSE;
   3025         {
   3026           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
   3027         }
   3028         if(strength == UCOL_PRIMARY) {
   3029           status = U_ZERO_ERROR;
   3030           varTopOriginal = ucol_getVariableTop(coll, &status);
   3031           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
   3032           if(U_FAILURE(status)) {
   3033             char buffer[256];
   3034             char *buf = buffer;
   3035             uint32_t i = 0, j;
   3036             uint32_t CE = UCOL_NO_MORE_CES;
   3037 
   3038             /* before we start screaming, let's see if there is a problem with the rules */
   3039             UErrorCode collIterateStatus = U_ZERO_ERROR;
   3040             collIterate *s = uprv_new_collIterate(&collIterateStatus);
   3041             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
   3042 
   3043             CE = ucol_getNextCE(coll, s, &status);
   3044 
   3045             for(i = 0; i < oldChLen; i++) {
   3046               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
   3047               buf += j;
   3048             }
   3049             if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3050               log_verbose("= Expected failure for %s =", buffer);
   3051             } else {
   3052               if(uprv_collIterateAtEnd(s)) {
   3053                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
   3054                   oldChOffset, u_errorName(status), buffer);
   3055               } else {
   3056                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
   3057                   buffer);
   3058               }
   3059             }
   3060             uprv_delete_collIterate(s);
   3061           }
   3062           varTop2 = ucol_getVariableTop(coll, &status);
   3063           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
   3064             log_err("cannot retrieve set varTop value!\n");
   3065             continue;
   3066           }
   3067 
   3068           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
   3069 
   3070             u_strncpy(first, src.source+oldChOffset, oldChLen);
   3071             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
   3072             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
   3073             first[2*oldChLen+chLen] = 0;
   3074 
   3075             if(oldExLen == 0) {
   3076               u_strncpy(second, src.source+chOffset, chLen);
   3077               second[chLen] = 0;
   3078             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
   3079               u_strncpy(second, src.source+oldExOffset, oldExLen);
   3080               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
   3081               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
   3082               second[2*oldExLen+chLen] = 0;
   3083             }
   3084             result = ucol_strcoll(coll, first, -1, second, -1);
   3085             if(result == UCOL_EQUAL) {
   3086               doTest(coll, first, second, UCOL_EQUAL);
   3087             } else {
   3088               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
   3089             }
   3090           }
   3091         }
   3092         if(strength != UCOL_TOK_RESET) {
   3093           oldChOffset = chOffset;
   3094           oldChLen = chLen;
   3095           oldExOffset = exOffset;
   3096           oldExLen = exLen;
   3097         }
   3098       }
   3099       status = U_ZERO_ERROR;
   3100     }
   3101     else {
   3102       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
   3103       return;
   3104     }
   3105     if (U_FAILURE(status)) {
   3106         log_err("Error parsing rules %s\n", u_errorName(status));
   3107         return;
   3108     }
   3109     status = U_ZERO_ERROR;
   3110   }
   3111 
   3112   setTestOption(QUICK_OPTION, myQ);
   3113 
   3114   log_verbose("Testing setting variable top to contractions\n");
   3115   {
   3116     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
   3117     int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
   3118     while(*conts != 0) {
   3119       /*
   3120        * A continuation is NUL-terminated and NUL-padded
   3121        * except if it has the maximum length.
   3122        */
   3123       int32_t contractionLength = maxUCAContractionLength;
   3124       while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
   3125         --contractionLength;
   3126       }
   3127       if(*(conts+1)==0) { /* pre-context */
   3128         varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
   3129       } else {
   3130         varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
   3131       }
   3132       if(U_FAILURE(status)) {
   3133         if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3134           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
   3135            * therefore it is not an error when it complains about them. */
   3136           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
   3137                       *conts, *(conts+1), *(conts+2));
   3138         } else {
   3139           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
   3140                   *conts, *(conts+1), *(conts+2), u_errorName(status));
   3141         }
   3142         status = U_ZERO_ERROR;
   3143       }
   3144       conts+=maxUCAContractionLength;
   3145     }
   3146 
   3147     status = U_ZERO_ERROR;
   3148 
   3149     first[0] = 0x0040;
   3150     first[1] = 0x0050;
   3151     first[2] = 0x0000;
   3152 
   3153     ucol_setVariableTop(coll, first, -1, &status);
   3154 
   3155     if(U_SUCCESS(status)) {
   3156       log_err("Invalid contraction succeded in setting variable top!\n");
   3157     }
   3158 
   3159   }
   3160 
   3161   log_verbose("Test restoring variable top\n");
   3162 
   3163   status = U_ZERO_ERROR;
   3164   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   3165   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   3166     log_err("Couldn't restore old variable top\n");
   3167   }
   3168 
   3169   log_verbose("Testing calling with error set\n");
   3170 
   3171   status = U_INTERNAL_PROGRAM_ERROR;
   3172   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
   3173   varTop2 = ucol_getVariableTop(coll, &status);
   3174   ucol_restoreVariableTop(coll, varTop2, &status);
   3175   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
   3176   varTop2 = ucol_getVariableTop(NULL, &status);
   3177   ucol_restoreVariableTop(NULL, varTop2, &status);
   3178   if(status != U_INTERNAL_PROGRAM_ERROR) {
   3179     log_err("Bad reaction to passed error!\n");
   3180   }
   3181   uprv_free(src.source);
   3182   ucol_close(coll);
   3183   } else {
   3184     log_data_err("Couldn't open UCA collator\n");
   3185   }
   3186 
   3187 }
   3188 
   3189 static void TestNonChars(void) {
   3190   static const char *test[] = {
   3191       "\\u0000",  /* ignorable */
   3192       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   3193       "\\uFDD0", "\\uFDEF",
   3194       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   3195       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   3196       "\\U0003FFFE", "\\U0003FFFF",
   3197       "\\U0004FFFE", "\\U0004FFFF",
   3198       "\\U0005FFFE", "\\U0005FFFF",
   3199       "\\U0006FFFE", "\\U0006FFFF",
   3200       "\\U0007FFFE", "\\U0007FFFF",
   3201       "\\U0008FFFE", "\\U0008FFFF",
   3202       "\\U0009FFFE", "\\U0009FFFF",
   3203       "\\U000AFFFE", "\\U000AFFFF",
   3204       "\\U000BFFFE", "\\U000BFFFF",
   3205       "\\U000CFFFE", "\\U000CFFFF",
   3206       "\\U000DFFFE", "\\U000DFFFF",
   3207       "\\U000EFFFE", "\\U000EFFFF",
   3208       "\\U000FFFFE", "\\U000FFFFF",
   3209       "\\U0010FFFE", "\\U0010FFFF",
   3210       "\\uFFFF"  /* special character with maximum primary weight */
   3211   };
   3212   UErrorCode status = U_ZERO_ERROR;
   3213   UCollator *coll = ucol_open("en_US", &status);
   3214 
   3215   log_verbose("Test non characters\n");
   3216 
   3217   if(U_SUCCESS(status)) {
   3218     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   3219   } else {
   3220     log_err_status(status, "Unable to open collator\n");
   3221   }
   3222 
   3223   ucol_close(coll);
   3224 }
   3225 
   3226 static void TestExtremeCompression(void) {
   3227   static char *test[4];
   3228   int32_t j = 0, i = 0;
   3229 
   3230   for(i = 0; i<4; i++) {
   3231     test[i] = (char *)malloc(2048*sizeof(char));
   3232   }
   3233 
   3234   for(j = 20; j < 500; j++) {
   3235     for(i = 0; i<4; i++) {
   3236       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3237       test[i][j-1] = (char)('a'+i);
   3238       test[i][j] = 0;
   3239     }
   3240     genericLocaleStarter("en_US", (const char **)test, 4);
   3241   }
   3242 
   3243 
   3244   for(i = 0; i<4; i++) {
   3245     free(test[i]);
   3246   }
   3247 }
   3248 
/*
 * Disabled variant of TestExtremeCompression(); the surrounding #if 0 keeps it
 * out of the build, and the active definition appears earlier in this file.
 * NOTE(review): this variant passes `status` to ucol_open() where the other
 * calls in this file pass `&status`, and it never uses or closes `coll`.
 * Both would need attention before re-enabling it.
 */
#if 0
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
   3278 
   3279 static void TestSurrogates(void) {
   3280   static const char *test[] = {
   3281     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   3282        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   3283        "\\ud800\\udc00f",  "\\ud800\\udc00",
   3284        "\\ud800\\udc00c", "\\ud800\\udc00b",
   3285        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   3286        "\\ud800\\udc00a",
   3287        "c", "b"
   3288   };
   3289 
   3290   static const char *rule =
   3291     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   3292        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   3293        "< \\ud800\\udc00f  << \\ud800\\udc00"
   3294        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   3295        "< \\ud800\\udc00a  < c < b" ;
   3296 
   3297   genericRulesStarter(rule, test, 14);
   3298 }
   3299 
   3300 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   3301 static void TestPrefix(void) {
   3302   uint32_t i;
   3303 
   3304   static const struct {
   3305     const char *rules;
   3306     const char *data[50];
   3307     const uint32_t len;
   3308   } tests[] = {
   3309     { "&z <<< z|a",
   3310       {"zz", "za"}, 2 },
   3311 
   3312     { "&z <<< z|   a",
   3313       {"zz", "za"}, 2 },
   3314     { "[strength I]"
   3315       "&a=\\ud900\\udc25"
   3316       "&z<<<\\ud900\\udc25|a",
   3317       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   3318   };
   3319 
   3320 
   3321   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3322     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3323   }
   3324 }
   3325 
   3326 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   3327 /* JIS X 4061 collation order implementation                                   */
   3328 static void TestNewJapanese(void) {
   3329 
   3330   static const char * const test1[] = {
   3331       "\\u30b7\\u30e3\\u30fc\\u30ec",
   3332       "\\u30b7\\u30e3\\u30a4",
   3333       "\\u30b7\\u30e4\\u30a3",
   3334       "\\u30b7\\u30e3\\u30ec",
   3335       "\\u3061\\u3087\\u3053",
   3336       "\\u3061\\u3088\\u3053",
   3337       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   3338       "\\u3066\\u30fc\\u305f",
   3339       "\\u30c6\\u30fc\\u30bf",
   3340       "\\u30c6\\u30a7\\u30bf",
   3341       "\\u3066\\u3048\\u305f",
   3342       "\\u3067\\u30fc\\u305f",
   3343       "\\u30c7\\u30fc\\u30bf",
   3344       "\\u30c7\\u30a7\\u30bf",
   3345       "\\u3067\\u3048\\u305f",
   3346       "\\u3066\\u30fc\\u305f\\u30fc",
   3347       "\\u30c6\\u30fc\\u30bf\\u30a1",
   3348       "\\u30c6\\u30a7\\u30bf\\u30fc",
   3349       "\\u3066\\u3047\\u305f\\u3041",
   3350       "\\u3066\\u3048\\u305f\\u30fc",
   3351       "\\u3067\\u30fc\\u305f\\u30fc",
   3352       "\\u30c7\\u30fc\\u30bf\\u30a1",
   3353       "\\u3067\\u30a7\\u305f\\u30a1",
   3354       "\\u30c7\\u3047\\u30bf\\u3041",
   3355       "\\u30c7\\u30a8\\u30bf\\u30a2",
   3356       "\\u3072\\u3086",
   3357       "\\u3073\\u3085\\u3042",
   3358       "\\u3074\\u3085\\u3042",
   3359       "\\u3073\\u3085\\u3042\\u30fc",
   3360       "\\u30d3\\u30e5\\u30a2\\u30fc",
   3361       "\\u3074\\u3085\\u3042\\u30fc",
   3362       "\\u30d4\\u30e5\\u30a2\\u30fc",
   3363       "\\u30d2\\u30e5\\u30a6",
   3364       "\\u30d2\\u30e6\\u30a6",
   3365       "\\u30d4\\u30e5\\u30a6\\u30a2",
   3366       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   3367       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   3368       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   3369       "\\u3072\\u3085\\u3093",
   3370       "\\u3074\\u3085\\u3093",
   3371       "\\u3075\\u30fc\\u308a",
   3372       "\\u30d5\\u30fc\\u30ea",
   3373       "\\u3075\\u3045\\u308a",
   3374       "\\u3075\\u30a5\\u308a",
   3375       "\\u3075\\u30a5\\u30ea",
   3376       "\\u30d5\\u30a6\\u30ea",
   3377       "\\u3076\\u30fc\\u308a",
   3378       "\\u30d6\\u30fc\\u30ea",
   3379       "\\u3076\\u3045\\u308a",
   3380       "\\u30d6\\u30a5\\u308a",
   3381       "\\u3077\\u3046\\u308a",
   3382       "\\u30d7\\u30a6\\u30ea",
   3383       "\\u3075\\u30fc\\u308a\\u30fc",
   3384       "\\u30d5\\u30a5\\u30ea\\u30fc",
   3385       "\\u3075\\u30a5\\u308a\\u30a3",
   3386       "\\u30d5\\u3045\\u308a\\u3043",
   3387       "\\u30d5\\u30a6\\u30ea\\u30fc",
   3388       "\\u3075\\u3046\\u308a\\u3043",
   3389       "\\u30d6\\u30a6\\u30ea\\u30a4",
   3390       "\\u3077\\u30fc\\u308a\\u30fc",
   3391       "\\u3077\\u30a5\\u308a\\u30a4",
   3392       "\\u3077\\u3046\\u308a\\u30fc",
   3393       "\\u30d7\\u30a6\\u30ea\\u30a4",
   3394       "\\u30d5\\u30fd",
   3395       "\\u3075\\u309e",
   3396       "\\u3076\\u309d",
   3397       "\\u3076\\u3075",
   3398       "\\u3076\\u30d5",
   3399       "\\u30d6\\u3075",
   3400       "\\u30d6\\u30d5",
   3401       "\\u3076\\u309e",
   3402       "\\u3076\\u3077",
   3403       "\\u30d6\\u3077",
   3404       "\\u3077\\u309d",
   3405       "\\u30d7\\u30fd",
   3406       "\\u3077\\u3075",
   3407 };
   3408 
   3409   static const char *test2[] = {
   3410     "\\u306f\\u309d", /* H\\u309d */
   3411     "\\u30cf\\u30fd", /* K\\u30fd */
   3412     "\\u306f\\u306f", /* HH */
   3413     "\\u306f\\u30cf", /* HK */
   3414     "\\u30cf\\u30cf", /* KK */
   3415     "\\u306f\\u309e", /* H\\u309e */
   3416     "\\u30cf\\u30fe", /* K\\u30fe */
   3417     "\\u306f\\u3070", /* HH\\u309b */
   3418     "\\u30cf\\u30d0", /* KK\\u309b */
   3419     "\\u306f\\u3071", /* HH\\u309c */
   3420     "\\u30cf\\u3071", /* KH\\u309c */
   3421     "\\u30cf\\u30d1", /* KK\\u309c */
   3422     "\\u3070\\u309d", /* H\\u309b\\u309d */
   3423     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   3424     "\\u3070\\u306f", /* H\\u309bH */
   3425     "\\u30d0\\u30cf", /* K\\u309bK */
   3426     "\\u3070\\u309e", /* H\\u309b\\u309e */
   3427     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   3428     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   3429     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   3430     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   3431     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   3432     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   3433     "\\u3071\\u309d", /* H\\u309c\\u309d */
   3434     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   3435     "\\u3071\\u306f", /* H\\u309cH */
   3436     "\\u30d1\\u30cf", /* K\\u309cK */
   3437     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   3438     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   3439     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   3440     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   3441     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   3442   };
   3443   /*
   3444   static const char *test3[] = {
   3445     "\\u221er\\u221e",
   3446     "\\u221eR#",
   3447     "\\u221et\\u221e",
   3448     "#r\\u221e",
   3449     "#R#",
   3450     "#t%",
   3451     "#T%",
   3452     "8t\\u221e",
   3453     "8T\\u221e",
   3454     "8t#",
   3455     "8T#",
   3456     "8t%",
   3457     "8T%",
   3458     "8t8",
   3459     "8T8",
   3460     "\\u03c9r\\u221e",
   3461     "\\u03a9R%",
   3462     "rr\\u221e",
   3463     "rR\\u221e",
   3464     "Rr\\u221e",
   3465     "RR\\u221e",
   3466     "RT%",
   3467     "rt8",
   3468     "tr\\u221e",
   3469     "tr8",
   3470     "TR8",
   3471     "tt8",
   3472     "\\u30b7\\u30e3\\u30fc\\u30ec",
   3473   };
   3474   */
   3475   static const UColAttribute att[] = { UCOL_STRENGTH };
   3476   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   3477 
   3478   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   3479   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   3480 
   3481   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   3482   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   3483   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   3484   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   3485   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   3486 }
   3487 
   3488 static void TestStrCollIdenticalPrefix(void) {
   3489   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   3490   const char* test[] = {
   3491     "ab\\ud9b0\\udc70",
   3492     "ab\\ud9b0\\udc71"
   3493   };
   3494   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   3495 }
   3496 /* Contractions should have all their canonically equivalent */
   3497 /* strings included */
   3498 static void TestContractionClosure(void) {
   3499   static const struct {
   3500     const char *rules;
   3501     const char *data[10];
   3502     const uint32_t len;
   3503   } tests[] = {
   3504     {   "&b=\\u00e4\\u00e4",
   3505       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   3506     {   "&b=\\u00C5",
   3507       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   3508   };
   3509   uint32_t i;
   3510 
   3511 
   3512   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3513     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   3514   }
   3515 }
   3516 
   3517 /* This tests also fails*/
   3518 static void TestBeforePrefixFailure(void) {
   3519   static const struct {
   3520     const char *rules;
   3521     const char *data[10];
   3522     const uint32_t len;
   3523   } tests[] = {
   3524     { "&g <<< a"
   3525       "&[before 3]\\uff41 <<< x",
   3526       {"x", "\\uff41"}, 2 },
   3527     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3528         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3529         "&[before 3]\\u30a7<<<\\u30a9",
   3530       {"\\u30a9", "\\u30a7"}, 2 },
   3531     {   "&[before 3]\\u30a7<<<\\u30a9"
   3532         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3533         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   3534       {"\\u30a9", "\\u30a7"}, 2 },
   3535   };
   3536   uint32_t i;
   3537 
   3538 
   3539   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3540     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3541   }
   3542 
   3543 #if 0
   3544   const char* rule1 =
   3545         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3546         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3547         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   3548   const char* rule2 =
   3549         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   3550         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3551         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   3552   const char* test[] = {
   3553       "\\u30c6\\u30fc\\u30bf",
   3554       "\\u30c6\\u30a7\\u30bf",
   3555   };
   3556   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   3557   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   3558 /* this piece of code should be in some sort of verbose mode     */
   3559 /* it gets the collation elements for elements and prints them   */
   3560 /* This is useful when trying to see whether the problem is      */
   3561   {
   3562     UErrorCode status = U_ZERO_ERROR;
   3563     uint32_t i = 0;
   3564     UCollationElements *it = NULL;
   3565     uint32_t CE;
   3566     UChar string[256];
   3567     uint32_t uStringLen;
   3568     UCollator *coll = NULL;
   3569 
   3570     uStringLen = u_unescape(rule1, string, 256);
   3571 
   3572     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3573 
   3574     /*coll = ucol_open("ja_JP_JIS", &status);*/
   3575     it = ucol_openElements(coll, string, 0, &status);
   3576 
   3577     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   3578       log_verbose("%s\n", test[i]);
   3579       uStringLen = u_unescape(test[i], string, 256);
   3580       ucol_setText(it, string, uStringLen, &status);
   3581 
   3582       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   3583         log_verbose("%08X\n", CE);
   3584       }
   3585       log_verbose("\n");
   3586 
   3587     }
   3588 
   3589     ucol_closeElements(it);
   3590     ucol_close(coll);
   3591   }
   3592 #endif
   3593 }
   3594 
   3595 static void TestPrefixCompose(void) {
   3596   const char* rule1 =
   3597         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   3598   /*
   3599   const char* test[] = {
   3600       "\\u30c6\\u30fc\\u30bf",
   3601       "\\u30c6\\u30a7\\u30bf",
   3602   };
   3603   */
   3604   {
   3605     UErrorCode status = U_ZERO_ERROR;
   3606     /*uint32_t i = 0;*/
   3607     /*UCollationElements *it = NULL;*/
   3608 /*    uint32_t CE;*/
   3609     UChar string[256];
   3610     uint32_t uStringLen;
   3611     UCollator *coll = NULL;
   3612 
   3613     uStringLen = u_unescape(rule1, string, 256);
   3614 
   3615     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3616     ucol_close(coll);
   3617   }
   3618 
   3619 
   3620 }
   3621 
   3622 /*
   3623 [last variable] last variable value
   3624 [last primary ignorable] largest CE for primary ignorable
   3625 [last secondary ignorable] largest CE for secondary ignorable
   3626 [last tertiary ignorable] largest CE for tertiary ignorable
   3627 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   3628 */
   3629 
   3630 static void TestRuleOptions(void) {
   3631   /* values here are hardcoded and are correct for the current UCA
   3632    * when the UCA changes, one might be forced to change these
   3633    * values.
   3634    */
   3635 
   3636   /*
   3637    * These strings contain the last character before [variable top]
   3638    * and the first and second characters (by primary weights) after it.
   3639    * See FractionalUCA.txt. For example:
   3640       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   3641       [variable top = 0C FE]
   3642       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   3643      and
   3644       00B4; [0D 0C, 05, 05]
   3645    *
   3646    * Note: Starting with UCA 6.0, the [variable top] collation element
   3647    * is not the weight of any character or string,
   3648    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   3649    */
   3650 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   3651 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   3652 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   3653 
   3654   /*
   3655    * This string has to match the character that has the [last regular] weight
   3656    * which changes with each UCA version.
   3657    * See the bottom of FractionalUCA.txt which says something like
   3658       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   3659    *
   3660    * Note: Starting with UCA 6.0, the [last regular] collation element
   3661    * is not the weight of any character or string,
   3662    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   3663    */
   3664 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   3665 
   3666   static const struct {
   3667     const char *rules;
   3668     const char *data[10];
   3669     const uint32_t len;
   3670   } tests[] = {
   3671     /* - all befores here amount to zero */
   3672     { "&[before 3][first tertiary ignorable]<<<a",
   3673         { "\\u0000", "a"}, 2
   3674     }, /* you cannot go before first tertiary ignorable */
   3675 
   3676     { "&[before 3][last tertiary ignorable]<<<a",
   3677         { "\\u0000", "a"}, 2
   3678     }, /* you cannot go before last tertiary ignorable */
   3679 
   3680     { "&[before 3][first secondary ignorable]<<<a",
   3681         { "\\u0000", "a"}, 2
   3682     }, /* you cannot go before first secondary ignorable */
   3683 
   3684     { "&[before 3][last secondary ignorable]<<<a",
   3685         { "\\u0000", "a"}, 2
   3686     }, /* you cannot go before first secondary ignorable */
   3687 
   3688     /* 'normal' befores */
   3689 
   3690     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
   3691         {  "c", "b", "\\u0332", "a" }, 4
   3692     },
   3693 
   3694     /* we don't have a code point that corresponds to
   3695      * the last primary ignorable
   3696      */
   3697     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
   3698         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   3699     },
   3700 
   3701     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   3702         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   3703     },
   3704 
   3705     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   3706         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   3707     },
   3708 
   3709     { "&[first regular]<a"
   3710       "&[before 1][first regular]<b",
   3711       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   3712     },
   3713 
   3714     { "&[before 1][last regular]<b"
   3715       "&[last regular]<a",
   3716         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   3717     },
   3718 
   3719     { "&[before 1][first implicit]<b"
   3720       "&[first implicit]<a",
   3721         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   3722     },
   3723 
   3724     { "&[before 1][last implicit]<b"
   3725       "&[last implicit]<a",
   3726         { "b", "\\U0010FFFD", "a" }, 3
   3727     },
   3728 
   3729     { "&[last variable]<z"
   3730       "&[last primary ignorable]<x"
   3731       "&[last secondary ignorable]<<y"
   3732       "&[last tertiary ignorable]<<<w"
   3733       "&[top]<u",
   3734       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   3735     }
   3736 
   3737   };
   3738   uint32_t i;
   3739 
   3740   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3741     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3742   }
   3743 }
   3744 
   3745 
   3746 static void TestOptimize(void) {
   3747   /* this is not really a test - just trying out
   3748    * whether copying of UCA contents will fail
   3749    * Cannot really test, since the functionality
   3750    * remains the same.
   3751    */
   3752   static const struct {
   3753     const char *rules;
   3754     const char *data[10];
   3755     const uint32_t len;
   3756   } tests[] = {
   3757     /* - all befores here amount to zero */
   3758     { "[optimize [\\uAC00-\\uD7FF]]",
   3759     { "a", "b"}, 2}
   3760   };
   3761   uint32_t i;
   3762 
   3763   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3764     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3765   }
   3766 }
   3767 
   3768 /*
   3769 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   3770 weiv    ucol_strcollIter?
   3771 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   3772 weiv    these are the input strings?
   3773 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   3774 weiv    will check - could be a problem with utf-8 iterator
   3775 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   3776 weiv    hmmm
   3777 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   3778 weiv    that doesn't sound right
   3779 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   3780 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   3781 cycheng (at) ca.ibm.c... yes
   3782 weiv    and then do the comparison
   3783 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   3784 weiv    utf-16 strings look like a little endian ones in the example you sent me
   3785 weiv    It could be a bug - let me try to test it out
   3786 cycheng (at) ca.ibm.c... ok
   3787 cycheng (at) ca.ibm.c... we can wait till the conf. call
   3788 cycheng (at) ca.ibm.c... next weke
   3789 weiv    that would be great
   3790 weiv    hmmm
   3791 weiv    I might be wrong
   3792 weiv    let me play with it some more
   3793 cycheng (at) ca.ibm.c... ok
   3794 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   3795 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   3796 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   3797 weiv    ok
   3798 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   3799 weiv    thanks
   3800 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   3801 */
/*
 * Disabled reproduction (kept out of the build by #if 0) for the report quoted
 * in the comment above: the same two strings are compared through UTF-16BE
 * iterators and through UTF-8 iterators, and an error is logged when the two
 * ucol_strcollIter() results differ.
 * NOTE(review): the status of ucol_open() is not checked before the collator is used.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* the two strings as UTF-16BE bytes: <D800 0021> and <FFFC 0062> */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the byte sequences used for the UTF-8 iterators */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
   3842 
   3843 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   3844 static void Alexis2(void) {
   3845   UErrorCode status = U_ZERO_ERROR;
   3846   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3847   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3848   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3849   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   3850 
   3851   UConverter *conv = NULL;
   3852 
   3853   UCharIterator U16BEItS, U16BEItT;
   3854   UCharIterator U8ItS, U8ItT;
   3855 
   3856   UCollationResult resU16, resU16BE, resU8;
   3857 
   3858   static const char* const pairs[][2] = {
   3859     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   3860     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   3861     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   3862     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   3863     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   3864     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   3865     { "\\u0020", "\\u0020\\u0000"}
   3866 /*
   3867 5F20 (my result here)
   3868 5F204E008E3F
   3869 5F20 (your result here)
   3870 */
   3871   };
   3872 
   3873   int32_t i = 0;
   3874 
   3875   UCollator *coll = ucol_open("", &status);
   3876   if(status == U_FILE_ACCESS_ERROR) {
   3877     log_data_err("Is your data around?\n");
   3878     return;
   3879   } else if(U_FAILURE(status)) {
   3880     log_err("Error opening collator\n");
   3881     return;
   3882   }
   3883   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3884   conv = ucnv_open("UTF16BE", &status);
   3885   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   3886     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3887     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3888 
   3889     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   3890 
   3891     log_verbose("Result of strcoll is %i\n", resU16);
   3892 
   3893     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   3894     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   3895 
   3896     /* use the original sizes, as the result from converter is in bytes */
   3897     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   3898     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   3899 
   3900     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   3901 
   3902     log_verbose("Result of U16BE is %i\n", resU16BE);
   3903 
   3904     if(resU16 != resU16BE) {
   3905       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   3906     }
   3907 
   3908     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   3909     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   3910 
   3911     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   3912     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   3913 
   3914     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   3915 
   3916     if(resU16 != resU8) {
   3917       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   3918     }
   3919 
   3920   }
   3921 
   3922   ucol_close(coll);
   3923   ucnv_close(conv);
   3924 }
   3925 
   3926 static void TestHebrewUCA(void) {
   3927   UErrorCode status = U_ZERO_ERROR;
   3928   static const char *first[] = {
   3929     "d790d6b8d79cd795d6bcd7a9",
   3930     "d790d79cd79ed7a7d799d799d7a1",
   3931     "d790d6b4d79ed795d6bcd7a9",
   3932   };
   3933 
   3934   char utf8String[3][256];
   3935   UChar utf16String[3][256];
   3936 
   3937   int32_t i = 0, j = 0;
   3938   int32_t sizeUTF8[3];
   3939   int32_t sizeUTF16[3];
   3940 
   3941   UCollator *coll = ucol_open("", &status);
   3942   if (U_FAILURE(status)) {
   3943       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   3944       return;
   3945   }
   3946   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   3947 
   3948   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   3949     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   3950     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   3951     log_verbose("%i: ");
   3952     for(j = 0; j < sizeUTF16[i]; j++) {
   3953       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   3954       log_verbose("%04X", utf16String[i][j]);
   3955     }
   3956     log_verbose("\n");
   3957   }
   3958   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   3959     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   3960       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   3961     }
   3962   }
   3963 
   3964   ucol_close(coll);
   3965 
   3966 }
   3967 
   3968 static void TestPartialSortKeyTermination(void) {
   3969   static const char* cases[] = {
   3970     "\\u1234\\u1234\\udc00",
   3971     "\\udc00\\ud800\\ud800"
   3972   };
   3973 
   3974   int32_t i = sizeof(UCollator);
   3975 
   3976   UErrorCode status = U_ZERO_ERROR;
   3977 
   3978   UCollator *coll = ucol_open("", &status);
   3979 
   3980   UCharIterator iter;
   3981 
   3982   UChar currCase[256];
   3983   int32_t length = 0;
   3984   int32_t pKeyLen = 0;
   3985 
   3986   uint8_t key[256];
   3987 
   3988   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   3989     uint32_t state[2] = {0, 0};
   3990     length = u_unescape(cases[i], currCase, 256);
   3991     uiter_setString(&iter, currCase, length);
   3992     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   3993 
   3994     log_verbose("Done\n");
   3995 
   3996   }
   3997   ucol_close(coll);
   3998 }
   3999 
   4000 static void TestSettings(void) {
   4001   static const char* cases[] = {
   4002     "apple",
   4003       "Apple"
   4004   };
   4005 
   4006   static const char* locales[] = {
   4007     "",
   4008       "en"
   4009   };
   4010 
   4011   UErrorCode status = U_ZERO_ERROR;
   4012 
   4013   int32_t i = 0, j = 0;
   4014 
   4015   UChar source[256], target[256];
   4016   int32_t sLen = 0, tLen = 0;
   4017 
   4018   UCollator *collateObject = NULL;
   4019   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   4020     collateObject = ucol_open(locales[i], &status);
   4021     ucol_setStrength(collateObject, UCOL_PRIMARY);
   4022     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   4023     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   4024       sLen = u_unescape(cases[j-1], source, 256);
   4025       source[sLen] = 0;
   4026       tLen = u_unescape(cases[j], target, 256);
   4027       source[tLen] = 0;
   4028       doTest(collateObject, source, target, UCOL_EQUAL);
   4029     }
   4030     ucol_close(collateObject);
   4031   }
   4032 }
   4033 
   4034 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   4035     UErrorCode status = U_ZERO_ERROR;
   4036     int32_t errorNo = 0;
   4037     /*const UChar *sourceRules = NULL;*/
   4038     /*int32_t sourceRulesLen = 0;*/
   4039     UColAttributeValue french = UCOL_OFF;
   4040     int32_t cloneSize = 0;
   4041 
   4042     if(!ucol_equals(source, target)) {
   4043         log_err("Same collators, different address not equal\n");
   4044         errorNo++;
   4045     }
   4046     ucol_close(target);
   4047     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   4048         /* currently, safeClone is implemented through getRules/openRules
   4049         * so it is the same as the test below - I will comment that test out.
   4050         */
   4051         /* real thing */
   4052         target = ucol_safeClone(source, NULL, &cloneSize, &status);
   4053         if(U_FAILURE(status)) {
   4054             log_err("Error creating clone\n");
   4055             errorNo++;
   4056             return errorNo;
   4057         }
   4058         if(!ucol_equals(source, target)) {
   4059             log_err("Collator different from it's clone\n");
   4060             errorNo++;
   4061         }
   4062         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   4063         if(french == UCOL_ON) {
   4064             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   4065         } else {
   4066             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   4067         }
   4068         if(U_FAILURE(status)) {
   4069             log_err("Error setting attributes\n");
   4070             errorNo++;
   4071             return errorNo;
   4072         }
   4073         if(ucol_equals(source, target)) {
   4074             log_err("Collators same even when options changed\n");
   4075             errorNo++;
   4076         }
   4077         ucol_close(target);
   4078         /* commented out since safeClone uses exactly the same technique */
   4079         /*
   4080         sourceRules = ucol_getRules(source, &sourceRulesLen);
   4081         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4082         if(U_FAILURE(status)) {
   4083         log_err("Error instantiating target from rules\n");
   4084         errorNo++;
   4085         return errorNo;
   4086         }
   4087         if(!ucol_equals(source, target)) {
   4088         log_err("Collator different from collator that was created from the same rules\n");
   4089         errorNo++;
   4090         }
   4091         ucol_close(target);
   4092         */
   4093     }
   4094     return errorNo;
   4095 }
   4096 
   4097 
   4098 static void TestEquals(void) {
   4099     /* ucol_equals is not currently a public API. There is a chance that it will become
   4100     * something like this, but currently it is only used by RuleBasedCollator::operator==
   4101     */
   4102     /* test whether the two collators instantiated from the same locale are equal */
   4103     UErrorCode status = U_ZERO_ERROR;
   4104     UParseError parseError;
   4105     int32_t noOfLoc = uloc_countAvailable();
   4106     const char *locName = NULL;
   4107     UCollator *source = NULL, *target = NULL;
   4108     int32_t i = 0;
   4109 
   4110     const char* rules[] = {
   4111         "&l < lj <<< Lj <<< LJ",
   4112         "&n < nj <<< Nj <<< NJ",
   4113         "&ae <<< \\u00e4",
   4114         "&AE <<< \\u00c4"
   4115     };
   4116     /*
   4117     const char* badRules[] = {
   4118     "&l <<< Lj",
   4119     "&n < nj <<< nJ <<< NJ",
   4120     "&a <<< \\u00e4",
   4121     "&AE <<< \\u00c4 <<< x"
   4122     };
   4123     */
   4124 
   4125     UChar sourceRules[1024], targetRules[1024];
   4126     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   4127     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   4128 
   4129     for(i = 0; i < rulesSize; i++) {
   4130         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   4131         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   4132     }
   4133 
   4134     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4135     if(status == U_FILE_ACCESS_ERROR) {
   4136         log_data_err("Is your data around?\n");
   4137         return;
   4138     } else if(U_FAILURE(status)) {
   4139         log_err("Error opening collator\n");
   4140         return;
   4141     }
   4142     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4143     if(!ucol_equals(source, target)) {
   4144         log_err("Equivalent collators not equal!\n");
   4145     }
   4146     ucol_close(source);
   4147     ucol_close(target);
   4148 
   4149     source = ucol_open("root", &status);
   4150     target = ucol_open("root", &status);
   4151     log_verbose("Testing root\n");
   4152     if(!ucol_equals(source, source)) {
   4153         log_err("Same collator not equal\n");
   4154     }
   4155     if(TestEqualsForCollator(locName, source, target)) {
   4156         log_err("Errors for root\n", locName);
   4157     }
   4158     ucol_close(source);
   4159 
   4160     for(i = 0; i<noOfLoc; i++) {
   4161         status = U_ZERO_ERROR;
   4162         locName = uloc_getAvailable(i);
   4163         /*if(hasCollationElements(locName)) {*/
   4164         log_verbose("Testing equality for locale %s\n", locName);
   4165         source = ucol_open(locName, &status);
   4166         target = ucol_open(locName, &status);
   4167         if (U_FAILURE(status)) {
   4168             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   4169             continue;
   4170         }
   4171         if(TestEqualsForCollator(locName, source, target)) {
   4172             log_err("Errors for locale %s\n", locName);
   4173         }
   4174         ucol_close(source);
   4175         /*}*/
   4176     }
   4177 }
   4178 
   4179 static void TestJ2726(void) {
   4180     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   4181     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   4182     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   4183     UErrorCode status = U_ZERO_ERROR;
   4184     UCollator *coll = ucol_open("en", &status);
   4185     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   4186     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4187     doTest(coll, a, aSpace, UCOL_EQUAL);
   4188     doTest(coll, aSpace, a, UCOL_EQUAL);
   4189     doTest(coll, a, spaceA, UCOL_EQUAL);
   4190     doTest(coll, spaceA, a, UCOL_EQUAL);
   4191     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   4192     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   4193     ucol_close(coll);
   4194 }
   4195 
   4196 static void NullRule(void) {
   4197     UChar r[3] = {0};
   4198     UErrorCode status = U_ZERO_ERROR;
   4199     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4200     if(U_SUCCESS(status)) {
   4201         log_err("This should have been an error!\n");
   4202         ucol_close(coll);
   4203     } else {
   4204         status = U_ZERO_ERROR;
   4205     }
   4206     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4207     if(U_FAILURE(status)) {
   4208         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   4209     } else {
   4210         ucol_close(coll);
   4211     }
   4212 }
   4213 
   4214 /**
   4215  * Test for CollationElementIterator previous and next for the whole set of
   4216  * unicode characters with normalization on.
   4217  */
   4218 static void TestNumericCollation(void)
   4219 {
   4220     UErrorCode status = U_ZERO_ERROR;
   4221 
   4222     const static char *basicTestStrings[]={
   4223     "hello1",
   4224     "hello2",
   4225     "hello2002",
   4226     "hello2003",
   4227     "hello123456",
   4228     "hello1234567",
   4229     "hello10000000",
   4230     "hello100000000",
   4231     "hello1000000000",
   4232     "hello10000000000",
   4233     };
   4234 
   4235     const static char *preZeroTestStrings[]={
   4236     "avery10000",
   4237     "avery010000",
   4238     "avery0010000",
   4239     "avery00010000",
   4240     "avery000010000",
   4241     "avery0000010000",
   4242     "avery00000010000",
   4243     "avery000000010000",
   4244     };
   4245 
   4246     const static char *thirtyTwoBitNumericStrings[]={
   4247     "avery42949672960",
   4248     "avery42949672961",
   4249     "avery42949672962",
   4250     "avery429496729610"
   4251     };
   4252 
   4253      const static char *longNumericStrings[]={
   4254      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   4255         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   4256         are treated as multiple collation elements. */
   4257     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   4258     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   4259     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   4260     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   4261     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   4262     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   4263     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   4264     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   4265     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   4266     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   4267     };
   4268 
   4269     const static char *supplementaryDigits[] = {
   4270       "\\uD835\\uDFCE", /* 0 */
   4271       "\\uD835\\uDFCF", /* 1 */
   4272       "\\uD835\\uDFD0", /* 2 */
   4273       "\\uD835\\uDFD1", /* 3 */
   4274       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   4275       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   4276       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   4277       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   4278       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   4279       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   4280     };
   4281 
   4282     const static char *foreignDigits[] = {
   4283       "\\u0661",
   4284         "\\u0662",
   4285         "\\u0663",
   4286       "\\u0661\\u0660",
   4287       "\\u0661\\u0662",
   4288       "\\u0661\\u0663",
   4289       "\\u0662\\u0660",
   4290       "\\u0662\\u0662",
   4291       "\\u0662\\u0663",
   4292       "\\u0663\\u0660",
   4293       "\\u0663\\u0662",
   4294       "\\u0663\\u0663"
   4295     };
   4296 
   4297     const static char *evenZeroes[] = {
   4298       "2000",
   4299       "2001",
   4300         "2002",
   4301         "2003"
   4302     };
   4303 
   4304     UColAttribute att = UCOL_NUMERIC_COLLATION;
   4305     UColAttributeValue val = UCOL_ON;
   4306 
   4307     /* Open our collator. */
   4308     UCollator* coll = ucol_open("root", &status);
   4309     if (U_FAILURE(status)){
   4310         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   4311               myErrorName(status));
   4312         return;
   4313     }
   4314     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   4315     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   4316     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   4317     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   4318     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   4319     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   4320 
   4321     /* Setting up our collator to do digits. */
   4322     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   4323     if (U_FAILURE(status)){
   4324         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   4325               myErrorName(status));
   4326         return;
   4327     }
   4328 
   4329     /*
   4330        Testing that prepended zeroes still yield the correct collation behavior.
   4331        We expect that every element in our strings array will be equal.
   4332     */
   4333     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   4334 
   4335     ucol_close(coll);
   4336 }
   4337 
   4338 static void TestTibetanConformance(void)
   4339 {
   4340     const char* test[] = {
   4341         "\\u0FB2\\u0591\\u0F71\\u0061",
   4342         "\\u0FB2\\u0F71\\u0061"
   4343     };
   4344 
   4345     UErrorCode status = U_ZERO_ERROR;
   4346     UCollator *coll = ucol_open("", &status);
   4347     UChar source[100];
   4348     UChar target[100];
   4349     int result;
   4350     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4351     if (U_SUCCESS(status)) {
   4352         u_unescape(test[0], source, 100);
   4353         u_unescape(test[1], target, 100);
   4354         doTest(coll, source, target, UCOL_EQUAL);
   4355         result = ucol_strcoll(coll, source, -1,   target, -1);
   4356         log_verbose("result %d\n", result);
   4357         if (UCOL_EQUAL != result) {
   4358             log_err("Tibetan comparison error\n");
   4359         }
   4360     }
   4361     ucol_close(coll);
   4362 
   4363     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   4364 }
   4365 
   4366 static void TestPinyinProblem(void) {
   4367     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   4368     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   4369 }
   4370 
   4371 #define TST_UCOL_MAX_INPUT 0x220001
   4372 #define topByte 0xFF000000;
   4373 #define bottomByte 0xFF;
   4374 #define fourBytes 0xFFFFFFFF;
   4375 
   4376 
   4377 static void showImplicit(UChar32 i) {
   4378     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
   4379         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
   4380     }
   4381 }
   4382 
   4383 static void TestImplicitGeneration(void) {
   4384     UErrorCode status = U_ZERO_ERROR;
   4385     UChar32 last = 0;
   4386     UChar32 current;
   4387     UChar32 i = 0, j = 0;
   4388     UChar32 roundtrip = 0;
   4389     UChar32 lastBottom = 0;
   4390     UChar32 currentBottom = 0;
   4391     UChar32 lastTop = 0;
   4392     UChar32 currentTop = 0;
   4393 
   4394     UCollator *coll = ucol_open("root", &status);
   4395     if(U_FAILURE(status)) {
   4396         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4397         return;
   4398     }
   4399 
   4400     uprv_uca_getRawFromImplicit(0xE20303E7);
   4401 
   4402     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
   4403         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
   4404 
   4405         /* check that it round-trips AND that all intervening ones are illegal*/
   4406         roundtrip = uprv_uca_getRawFromImplicit(current);
   4407         if (roundtrip != i) {
   4408             log_err("No roundtrip %08X\n", i);
   4409         }
   4410         if (last != 0) {
   4411             for (j = last + 1; j < current; ++j) {
   4412                 roundtrip = uprv_uca_getRawFromImplicit(j);
   4413                 /* raise an error if it *doesn't* find an error*/
   4414                 if (roundtrip != -1) {
   4415                     log_err("Fails to recognize illegal %08X\n", j);
   4416                 }
   4417             }
   4418         }
   4419         /* now do other consistency checks*/
   4420         lastBottom = last & bottomByte;
   4421         currentBottom = current & bottomByte;
   4422         lastTop = last & topByte;
   4423         currentTop = current & topByte;
   4424 
   4425         /* print out some values for spot-checking*/
   4426         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
   4427             showImplicit(i-3);
   4428             showImplicit(i-2);
   4429             showImplicit(i-1);
   4430             showImplicit(i);
   4431             showImplicit(i+1);
   4432             showImplicit(i+2);
   4433         }
   4434         last = current;
   4435 
   4436         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
   4437             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
   4438         }
   4439     }
   4440     showImplicit(TST_UCOL_MAX_INPUT-2);
   4441     showImplicit(TST_UCOL_MAX_INPUT-1);
   4442     showImplicit(TST_UCOL_MAX_INPUT);
   4443     ucol_close(coll);
   4444 }
   4445 
   4446 /**
   4447  * Iterate through the given iterator, checking to see that all the strings
   4448  * in the expected array are present.
   4449  * @param expected array of strings we expect to see, or NULL
   4450  * @param expectedCount number of elements of expected, or 0
   4451  */
   4452 static int32_t checkUEnumeration(const char* msg,
   4453                                  UEnumeration* iter,
   4454                                  const char** expected,
   4455                                  int32_t expectedCount) {
   4456     UErrorCode ec = U_ZERO_ERROR;
   4457     int32_t i = 0, n, j, bit;
   4458     int32_t seenMask = 0;
   4459 
   4460     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   4461     n = uenum_count(iter, &ec);
   4462     if (!assertSuccess("count", &ec)) return -1;
   4463     log_verbose("%s = [", msg);
   4464     for (;; ++i) {
   4465         const char* s = uenum_next(iter, NULL, &ec);
   4466         if (!assertSuccess("snext", &ec) || s == NULL) break;
   4467         if (i != 0) log_verbose(",");
   4468         log_verbose("%s", s);
   4469         /* check expected list */
   4470         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4471             if ((seenMask&bit) == 0 &&
   4472                 uprv_strcmp(s, expected[j]) == 0) {
   4473                 seenMask |= bit;
   4474                 break;
   4475             }
   4476         }
   4477     }
   4478     log_verbose("] (%d)\n", i);
   4479     assertTrue("count verified", i==n);
   4480     /* did we see all expected strings? */
   4481     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4482         if ((seenMask&bit)!=0) {
   4483             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   4484         } else {
   4485             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   4486         }
   4487     }
   4488     return n;
   4489 }
   4490 
   4491 /**
   4492  * Test new API added for separate collation tree.
   4493  */
   4494 static void TestSeparateTrees(void) {
   4495     UErrorCode ec = U_ZERO_ERROR;
   4496     UEnumeration *e = NULL;
   4497     int32_t n = -1;
   4498     UBool isAvailable;
   4499     char loc[256];
   4500 
   4501     static const char* AVAIL[] = { "en", "de" };
   4502 
   4503     static const char* KW[] = { "collation" };
   4504 
   4505     static const char* KWVAL[] = { "phonebook", "stroke" };
   4506 
   4507 #if !UCONFIG_NO_SERVICE
   4508     e = ucol_openAvailableLocales(&ec);
   4509     if (e != NULL) {
   4510         assertSuccess("ucol_openAvailableLocales", &ec);
   4511         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   4512         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   4513         /* Don't need to check n because we check list */
   4514         uenum_close(e);
   4515     } else {
   4516         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   4517     }
   4518 #endif
   4519 
   4520     e = ucol_getKeywords(&ec);
   4521     if (e != NULL) {
   4522         assertSuccess("ucol_getKeywords", &ec);
   4523         assertTrue("ucol_getKeywords!=0", e!=0);
   4524         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   4525         /* Don't need to check n because we check list */
   4526         uenum_close(e);
   4527     } else {
   4528         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   4529     }
   4530 
   4531     e = ucol_getKeywordValues(KW[0], &ec);
   4532     if (e != NULL) {
   4533         assertSuccess("ucol_getKeywordValues", &ec);
   4534         assertTrue("ucol_getKeywordValues!=0", e!=0);
   4535         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   4536         /* Don't need to check n because we check list */
   4537         uenum_close(e);
   4538     } else {
   4539         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   4540     }
   4541 
   4542     /* Try setting a warning before calling ucol_getKeywordValues */
   4543     ec = U_USING_FALLBACK_WARNING;
   4544     e = ucol_getKeywordValues(KW[0], &ec);
   4545     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   4546         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   4547         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   4548         /* Don't need to check n because we check list */
   4549         uenum_close(e);
   4550     }
   4551 
   4552     /*
   4553 U_DRAFT int32_t U_EXPORT2
   4554 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   4555                              const char* locale, UBool* isAvailable,
   4556                              UErrorCode* status);
   4557 }
   4558 */
   4559     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   4560                                      &isAvailable, &ec);
   4561     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4562         assertEquals("getFunctionalEquivalent(de)", "de", loc);
   4563         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   4564                    isAvailable == TRUE);
   4565     }
   4566 
   4567     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   4568                                      &isAvailable, &ec);
   4569     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4570         assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
   4571         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
   4572                    isAvailable == TRUE);
   4573     }
   4574 }
   4575 
   4576 /* supercedes TestJ784 */
   4577 static void TestBeforePinyin(void) {
   4578     const static char rules[] = {
   4579         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   4580         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   4581         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   4582         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   4583         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   4584         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   4585     };
   4586 
   4587     const static char *test[] = {
   4588         "l\\u0101",
   4589         "la",
   4590         "l\\u0101n",
   4591         "lan ",
   4592         "l\\u0113",
   4593         "le",
   4594         "l\\u0113n",
   4595         "len"
   4596     };
   4597 
   4598     const static char *test2[] = {
   4599         "x\\u0101",
   4600         "x\\u0100",
   4601         "X\\u0101",
   4602         "X\\u0100",
   4603         "x\\u00E1",
   4604         "x\\u00C1",
   4605         "X\\u00E1",
   4606         "X\\u00C1",
   4607         "x\\u01CE",
   4608         "x\\u01CD",
   4609         "X\\u01CE",
   4610         "X\\u01CD",
   4611         "x\\u00E0",
   4612         "x\\u00C0",
   4613         "X\\u00E0",
   4614         "X\\u00C0",
   4615         "xa",
   4616         "xA",
   4617         "Xa",
   4618         "XA",
   4619         "x\\u0101x",
   4620         "x\\u0100x",
   4621         "x\\u00E1x",
   4622         "x\\u00C1x",
   4623         "x\\u01CEx",
   4624         "x\\u01CDx",
   4625         "x\\u00E0x",
   4626         "x\\u00C0x",
   4627         "xax",
   4628         "xAx"
   4629     };
   4630 
   4631     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4632     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   4633     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   4634     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   4635 }
   4636 
   4637 static void TestBeforeTightening(void) {
   4638     static const struct {
   4639         const char *rules;
   4640         UErrorCode expectedStatus;
   4641     } tests[] = {
   4642         { "&[before 1]a<x", U_ZERO_ERROR },
   4643         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   4644         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   4645         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   4646         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   4647         { "&[before 2]a<<x",U_ZERO_ERROR },
   4648         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   4649         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   4650         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   4651         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   4652         { "&[before 3]a<<<x",U_ZERO_ERROR },
   4653         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   4654         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   4655     };
   4656 
   4657     int32_t i = 0;
   4658 
   4659     UErrorCode status = U_ZERO_ERROR;
   4660     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4661     uint32_t rlen = 0;
   4662 
   4663     UCollator *coll = NULL;
   4664 
   4665 
   4666     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4667         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   4668         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4669         if(status != tests[i].expectedStatus) {
   4670             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   4671                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   4672         }
   4673         ucol_close(coll);
   4674         status = U_ZERO_ERROR;
   4675     }
   4676 
   4677 }
   4678 
   4679 /*
   4680 &m < a
   4681 &[before 1] a < x <<< X << q <<< Q < z
   4682 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   4683 
   4684 &m < a
   4685 &[before 2] a << x <<< X << q <<< Q < z
   4686 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   4687 
   4688 &m < a
   4689 &[before 3] a <<< x <<< X << q <<< Q < z
   4690 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   4691 
   4692 
   4693 &m << a
   4694 &[before 1] a < x <<< X << q <<< Q < z
   4695 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   4696 
   4697 &m << a
   4698 &[before 2] a << x <<< X << q <<< Q < z
   4699 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   4700 
   4701 &m << a
   4702 &[before 3] a <<< x <<< X << q <<< Q < z
   4703 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   4704 
   4705 
   4706 &m <<< a
   4707 &[before 1] a < x <<< X << q <<< Q < z
   4708 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   4709 
   4710 &m <<< a
   4711 &[before 2] a << x <<< X << q <<< Q < z
   4712 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   4713 
   4714 &m <<< a
   4715 &[before 3] a <<< x <<< X << q <<< Q < z
   4716 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   4717 
   4718 
   4719 &[before 1] s < x <<< X << q <<< Q < z
   4720 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   4721 
   4722 &[before 2] s << x <<< X << q <<< Q < z
   4723 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   4724 
   4725 &[before 3] s <<< x <<< X << q <<< Q < z
   4726 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   4727 
   4728 
   4729 &[before 1] \u24DC < x <<< X << q <<< Q < z
   4730 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   4731 
   4732 &[before 2] \u24DC << x <<< X << q <<< Q < z
   4733 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   4734 
   4735 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   4736 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   4737 */
   4738 
   4739 
   4740 #if 0
   4741 /* requires features not yet supported */
   4742 static void TestMoreBefore(void) {
   4743     static const struct {
   4744         const char* rules;
   4745         const char* order[16];
   4746         int32_t size;
   4747     } tests[] = {
   4748         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   4749         { "m","M","x","X","q","Q","z","a","n" }, 9},
   4750         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   4751         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4752         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   4753         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4754         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   4755         { "x","X","q","Q","z","m","M","a","n" }, 9},
   4756         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   4757         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4758         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   4759         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4760         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   4761         { "x","X","q","Q","z","n","m","a","M" }, 9},
   4762         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   4763         { "x","X","q","Q","m","a","M","z","n" }, 9},
   4764         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   4765         { "m","x","X","a","M","q","Q","z","n" }, 9},
   4766         { "&[before 1] s < x <<< X << q <<< Q < z",
   4767         { "r","R","x","X","q","Q","z","s","n" }, 9},
   4768         { "&[before 2] s << x <<< X << q <<< Q < z",
   4769         { "r","R","x","X","q","Q","s","z","n" }, 9},
   4770         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   4771         { "r","R","x","X","s","q","Q","z","n" }, 9},
   4772         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   4773         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   4774         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   4775         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   4776         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   4777         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   4778     };
   4779 
   4780     int32_t i = 0;
   4781 
   4782     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4783         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   4784     }
   4785 }
   4786 #endif
   4787 
   4788 static void TestTailorNULL( void ) {
   4789     const static char* rule = "&a <<< '\\u0000'";
   4790     UErrorCode status = U_ZERO_ERROR;
   4791     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4792     uint32_t rlen = 0;
   4793     UChar a = 1, null = 0;
   4794     UCollationResult res = UCOL_EQUAL;
   4795 
   4796     UCollator *coll = NULL;
   4797 
   4798 
   4799     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   4800     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4801 
   4802     if(U_FAILURE(status)) {
   4803         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   4804     } else {
   4805         res = ucol_strcoll(coll, &a, 1, &null, 1);
   4806 
   4807         if(res != UCOL_LESS) {
   4808             log_err("NULL was not tailored properly!\n");
   4809         }
   4810     }
   4811 
   4812     ucol_close(coll);
   4813 }
   4814 
   4815 static void
   4816 TestUpperFirstQuaternary(void)
   4817 {
   4818   const char* tests[] = { "B", "b", "Bb", "bB" };
   4819   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   4820   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   4821   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4822 }
   4823 
   4824 static void
   4825 TestJ4960(void)
   4826 {
   4827   const char* tests[] = { "\\u00e2T", "aT" };
   4828   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   4829   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   4830   const char* tests2[] = { "a", "A" };
   4831   const char* rule = "&[first tertiary ignorable]=A=a";
   4832   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   4833   UColAttributeValue attVals2[] = { UCOL_ON };
   4834   /* Test whether we correctly ignore primary ignorables on case level when */
   4835   /* we have only primary & case level */
   4836   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   4837   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   4838   /* and case level */
   4839   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4840   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   4841   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   4842 }
   4843 
   4844 static void
   4845 TestJ5223(void)
   4846 {
   4847   static const char *test = "this is a test string";
   4848   UChar ustr[256];
   4849   int32_t ustr_length = u_unescape(test, ustr, 256);
   4850   unsigned char sortkey[256];
   4851   int32_t sortkey_length;
   4852   UErrorCode status = U_ZERO_ERROR;
   4853   static UCollator *coll = NULL;
   4854   coll = ucol_open("root", &status);
   4855   if(U_FAILURE(status)) {
   4856     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4857     return;
   4858   }
   4859   ucol_setStrength(coll, UCOL_PRIMARY);
   4860   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4861   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4862   if (U_FAILURE(status)) {
   4863     log_err("Failed setting atributes\n");
   4864     return;
   4865   }
   4866   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   4867   if (sortkey_length > 256) return;
   4868 
   4869   /* we mark the position where the null byte should be written in advance */
   4870   sortkey[sortkey_length-1] = 0xAA;
   4871 
   4872   /* we set the buffer size one byte higher than needed */
   4873   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4874     sortkey_length+1);
   4875 
   4876   /* no error occurs (for me) */
   4877   if (sortkey[sortkey_length-1] == 0xAA) {
   4878     log_err("Hit bug at first try\n");
   4879   }
   4880 
   4881   /* we mark the position where the null byte should be written again */
   4882   sortkey[sortkey_length-1] = 0xAA;
   4883 
   4884   /* this time we set the buffer size to the exact amount needed */
   4885   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4886     sortkey_length);
   4887 
   4888   /* now the trailing null byte is not written */
   4889   if (sortkey[sortkey_length-1] == 0xAA) {
   4890     log_err("Hit bug at second try\n");
   4891   }
   4892 
   4893   ucol_close(coll);
   4894 }
   4895 
   4896 /* Regression test for Thai partial sort key problem */
   4897 static void
   4898 TestJ5232(void)
   4899 {
   4900     const static char *test[] = {
   4901         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   4902         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   4903     };
   4904 
   4905     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   4906 }
   4907 
   4908 static void
   4909 TestJ5367(void)
   4910 {
   4911     const static char *test[] = { "a", "y" };
   4912     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   4913     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4914 }
   4915 
   4916 static void
   4917 TestVI5913(void)
   4918 {
   4919     UErrorCode status = U_ZERO_ERROR;
   4920     int32_t i, j;
   4921     UCollator *coll =NULL;
   4922     uint8_t  resColl[100], expColl[100];
   4923     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   4924     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
   4925     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   4926     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
   4927     static const UChar tData[][20]={
   4928         {0x1EAC, 0},
   4929         {0x0041, 0x0323, 0x0302, 0},
   4930         {0x1EA0, 0x0302, 0},
   4931         {0x00C2, 0x0323, 0},
   4932         {0x1ED8, 0},  /* O with dot and circumflex */
   4933         {0x1ECC, 0x0302, 0},
   4934         {0x1EB7, 0},
   4935         {0x1EA1, 0x0306, 0},
   4936     };
   4937     static const UChar tailorData[][20]={
   4938         {0x1FA2, 0},  /* Omega with 3 combining marks */
   4939         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   4940         {0x1FF3, 0x0313, 0x0300, 0},
   4941         {0x1F60, 0x0300, 0x0345, 0},
   4942         {0x1F62, 0x0345, 0},
   4943         {0x1FA0, 0x0300, 0},
   4944     };
   4945     static const UChar tailorData2[][20]={
   4946         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   4947         {0x0073, 0x0323, 0x030C, 0},
   4948         {0x0073, 0x030C, 0x0323, 0},
   4949     };
   4950     static const UChar tailorData3[][20]={
   4951         {0x007a, 0},  /*  z */
   4952         {0x0061, 0x0065, 0},  /*  a + e */
   4953         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   4954         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   4955         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   4956         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   4957         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   4958         {0x00EA, 0},  /* e with circumflex  */
   4959     };
   4960 
   4961     /* Test Vietnamese sort. */
   4962     coll = ucol_open("vi", &status);
   4963     if(U_FAILURE(status)) {
   4964         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   4965         return;
   4966     }
   4967     log_verbose("\n\nVI collation:");
   4968     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   4969         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4970     }
   4971     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   4972         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   4973     }
   4974     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   4975         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   4976     }
   4977     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   4978         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   4979     }
   4980 
   4981     for (j=0; j<8; j++) {
   4982         tLen = u_strlen(tData[j]);
   4983         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   4984         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   4985         for(i = 0; i<rLen; i++) {
   4986             log_verbose(" %02X", resColl[i]);
   4987         }
   4988     }
   4989 
   4990     ucol_close(coll);
   4991 
   4992     /* Test Romanian sort. */
   4993     coll = ucol_open("ro", &status);
   4994     log_verbose("\n\nRO collation:");
   4995     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   4996         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4997     }
   4998     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   4999         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   5000     }
   5001     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   5002         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   5003     }
   5004 
   5005     for (j=4; j<8; j++) {
   5006         tLen = u_strlen(tData[j]);
   5007         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   5008         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   5009         for(i = 0; i<rLen; i++) {
   5010             log_verbose(" %02X", resColl[i]);
   5011         }
   5012     }
   5013     ucol_close(coll);
   5014 
   5015     /* Test the precomposed Greek character with 3 combining marks. */
   5016     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   5017     ruleLen = u_strlen(rule);
   5018     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5019     if (U_FAILURE(status)) {
   5020         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   5021         return;
   5022     }
   5023     sLen = u_strlen(tailorData[0]);
   5024     for (j=1; j<6; j++) {
   5025         tLen = u_strlen(tailorData[j]);
   5026         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   5027             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   5028         }
   5029     }
   5030     /* Test getSortKey. */
   5031     tLen = u_strlen(tailorData[0]);
   5032     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   5033     for (j=0; j<6; j++) {
   5034         tLen = u_strlen(tailorData[j]);
   5035         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   5036         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5037             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5038             for(i = 0; i<rLen; i++) {
   5039                 log_err(" %02X", resColl[i]);
   5040             }
   5041         }
   5042     }
   5043     ucol_close(coll);
   5044 
   5045     log_verbose("\n\nTailoring test for s with caron:");
   5046     ruleLen = u_strlen(rule2);
   5047     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5048     tLen = u_strlen(tailorData2[0]);
   5049     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   5050     for (j=1; j<3; j++) {
   5051         tLen = u_strlen(tailorData2[j]);
   5052         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   5053         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5054             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5055             for(i = 0; i<rLen; i++) {
   5056                 log_err(" %02X", resColl[i]);
   5057             }
   5058         }
   5059     }
   5060     ucol_close(coll);
   5061 
   5062     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   5063     ruleLen = u_strlen(rule3);
   5064     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5065     tLen = u_strlen(tailorData3[3]);
   5066     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   5067     for (j=4; j<6; j++) {
   5068         tLen = u_strlen(tailorData3[j]);
   5069         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   5070 
   5071         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5072             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5073             for(i = 0; i<rLen; i++) {
   5074                 log_err(" %02X", resColl[i]);
   5075             }
   5076         }
   5077 
   5078         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5079          for(i = 0; i<rLen; i++) {
   5080              log_verbose(" %02X", resColl[i]);
   5081          }
   5082     }
   5083     ucol_close(coll);
   5084 }
   5085 
   5086 static void
   5087 TestTailor6179(void)
   5088 {
   5089     UErrorCode status = U_ZERO_ERROR;
   5090     int32_t i;
   5091     UCollator *coll =NULL;
   5092     uint8_t  resColl[100];
   5093     int32_t  rLen, tLen, ruleLen;
   5094     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   5095     static const UChar rule1[]={
   5096             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   5097             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   5098             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   5099             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   5100     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   5101     static const UChar rule2[]={
   5102             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   5103             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   5104             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   5105             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   5106             0x3C,0x3C,0x20,0x62,0};
   5107 
   5108     static const UChar tData1[][4]={
   5109         {0x61, 0},
   5110         {0x62, 0},
   5111         { 0xFDD0,0x009E, 0}
   5112     };
   5113     static const UChar tData2[][4]={
   5114         {0x61, 0},
   5115         {0x62, 0},
   5116         { 0xFDD0,0x009E, 0}
   5117      };
   5118 
   5119     /*
   5120      * These values from FractionalUCA.txt will change,
   5121      * and need to be updated here.
   5122      */
   5123     static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
   5124     static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
   5125     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
   5126     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
   5127 
   5128     /* Test [Last Primary ignorable] */
   5129 
   5130     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
   5131     ruleLen = u_strlen(rule1);
   5132     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5133     if (U_FAILURE(status)) {
   5134         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   5135         return;
   5136     }
   5137     tLen = u_strlen(tData1[0]);
   5138     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   5139     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
   5140         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   5141         for(i = 0; i<rLen; i++) {
   5142             log_err(" %02X", resColl[i]);
   5143         }
   5144         log_err("\n");
   5145     }
   5146     tLen = u_strlen(tData1[1]);
   5147     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   5148     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
   5149         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   5150         for(i = 0; i<rLen; i++) {
   5151             log_err(" %02X", resColl[i]);
   5152         }
   5153         log_err("\n");
   5154     }
   5155     ucol_close(coll);
   5156 
   5157 
   5158     /* Test [Last Secondary ignorable] */
   5159     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
   5160     ruleLen = u_strlen(rule1);
   5161     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5162     if (U_FAILURE(status)) {
   5163         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
   5164         return;
   5165     }
   5166     tLen = u_strlen(tData2[0]);
   5167     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   5168     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
   5169         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5170         for(i = 0; i<rLen; i++) {
   5171             log_err(" %02X", resColl[i]);
   5172         }
   5173         log_err("\n");
   5174     }
   5175 if(isICUVersionAtLeast(51, 1, 0)) {  /* TODO: debug & fix, see ticket #8982 */
   5176     tLen = u_strlen(tData2[1]);
   5177     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   5178     if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
   5179         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5180         for(i = 0; i<rLen; i++) {
   5181             log_err(" %02X", resColl[i]);
   5182         }
   5183         log_err("\n");
   5184     }
   5185 }
   5186     ucol_close(coll);
   5187 }
   5188 
   5189 static void
   5190 TestUCAPrecontext(void)
   5191 {
   5192     UErrorCode status = U_ZERO_ERROR;
   5193     int32_t i, j;
   5194     UCollator *coll =NULL;
   5195     uint8_t  resColl[100], prevColl[100];
   5196     int32_t  rLen, tLen, ruleLen;
   5197     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   5198     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   5199     /* & l middle-dot << a  a is an expansion. */
   5200 
   5201     UChar tData1[][20]={
   5202             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   5203             { 0x387, 0}, /* standalone middle dot(0x387) */
   5204             { 0x61, 0},  /* a */
   5205             { 0x6C, 0},  /* l */
   5206             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   5207             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   5208             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   5209             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   5210             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   5211             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   5212             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   5213      };
   5214 
   5215     log_verbose("\n\nEN collation:");
   5216     coll = ucol_open("en", &status);
   5217     if (U_FAILURE(status)) {
   5218         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   5219         return;
   5220     }
   5221     for (j=0; j<11; j++) {
   5222         tLen = u_strlen(tData1[j]);
   5223         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5224         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5225             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5226                     j, tData1[j]);
   5227         }
   5228         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5229         for(i = 0; i<rLen; i++) {
   5230             log_verbose(" %02X", resColl[i]);
   5231         }
   5232         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5233      }
   5234      ucol_close(coll);
   5235 
   5236 
   5237      log_verbose("\n\nJA collation:");
   5238      coll = ucol_open("ja", &status);
   5239      if (U_FAILURE(status)) {
   5240          log_err("Tailoring test: &z <<a|- failed!");
   5241          return;
   5242      }
   5243      for (j=0; j<11; j++) {
   5244          tLen = u_strlen(tData1[j]);
   5245          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5246          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5247              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5248                      j, tData1[j]);
   5249          }
   5250          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5251          for(i = 0; i<rLen; i++) {
   5252              log_verbose(" %02X", resColl[i]);
   5253          }
   5254          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5255       }
   5256       ucol_close(coll);
   5257 
   5258 
   5259       log_verbose("\n\nTailoring test: & middle dot < a ");
   5260       ruleLen = u_strlen(rule1);
   5261       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5262       if (U_FAILURE(status)) {
   5263           log_err("Tailoring test: & middle dot < a failed!");
   5264           return;
   5265       }
   5266       for (j=0; j<11; j++) {
   5267           tLen = u_strlen(tData1[j]);
   5268           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5269           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5270               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5271                       j, tData1[j]);
   5272           }
   5273           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5274           for(i = 0; i<rLen; i++) {
   5275               log_verbose(" %02X", resColl[i]);
   5276           }
   5277           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5278        }
   5279        ucol_close(coll);
   5280 
   5281 
   5282        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   5283        ruleLen = u_strlen(rule2);
   5284        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5285        if (U_FAILURE(status)) {
   5286            log_err("Tailoring test: & l middle-dot << a failed!");
   5287            return;
   5288        }
   5289        for (j=0; j<11; j++) {
   5290            tLen = u_strlen(tData1[j]);
   5291            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5292            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5293                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5294                        j, tData1[j]);
   5295            }
   5296            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   5297                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   5298                        j, tData1[j]);
   5299            }
   5300            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5301            for(i = 0; i<rLen; i++) {
   5302                log_verbose(" %02X", resColl[i]);
   5303            }
   5304            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5305         }
   5306         ucol_close(coll);
   5307 }
   5308 
   5309 static void
   5310 TestOutOfBuffer5468(void)
   5311 {
   5312     static const char *test = "\\u4e00";
   5313     UChar ustr[256];
   5314     int32_t ustr_length = u_unescape(test, ustr, 256);
   5315     unsigned char shortKeyBuf[1];
   5316     int32_t sortkey_length;
   5317     UErrorCode status = U_ZERO_ERROR;
   5318     static UCollator *coll = NULL;
   5319 
   5320     coll = ucol_open("root", &status);
   5321     if(U_FAILURE(status)) {
   5322       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   5323       return;
   5324     }
   5325     ucol_setStrength(coll, UCOL_PRIMARY);
   5326     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   5327     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5328     if (U_FAILURE(status)) {
   5329       log_err("Failed setting atributes\n");
   5330       return;
   5331     }
   5332 
   5333     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   5334     if (sortkey_length != 4) {
   5335         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   5336     }
   5337     log_verbose("length of sortKey is %d", sortkey_length);
   5338     ucol_close(coll);
   5339 }
   5340 
   5341 #define TSKC_DATA_SIZE 5
   5342 #define TSKC_BUF_SIZE  50
   5343 static void
   5344 TestSortKeyConsistency(void)
   5345 {
   5346     UErrorCode icuRC = U_ZERO_ERROR;
   5347     UCollator* ucol;
   5348     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   5349 
   5350     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5351     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5352     int32_t i, j, i2;
   5353 
   5354     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   5355     if (U_FAILURE(icuRC))
   5356     {
   5357         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   5358         return;
   5359     }
   5360 
   5361     for (i = 0; i < TSKC_DATA_SIZE; i++)
   5362     {
   5363         UCharIterator uiter;
   5364         uint32_t state[2] = { 0, 0 };
   5365         int32_t dataLen = i+1;
   5366         for (j=0; j<TSKC_BUF_SIZE; j++)
   5367             bufFull[i][j] = bufPart[i][j] = 0;
   5368 
   5369         /* Full sort key */
   5370         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   5371 
   5372         /* Partial sort key */
   5373         uiter_setString(&uiter, data, dataLen);
   5374         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   5375         if (U_FAILURE(icuRC))
   5376         {
   5377             log_err("ucol_nextSortKeyPart failed\n");
   5378             ucol_close(ucol);
   5379             return;
   5380         }
   5381 
   5382         for (i2=0; i2<i; i2++)
   5383         {
   5384             UBool fullMatch = TRUE;
   5385             UBool partMatch = TRUE;
   5386             for (j=0; j<TSKC_BUF_SIZE; j++)
   5387             {
   5388                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   5389                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   5390             }
   5391             if (fullMatch != partMatch) {
   5392                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   5393                                   : "partial key was consistent, but full key changed\n");
   5394                 ucol_close(ucol);
   5395                 return;
   5396             }
   5397         }
   5398     }
   5399 
   5400     /*=============================================*/
   5401    ucol_close(ucol);
   5402 }
   5403 
   5404 /* ticket: 6101 */
   5405 static void TestCroatianSortKey(void) {
   5406     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   5407     UErrorCode status = U_ZERO_ERROR;
   5408     UCollator *ucol;
   5409     UCharIterator iter;
   5410 
   5411     static const UChar text[] = { 0x0044, 0xD81A };
   5412 
   5413     size_t length = sizeof(text)/sizeof(*text);
   5414 
   5415     uint8_t textSortKey[32];
   5416     size_t lenSortKey = 32;
   5417     size_t actualSortKeyLen;
   5418     uint32_t uStateInfo[2] = { 0, 0 };
   5419 
   5420     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   5421     if (U_FAILURE(status)) {
   5422         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   5423         return;
   5424     }
   5425 
   5426     uiter_setString(&iter, text, length);
   5427 
   5428     actualSortKeyLen = ucol_nextSortKeyPart(
   5429         ucol, &iter, (uint32_t*)uStateInfo,
   5430         textSortKey, lenSortKey, &status
   5431         );
   5432 
   5433     if (actualSortKeyLen == lenSortKey) {
   5434         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   5435     }
   5436 
   5437     ucol_close(ucol);
   5438 }
   5439 
   5440 /* ticket: 6140 */
   5441 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   5442  * they are both Hiragana and Katakana
   5443  */
   5444 #define SORTKEYLEN 50
   5445 static void TestHiragana(void) {
   5446     UErrorCode status = U_ZERO_ERROR;
   5447     UCollator* ucol;
   5448     UCollationResult strcollresult;
   5449     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   5450     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   5451     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   5452     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   5453     int32_t i, j;
   5454     uint8_t sortKey1[SORTKEYLEN];
   5455     uint8_t sortKey2[SORTKEYLEN];
   5456 
   5457     UCharIterator uiter1;
   5458     UCharIterator uiter2;
   5459     uint32_t state1[2] = { 0, 0 };
   5460     uint32_t state2[2] = { 0, 0 };
   5461     int32_t keySize1;
   5462     int32_t keySize2;
   5463 
   5464     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   5465             &status);
   5466     if (U_FAILURE(status)) {
   5467         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   5468         return;
   5469     }
   5470 
   5471     /* Start of full sort keys */
   5472     /* Full sort key1 */
   5473     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   5474     /* Full sort key2 */
   5475     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   5476     if (keySize1 == keySize2) {
   5477         for (i = 0; i < keySize1; i++) {
   5478             if (sortKey1[i] != sortKey2[i]) {
   5479                 log_err("Full sort keys are different. Should be equal.");
   5480             }
   5481         }
   5482     } else {
   5483         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   5484     }
   5485     /* End of full sort keys */
   5486 
   5487     /* Start of partial sort keys */
   5488     /* Partial sort key1 */
   5489     uiter_setString(&uiter1, data1, data1Len);
   5490     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   5491     /* Partial sort key2 */
   5492     uiter_setString(&uiter2, data2, data2Len);
   5493     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   5494     if (U_SUCCESS(status) && keySize1 == keySize2) {
   5495         for (j = 0; j < keySize1; j++) {
   5496             if (sortKey1[j] != sortKey2[j]) {
   5497                 log_err("Partial sort keys are different. Should be equal");
   5498             }
   5499         }
   5500     } else {
   5501         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   5502     }
   5503     /* End of partial sort keys */
   5504 
   5505     /* Start of strcoll */
   5506     /* Use ucol_strcoll() to determine ordering */
   5507     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   5508     if (strcollresult != UCOL_EQUAL) {
   5509         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   5510     }
   5511 
   5512     ucol_close(ucol);
   5513 }
   5514 
   5515 /* Convenient struct for running collation tests */
   5516 typedef struct {
   5517   const UChar source[MAX_TOKEN_LEN];  /* String on left */
   5518   const UChar target[MAX_TOKEN_LEN];  /* String on right */
   5519   UCollationResult result;            /* -1, 0 or +1, depending on collation */
   5520 } OneTestCase;
   5521 
   5522 /*
   5523  * Utility function to test one collation test case.
   5524  * @param testcases Array of test cases.
   5525  * @param n_testcases Size of the array testcases.
   5526  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   5527  * @param n_rules Size of the array str_rules.
   5528  */
   5529 static void doTestOneTestCase(const OneTestCase testcases[],
   5530                               int n_testcases,
   5531                               const char* str_rules[],
   5532                               int n_rules)
   5533 {
   5534   int rule_no, testcase_no;
   5535   UChar rule[500];
   5536   int32_t length = 0;
   5537   UErrorCode status = U_ZERO_ERROR;
   5538   UParseError parse_error;
   5539   UCollator  *myCollation;
   5540 
   5541   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5542 
   5543     length = u_unescape(str_rules[rule_no], rule, 500);
   5544     if (length == 0) {
   5545         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5546         return;
   5547     }
   5548     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5549     if(U_FAILURE(status)){
   5550         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5551         return;
   5552     }
   5553     log_verbose("Testing the <<* syntax\n");
   5554     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5555     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5556     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   5557       doTest(myCollation,
   5558              testcases[testcase_no].source,
   5559              testcases[testcase_no].target,
   5560              testcases[testcase_no].result
   5561              );
   5562     }
   5563     ucol_close(myCollation);
   5564   }
   5565 }
   5566 
   5567 const static OneTestCase rangeTestcases[] = {
   5568   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
   5569   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
   5570   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
   5571 
   5572   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
   5573   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
   5574   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
   5575   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
   5576   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
   5577 
   5578   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
   5579   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
   5580   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
   5581   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
   5582 
   5583   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
   5584   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
   5585   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
   5586   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
   5587   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
   5588   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
   5589   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
   5590   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
   5591 };
   5592 
   5593 static int nRangeTestcases = LEN(rangeTestcases);
   5594 
   5595 const static OneTestCase rangeTestcasesSupplemental[] = {
   5596   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
   5597   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
   5598   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5599   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5600   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5601   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5602   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5603 };
   5604 
   5605 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
   5606 
   5607 const static OneTestCase rangeTestcasesQwerty[] = {
   5608   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
   5609   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
   5610 
   5611   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
   5612   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
   5613 
   5614   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
   5615   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
   5616 
   5617   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
   5618   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
   5619 
   5620   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
   5621     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
   5622   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
   5623     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
   5624 };
   5625 
   5626 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
   5627 
   5628 static void TestSameStrengthList(void)
   5629 {
   5630   const char* strRules[] = {
   5631     /* Normal */
   5632     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   5633 
   5634     /* Lists */
   5635     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   5636   };
   5637   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5638 }
   5639 
   5640 static void TestSameStrengthListQuoted(void)
   5641 {
   5642   const char* strRules[] = {
   5643     /* Lists with quoted characters */
   5644     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   5645     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   5646 
   5647     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   5648     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   5649 
   5650     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   5651     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   5652   };
   5653   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5654 }
   5655 
   5656 static void TestSameStrengthListSupplemental(void)
   5657 {
   5658   const char* strRules[] = {
   5659     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
   5660     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   5661     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
   5662     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   5663   };
   5664   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5665 }
   5666 
   5667 static void TestSameStrengthListQwerty(void)
   5668 {
   5669   const char* strRules[] = {
   5670     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5671     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5672     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   5673     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   5674     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   5675 
   5676     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   5677     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   5678 
   5679     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   5680     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   5681 
   5682  };
   5683   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5684 }
   5685 
   5686 static void TestSameStrengthListQuotedQwerty(void)
   5687 {
   5688   const char* strRules[] = {
   5689     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5690     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5691     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   5692 
   5693     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   5694     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   5695    };
   5696   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5697 }
   5698 
   5699 static void TestSameStrengthListRanges(void)
   5700 {
   5701   const char* strRules[] = {
   5702     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   5703   };
   5704   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5705 }
   5706 
   5707 static void TestSameStrengthListSupplementalRanges(void)
   5708 {
   5709   const char* strRules[] = {
   5710     "&\\ufffe<*\\uffff-\\U00010002",
   5711   };
   5712   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5713 }
   5714 
   5715 static void TestSpecialCharacters(void)
   5716 {
   5717   const char* strRules[] = {
   5718     /* Normal */
   5719     "&';'<'+'<','<'-'<'&'<'*'",
   5720 
   5721     /* List */
   5722     "&';'<*'+,-&*'",
   5723 
   5724     /* Range */
   5725     "&';'<*'+'-'-&*'",
   5726   };
   5727 
   5728   const static OneTestCase specialCharacterStrings[] = {
   5729     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   5730     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   5731     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   5732     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   5733   };
   5734   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
   5735 }
   5736 
   5737 static void TestPrivateUseCharacters(void)
   5738 {
   5739   const char* strRules[] = {
   5740     /* Normal */
   5741     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   5742     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   5743   };
   5744 
   5745   const static OneTestCase privateUseCharacterStrings[] = {
   5746     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5747     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5748     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5749     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5750     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5751     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5752   };
   5753   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5754 }
   5755 
   5756 static void TestPrivateUseCharactersInList(void)
   5757 {
   5758   const char* strRules[] = {
   5759     /* List */
   5760     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   5761     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   5762     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   5763   };
   5764 
   5765   const static OneTestCase privateUseCharacterStrings[] = {
   5766     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5767     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5768     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5769     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5770     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5771     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5772   };
   5773   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5774 }
   5775 
   5776 static void TestPrivateUseCharactersInRange(void)
   5777 {
   5778   const char* strRules[] = {
   5779     /* Range */
   5780     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   5781     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   5782     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   5783   };
   5784 
   5785   const static OneTestCase privateUseCharacterStrings[] = {
   5786     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5787     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5788     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5789     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5790     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5791     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5792   };
   5793   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5794 }
   5795 
   5796 static void TestInvalidListsAndRanges(void)
   5797 {
   5798   const char* invalidRules[] = {
   5799     /* Range not in starred expression */
   5800     "&\\ufffe<\\uffff-\\U00010002",
   5801 
   5802     /* Range without start */
   5803     "&a<*-c",
   5804 
   5805     /* Range without end */
   5806     "&a<*b-",
   5807 
   5808     /* More than one hyphen */
   5809     "&a<*b-g-l",
   5810 
   5811     /* Range in the wrong order */
   5812     "&a<*k-b",
   5813 
   5814   };
   5815 
   5816   UChar rule[500];
   5817   UErrorCode status = U_ZERO_ERROR;
   5818   UParseError parse_error;
   5819   int n_rules = LEN(invalidRules);
   5820   int rule_no;
   5821   int length;
   5822   UCollator  *myCollation;
   5823 
   5824   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5825 
   5826     length = u_unescape(invalidRules[rule_no], rule, 500);
   5827     if (length == 0) {
   5828         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5829         return;
   5830     }
   5831     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5832     if(!U_FAILURE(status)){
   5833       log_err("ERROR: Could not cause a failure as expected: \n");
   5834     }
   5835     status = U_ZERO_ERROR;
   5836   }
   5837 }
   5838 
   5839 /*
   5840  * This test ensures that characters placed before a character in a different script have the same lead byte
   5841  * in their collation key before and after script reordering.
   5842  */
   5843 static void TestBeforeRuleWithScriptReordering(void)
   5844 {
   5845     UParseError error;
   5846     UErrorCode status = U_ZERO_ERROR;
   5847     UCollator  *myCollation;
   5848     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   5849     UChar rules[500];
   5850     uint32_t rulesLength = 0;
   5851     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5852     UCollationResult collResult;
   5853 
   5854     uint8_t baseKey[256];
   5855     uint32_t baseKeyLength;
   5856     uint8_t beforeKey[256];
   5857     uint32_t beforeKeyLength;
   5858 
   5859     UChar base[] = { 0x03b1 }; /* base */
   5860     int32_t baseLen = sizeof(base)/sizeof(*base);
   5861 
   5862     UChar before[] = { 0x0e01 }; /* ko kai */
   5863     int32_t beforeLen = sizeof(before)/sizeof(*before);
   5864 
   5865     /*UChar *data[] = { before, base };
   5866     genericRulesStarter(srules, data, 2);*/
   5867 
   5868     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   5869 
   5870 
   5871     /* build collator */
   5872     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   5873 
   5874     rulesLength = u_unescape(srules, rules, LEN(rules));
   5875     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5876     if(U_FAILURE(status)) {
   5877         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5878         return;
   5879     }
   5880 
   5881     /* check collation results - before rule applied but not script reordering */
   5882     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5883     if (collResult != UCOL_GREATER) {
   5884         log_err("Collation result not correct before script reordering = %d\n", collResult);
   5885     }
   5886 
   5887     /* check the lead byte of the collation keys before script reordering */
   5888     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5889     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5890     if (baseKey[0] != beforeKey[0]) {
   5891       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5892    }
   5893 
   5894     /* reorder the scripts */
   5895     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   5896     if(U_FAILURE(status)) {
   5897         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5898         return;
   5899     }
   5900 
   5901     /* check collation results - before rule applied and after script reordering */
   5902     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5903     if (collResult != UCOL_GREATER) {
   5904         log_err("Collation result not correct after script reordering = %d\n", collResult);
   5905     }
   5906 
   5907     /* check the lead byte of the collation keys after script reordering */
   5908     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5909     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5910     if (baseKey[0] != beforeKey[0]) {
   5911         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5912     }
   5913 
   5914     ucol_close(myCollation);
   5915 }
   5916 
   5917 /*
   5918  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   5919  */
   5920 static void TestNonLeadBytesDuringCollationReordering(void)
   5921 {
   5922     UErrorCode status = U_ZERO_ERROR;
   5923     UCollator  *myCollation;
   5924     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5925 
   5926     uint8_t baseKey[256];
   5927     uint32_t baseKeyLength;
   5928     uint8_t reorderKey[256];
   5929     uint32_t reorderKeyLength;
   5930 
   5931     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   5932 
   5933     uint32_t i;
   5934 
   5935 
   5936     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5937 
   5938     /* build collator tertiary */
   5939     myCollation = ucol_open("", &status);
   5940     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5941     if(U_FAILURE(status)) {
   5942         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5943         return;
   5944     }
   5945     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5946 
   5947     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5948     if(U_FAILURE(status)) {
   5949         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5950         return;
   5951     }
   5952     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5953 
   5954     if (baseKeyLength != reorderKeyLength) {
   5955         log_err("Key lengths not the same during reordering.\n");
   5956         return;
   5957     }
   5958 
   5959     for (i = 1; i < baseKeyLength; i++) {
   5960         if (baseKey[i] != reorderKey[i]) {
   5961             log_err("Collation key bytes not the same at position %d.\n", i);
   5962             return;
   5963         }
   5964     }
   5965     ucol_close(myCollation);
   5966 
   5967     /* build collator quaternary */
   5968     myCollation = ucol_open("", &status);
   5969     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   5970     if(U_FAILURE(status)) {
   5971         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5972         return;
   5973     }
   5974     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5975 
   5976     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5977     if(U_FAILURE(status)) {
   5978         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5979         return;
   5980     }
   5981     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5982 
   5983     if (baseKeyLength != reorderKeyLength) {
   5984         log_err("Key lengths not the same during reordering.\n");
   5985         return;
   5986     }
   5987 
   5988     for (i = 1; i < baseKeyLength; i++) {
   5989         if (baseKey[i] != reorderKey[i]) {
   5990             log_err("Collation key bytes not the same at position %d.\n", i);
   5991             return;
   5992         }
   5993     }
   5994     ucol_close(myCollation);
   5995 }
   5996 
   5997 /*
   5998  * Test reordering API.
   5999  */
   6000 static void TestReorderingAPI(void)
   6001 {
   6002     UErrorCode status = U_ZERO_ERROR;
   6003     UCollator  *myCollation;
   6004     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6005     int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
   6006     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6007     UCollationResult collResult;
   6008     int32_t retrievedReorderCodesLength;
   6009     int32_t retrievedReorderCodes[10];
   6010     UChar greekString[] = { 0x03b1 };
   6011     UChar punctuationString[] = { 0x203e };
   6012     int loopIndex;
   6013 
   6014     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6015 
   6016     /* build collator tertiary */
   6017     myCollation = ucol_open("", &status);
   6018     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6019     if(U_FAILURE(status)) {
   6020         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6021         return;
   6022     }
   6023 
   6024     /* set the reorderding */
   6025     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6026     if (U_FAILURE(status)) {
   6027         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6028         return;
   6029     }
   6030 
   6031     /* get the reordering */
   6032     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6033     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6034         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6035         return;
   6036     }
   6037     status = U_ZERO_ERROR;
   6038     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6039         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6040         return;
   6041     }
   6042     /* now let's really get it */
   6043     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6044     if (U_FAILURE(status)) {
   6045         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6046         return;
   6047     }
   6048     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6049         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6050         return;
   6051     }
   6052     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6053         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6054             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6055             return;
   6056         }
   6057     }
   6058     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6059     if (collResult != UCOL_LESS) {
   6060         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6061         return;
   6062     }
   6063 
   6064     /* clear the reordering */
   6065     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6066     if (U_FAILURE(status)) {
   6067         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6068         return;
   6069     }
   6070 
   6071     /* get the reordering again */
   6072     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6073     if (retrievedReorderCodesLength != 0) {
   6074         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6075         return;
   6076     }
   6077 
   6078     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6079     if (collResult != UCOL_GREATER) {
   6080         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6081         return;
   6082     }
   6083 
   6084     /* test for error condition on duplicate reorder codes */
   6085     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
   6086     if (!U_FAILURE(status)) {
   6087         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
   6088         return;
   6089     }
   6090 
   6091     status = U_ZERO_ERROR;
   6092     /* test for reorder codes after a reset code */
   6093     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
   6094     if (!U_FAILURE(status)) {
   6095         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
   6096         return;
   6097     }
   6098 
   6099     ucol_close(myCollation);
   6100 }
   6101 
   6102 /*
   6103  * Test reordering API.
   6104  */
   6105 static void TestReorderingAPIWithRuleCreatedCollator(void)
   6106 {
   6107     UErrorCode status = U_ZERO_ERROR;
   6108     UCollator  *myCollation;
   6109     UChar rules[90];
   6110     int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
   6111     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6112     UCollationResult collResult;
   6113     int32_t retrievedReorderCodesLength;
   6114     int32_t retrievedReorderCodes[10];
   6115     UChar greekString[] = { 0x03b1 };
   6116     UChar punctuationString[] = { 0x203e };
   6117     UChar hanString[] = { 0x65E5, 0x672C };
   6118     int loopIndex;
   6119 
   6120     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6121 
   6122     /* build collator from rules */
   6123     u_uastrcpy(rules, "[reorder Hani Grek]");
   6124     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
   6125     if(U_FAILURE(status)) {
   6126         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6127         return;
   6128     }
   6129 
   6130     /* get the reordering */
   6131     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6132     if (U_FAILURE(status)) {
   6133         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6134         return;
   6135     }
   6136     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   6137         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   6138         return;
   6139     }
   6140     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6141         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   6142             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6143             return;
   6144         }
   6145     }
   6146     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
   6147     if (collResult != UCOL_GREATER) {
   6148         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6149         return;
   6150     }
   6151 
   6152 
   6153     /* set the reorderding */
   6154     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6155     if (U_FAILURE(status)) {
   6156         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6157         return;
   6158     }
   6159 
   6160     /* get the reordering */
   6161     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6162     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6163         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6164         return;
   6165     }
   6166     status = U_ZERO_ERROR;
   6167     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6168         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6169         return;
   6170     }
   6171     /* now let's really get it */
   6172     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6173     if (U_FAILURE(status)) {
   6174         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6175         return;
   6176     }
   6177     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6178         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6179         return;
   6180     }
   6181     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6182         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6183             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6184             return;
   6185         }
   6186     }
   6187     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6188     if (collResult != UCOL_LESS) {
   6189         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6190         return;
   6191     }
   6192 
   6193     /* clear the reordering */
   6194     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6195     if (U_FAILURE(status)) {
   6196         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6197         return;
   6198     }
   6199 
   6200     /* get the reordering again */
   6201     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6202     if (retrievedReorderCodesLength != 0) {
   6203         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6204         return;
   6205     }
   6206 
   6207     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6208     if (collResult != UCOL_GREATER) {
   6209         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6210         return;
   6211     }
   6212 
   6213     ucol_close(myCollation);
   6214 }
   6215 
   6216 static int compareUScriptCodes(const void * a, const void * b)
   6217 {
   6218   return ( *(int32_t*)a - *(int32_t*)b );
   6219 }
   6220 
   6221 static void TestEquivalentReorderingScripts(void) {
   6222     UErrorCode status = U_ZERO_ERROR;
   6223     int32_t equivalentScripts[50];
   6224     int32_t equivalentScriptsLength;
   6225     int loopIndex;
   6226     int32_t equivalentScriptsResult[] = {
   6227         USCRIPT_BOPOMOFO,
   6228         USCRIPT_LISU,
   6229         USCRIPT_LYCIAN,
   6230         USCRIPT_CARIAN,
   6231         USCRIPT_LYDIAN,
   6232         USCRIPT_YI,
   6233         USCRIPT_OLD_ITALIC,
   6234         USCRIPT_GOTHIC,
   6235         USCRIPT_DESERET,
   6236         USCRIPT_SHAVIAN,
   6237         USCRIPT_OSMANYA,
   6238         USCRIPT_LINEAR_B,
   6239         USCRIPT_CYPRIOT,
   6240         USCRIPT_OLD_SOUTH_ARABIAN,
   6241         USCRIPT_AVESTAN,
   6242         USCRIPT_IMPERIAL_ARAMAIC,
   6243         USCRIPT_INSCRIPTIONAL_PARTHIAN,
   6244         USCRIPT_INSCRIPTIONAL_PAHLAVI,
   6245         USCRIPT_UGARITIC,
   6246         USCRIPT_OLD_PERSIAN,
   6247         USCRIPT_CUNEIFORM,
   6248         USCRIPT_EGYPTIAN_HIEROGLYPHS,
   6249         USCRIPT_PHONETIC_POLLARD,
   6250         USCRIPT_SORA_SOMPENG,
   6251         USCRIPT_MEROITIC_CURSIVE,
   6252         USCRIPT_MEROITIC_HIEROGLYPHS
   6253     };
   6254 
   6255     qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
   6256 
   6257     /* UScript.GOTHIC */
   6258     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
   6259     if (U_FAILURE(status)) {
   6260         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   6261         return;
   6262     }
   6263     /*
   6264     fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
   6265     fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
   6266     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6267         fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
   6268     }
   6269     */
   6270     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   6271         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   6272         return;
   6273     }
   6274     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6275         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   6276             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   6277             return;
   6278         }
   6279     }
   6280 
   6281     /* UScript.SHAVIAN */
   6282     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
   6283     if (U_FAILURE(status)) {
   6284         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   6285         return;
   6286     }
   6287     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   6288         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   6289         return;
   6290     }
   6291     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6292         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   6293             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   6294             return;
   6295         }
   6296     }
   6297 }
   6298 
   6299 static void TestReorderingAcrossCloning(void)
   6300 {
   6301     UErrorCode status = U_ZERO_ERROR;
   6302     UCollator  *myCollation;
   6303     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6304     UCollator *clonedCollation;
   6305     int32_t bufferSize;
   6306     int32_t retrievedReorderCodesLength;
   6307     int32_t retrievedReorderCodes[10];
   6308     int loopIndex;
   6309 
   6310     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6311 
   6312     /* build collator tertiary */
   6313     myCollation = ucol_open("", &status);
   6314     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6315     if(U_FAILURE(status)) {
   6316         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6317         return;
   6318     }
   6319 
   6320     /* set the reorderding */
   6321     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6322     if (U_FAILURE(status)) {
   6323         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6324         return;
   6325     }
   6326 
   6327     /* clone the collator */
   6328     clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
   6329     if (U_FAILURE(status)) {
   6330         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
   6331         return;
   6332     }
   6333 
   6334     /* get the reordering */
   6335     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6336     if (U_FAILURE(status)) {
   6337         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6338         return;
   6339     }
   6340     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6341         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6342         return;
   6343     }
   6344     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6345         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6346             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6347             return;
   6348         }
   6349     }
   6350 
   6351     /*uprv_free(buffer);*/
   6352     ucol_close(myCollation);
   6353     ucol_close(clonedCollation);
   6354 }
   6355 
   6356 /*
   6357  * Utility function to test one collation reordering test case set.
   6358  * @param testcases Array of test cases.
   6359  * @param n_testcases Size of the array testcases.
   6360  * @param reorderTokens Array of reordering codes.
   6361  * @param reorderTokensLen Size of the array reorderTokens.
   6362  */
   6363 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   6364 {
   6365     uint32_t testCaseNum;
   6366     UErrorCode status = U_ZERO_ERROR;
   6367     UCollator  *myCollation;
   6368 
   6369     myCollation = ucol_open("", &status);
   6370     if (U_FAILURE(status)) {
   6371         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6372         return;
   6373     }
   6374     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   6375     if(U_FAILURE(status)) {
   6376         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   6377         return;
   6378     }
   6379 
   6380     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   6381         doTest(myCollation,
   6382             testCases[testCaseNum].source,
   6383             testCases[testCaseNum].target,
   6384             testCases[testCaseNum].result
   6385         );
   6386     }
   6387     ucol_close(myCollation);
   6388 }
   6389 
   6390 static void TestGreekFirstReorder(void)
   6391 {
   6392     const char* strRules[] = {
   6393         "[reorder Grek]"
   6394     };
   6395 
   6396     const int32_t apiRules[] = {
   6397         USCRIPT_GREEK
   6398     };
   6399 
   6400     const static OneTestCase privateUseCharacterStrings[] = {
   6401         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6402         { {0x0041}, {0x0391}, UCOL_GREATER },
   6403         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   6404         { {0x0060}, {0x0391}, UCOL_LESS },
   6405         { {0x0391}, {0xe2dc}, UCOL_LESS },
   6406         { {0x0391}, {0x0060}, UCOL_GREATER },
   6407     };
   6408 
   6409     /* Test rules creation */
   6410     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6411 
   6412     /* Test collation reordering API */
   6413     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6414 }
   6415 
   6416 static void TestGreekLastReorder(void)
   6417 {
   6418     const char* strRules[] = {
   6419         "[reorder Zzzz Grek]"
   6420     };
   6421 
   6422     const int32_t apiRules[] = {
   6423         USCRIPT_UNKNOWN, USCRIPT_GREEK
   6424     };
   6425 
   6426     const static OneTestCase privateUseCharacterStrings[] = {
   6427         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6428         { {0x0041}, {0x0391}, UCOL_LESS },
   6429         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   6430         { {0x0060}, {0x0391}, UCOL_LESS },
   6431         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   6432     };
   6433 
   6434     /* Test rules creation */
   6435     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6436 
   6437     /* Test collation reordering API */
   6438     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6439 }
   6440 
   6441 static void TestNonScriptReorder(void)
   6442 {
   6443     const char* strRules[] = {
   6444         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   6445     };
   6446 
   6447     const int32_t apiRules[] = {
   6448         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   6449         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   6450         UCOL_REORDER_CODE_CURRENCY
   6451     };
   6452 
   6453     const static OneTestCase privateUseCharacterStrings[] = {
   6454         { {0x0391}, {0x0041}, UCOL_LESS },
   6455         { {0x0041}, {0x0391}, UCOL_GREATER },
   6456         { {0x0060}, {0x0041}, UCOL_LESS },
   6457         { {0x0060}, {0x0391}, UCOL_GREATER },
   6458         { {0x0024}, {0x0041}, UCOL_GREATER },
   6459     };
   6460 
   6461     /* Test rules creation */
   6462     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6463 
   6464     /* Test collation reordering API */
   6465     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6466 }
   6467 
   6468 static void TestHaniReorder(void)
   6469 {
   6470     const char* strRules[] = {
   6471         "[reorder Hani]"
   6472     };
   6473     const int32_t apiRules[] = {
   6474         USCRIPT_HAN
   6475     };
   6476 
   6477     const static OneTestCase privateUseCharacterStrings[] = {
   6478         { {0x4e00}, {0x0041}, UCOL_LESS },
   6479         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6480         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6481         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6482         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6483         { {0xfa27}, {0x0041}, UCOL_LESS },
   6484         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6485     };
   6486 
   6487     /* Test rules creation */
   6488     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6489 
   6490     /* Test collation reordering API */
   6491     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6492 }
   6493 
   6494 static void TestHaniReorderWithOtherRules(void)
   6495 {
   6496     const char* strRules[] = {
   6497         "[reorder Hani] &b<a"
   6498     };
   6499     /*const int32_t apiRules[] = {
   6500         USCRIPT_HAN
   6501     };*/
   6502 
   6503     const static OneTestCase privateUseCharacterStrings[] = {
   6504         { {0x4e00}, {0x0041}, UCOL_LESS },
   6505         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6506         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6507         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6508         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6509         { {0xfa27}, {0x0041}, UCOL_LESS },
   6510         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6511         { {0x0062}, {0x0061}, UCOL_LESS },
   6512     };
   6513 
   6514     /* Test rules creation */
   6515     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6516 }
   6517 
   6518 static void TestMultipleReorder(void)
   6519 {
   6520     const char* strRules[] = {
   6521         "[reorder Grek Zzzz DIGIT Latn Hani]"
   6522     };
   6523 
   6524     const int32_t apiRules[] = {
   6525         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
   6526     };
   6527 
   6528     const static OneTestCase collationTestCases[] = {
   6529         { {0x0391}, {0x0041}, UCOL_LESS},
   6530         { {0x0031}, {0x0041}, UCOL_LESS},
   6531         { {0x0041}, {0x4e00}, UCOL_LESS},
   6532     };
   6533 
   6534     /* Test rules creation */
   6535     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
   6536 
   6537     /* Test collation reordering API */
   6538     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
   6539 }
   6540 
   6541 /*
   6542  * Test that covers issue reported in ticket 8814
   6543  */
   6544 static void TestReorderWithNumericCollation(void)
   6545 {
   6546     UErrorCode status = U_ZERO_ERROR;
   6547     UCollator  *myCollation;
   6548     UCollator  *myReorderCollation;
   6549     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
   6550     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
   6551     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
   6552     UChar fortyS[] = { 0x0053 };
   6553     UChar fortyThreeP[] = { 0x0050 };
   6554     uint8_t fortyS_sortKey[128];
   6555     int32_t fortyS_sortKey_Length;
   6556     uint8_t fortyThreeP_sortKey[128];
   6557     int32_t fortyThreeP_sortKey_Length;
   6558     uint8_t fortyS_sortKey_reorder[128];
   6559     int32_t fortyS_sortKey_reorder_Length;
   6560     uint8_t fortyThreeP_sortKey_reorder[128];
   6561     int32_t fortyThreeP_sortKey_reorder_Length;
   6562     UCollationResult collResult;
   6563     UCollationResult collResultReorder;
   6564 
   6565     log_verbose("Testing reordering with and without numeric collation\n");
   6566 
   6567     /* build collator tertiary with numeric */
   6568     myCollation = ucol_open("", &status);
   6569     /*
   6570     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6571     */
   6572     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   6573     if(U_FAILURE(status)) {
   6574         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6575         return;
   6576     }
   6577 
   6578     /* build collator tertiary with numeric and reordering */
   6579     myReorderCollation = ucol_open("", &status);
   6580     /*
   6581     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
   6582     */
   6583     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   6584     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
   6585     if(U_FAILURE(status)) {
   6586         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6587         return;
   6588     }
   6589 
   6590     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
   6591     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
   6592     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
   6593     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
   6594 
   6595     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
   6596         log_err_status(status, "ERROR: couldn't generate sort keys\n");
   6597         return;
   6598     }
   6599     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   6600     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   6601     /*
   6602     fprintf(stderr, "\tcollResult = %x\n", collResult);
   6603     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
   6604     fprintf(stderr, "\nfortyS\n");
   6605     for (i = 0; i < fortyS_sortKey_Length; i++) {
   6606         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
   6607     }
   6608     fprintf(stderr, "\nfortyThreeP\n");
   6609     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
   6610         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
   6611     }
   6612     */
   6613     if (collResult != collResultReorder) {
   6614         log_err_status(status, "ERROR: collation results should have been the same.\n");
   6615         return;
   6616     }
   6617 
   6618     ucol_close(myCollation);
   6619     ucol_close(myReorderCollation);
   6620 }
   6621 
   6622 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   6623 {
   6624   for (; *a == *b; ++a, ++b) {
   6625     if (*a == 0) {
   6626       return 0;
   6627     }
   6628   }
   6629   return (*a < *b ? -1 : 1);
   6630 }
   6631 
   6632 static void TestImportRulesDeWithPhonebook(void)
   6633 {
   6634   const char* normalRules[] = {
   6635     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
   6636     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
   6637     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
   6638   };
   6639   const OneTestCase normalTests[] = {
   6640     { {0x00e6}, {0x00c6}, UCOL_LESS},
   6641     { {0x00fc}, {0x00dc}, UCOL_GREATER},
   6642   };
   6643 
   6644   const char* importRules[] = {
   6645     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
   6646     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   6647     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   6648   };
   6649   const OneTestCase importTests[] = {
   6650     { {0x00e6}, {0x00c6}, UCOL_LESS},
   6651     { {0x00fc}, {0x00dc}, UCOL_LESS},
   6652   };
   6653 
   6654   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
   6655   doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
   6656 }
   6657 
   6658 #if 0
   6659 static void TestImportRulesFiWithEor(void)
   6660 {
   6661   /* DUCET. */
   6662   const char* defaultRules[] = {
   6663     "&a<b",                                    /* Dummy rule. */
   6664   };
   6665 
   6666   const OneTestCase defaultTests[] = {
   6667     { {0x0110}, {0x00F0}, UCOL_LESS},
   6668     { {0x00a3}, {0x00a5}, UCOL_LESS},
   6669     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   6670   };
   6671 
   6672   /* European Ordering rules: ignore currency characters. */
   6673   const char* eorRules[] = {
   6674     "[import root-u-co-eor]",
   6675   };
   6676 
   6677   const OneTestCase eorTests[] = {
   6678     { {0x0110}, {0x00F0}, UCOL_LESS},
   6679     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   6680     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   6681   };
   6682 
   6683   const char* fiStdRules[] = {
   6684     "[import fi-u-co-standard]",
   6685   };
   6686 
   6687   const OneTestCase fiStdTests[] = {
   6688     { {0x0110}, {0x00F0}, UCOL_GREATER},
   6689     { {0x00a3}, {0x00a5}, UCOL_LESS},
   6690     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   6691   };
   6692 
   6693   /* Both European Ordering Rules and Fi Standard Rules. */
   6694   const char* eorFiStdRules[] = {
   6695     "[import root-u-co-eor][import fi-u-co-standard]",
   6696   };
   6697 
   6698   /* This is essentially same as the one before once fi.txt is updated with import. */
   6699   const char* fiEorRules[] = {
   6700     "[import fi-u-co-eor]",
   6701   };
   6702 
   6703   const OneTestCase fiEorTests[] = {
   6704     { {0x0110}, {0x00F0}, UCOL_GREATER},
   6705     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   6706     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   6707   };
   6708 
   6709   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
   6710   doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
   6711   doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
   6712   doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
   6713 
   6714   /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
   6715         eor{
   6716             Sequence{
   6717                 "[import root-u-co-eor][import fi-u-co-standard]"
   6718             }
   6719             Version{"21.0"}
   6720         }
   6721   */
   6722   /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
   6723 
   6724 }
   6725 #endif
   6726 
   6727 #if 0
   6728 /*
   6729  * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
   6730  * the resource files are built with -includeUnihanColl option.
   6731  * TODO: Uncomment this function and make it work when unihan rules are built by default.
   6732  */
   6733 static void TestImportRulesCJKWithUnihan(void)
   6734 {
   6735   /* DUCET. */
   6736   const char* defaultRules[] = {
   6737     "&a<b",                                    /* Dummy rule. */
   6738   };
   6739 
   6740   const OneTestCase defaultTests[] = {
   6741     { {0x3402}, {0x4e1e}, UCOL_GREATER},
   6742   };
   6743 
   6744   /* European Ordering rules: ignore currency characters. */
   6745   const char* unihanRules[] = {
   6746     "[import ko-u-co-unihan]",
   6747   };
   6748 
   6749   const OneTestCase unihanTests[] = {
   6750     { {0x3402}, {0x4e1e}, UCOL_LESS},
   6751   };
   6752 
   6753   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
   6754   doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
   6755 
   6756 }
   6757 #endif
   6758 
   6759 static void TestImport(void)
   6760 {
   6761     UCollator* vicoll;
   6762     UCollator* escoll;
   6763     UCollator* viescoll;
   6764     UCollator* importviescoll;
   6765     UParseError error;
   6766     UErrorCode status = U_ZERO_ERROR;
   6767     UChar* virules;
   6768     int32_t viruleslength;
   6769     UChar* esrules;
   6770     int32_t esruleslength;
   6771     UChar* viesrules;
   6772     int32_t viesruleslength;
   6773     char srules[500] = "[import vi][import es]";
   6774     UChar rules[500];
   6775     uint32_t length = 0;
   6776     int32_t itemCount;
   6777     int32_t i, k;
   6778     UChar32 start;
   6779     UChar32 end;
   6780     UChar str[500];
   6781     int32_t strLength;
   6782 
   6783     uint8_t sk1[500];
   6784     uint8_t sk2[500];
   6785 
   6786     UBool b;
   6787     USet* tailoredSet;
   6788     USet* importTailoredSet;
   6789 
   6790 
   6791     vicoll = ucol_open("vi", &status);
   6792     if(U_FAILURE(status)){
   6793         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   6794         return;
   6795     }
   6796 
   6797     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   6798     escoll = ucol_open("es", &status);
   6799     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   6800     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   6801     viesrules[0] = 0;
   6802     u_strcat(viesrules, virules);
   6803     u_strcat(viesrules, esrules);
   6804     viesruleslength = viruleslength + esruleslength;
   6805     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6806 
   6807     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6808     length = u_unescape(srules, rules, 500);
   6809     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6810     if(U_FAILURE(status)){
   6811         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6812         return;
   6813     }
   6814 
   6815     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   6816     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   6817 
   6818     if(!uset_equals(tailoredSet, importTailoredSet)){
   6819         log_err("Tailored sets not equal");
   6820     }
   6821 
   6822     uset_close(importTailoredSet);
   6823 
   6824     itemCount = uset_getItemCount(tailoredSet);
   6825 
   6826     for( i = 0; i < itemCount; i++){
   6827         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6828         if(strLength < 2){
   6829             for (; start <= end; start++){
   6830                 k = 0;
   6831                 U16_APPEND(str, k, 500, start, b);
   6832                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   6833                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   6834                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6835                     log_err("Sort key for %s not equal\n", str);
   6836                     break;
   6837                 }
   6838             }
   6839         }else{
   6840             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   6841             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   6842             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6843                 log_err("ZZSort key for %s not equal\n", str);
   6844                 break;
   6845             }
   6846 
   6847         }
   6848     }
   6849 
   6850     uset_close(tailoredSet);
   6851 
   6852     uprv_free(viesrules);
   6853 
   6854     ucol_close(vicoll);
   6855     ucol_close(escoll);
   6856     ucol_close(viescoll);
   6857     ucol_close(importviescoll);
   6858 }
   6859 
   6860 static void TestImportWithType(void)
   6861 {
   6862     UCollator* vicoll;
   6863     UCollator* decoll;
   6864     UCollator* videcoll;
   6865     UCollator* importvidecoll;
   6866     UParseError error;
   6867     UErrorCode status = U_ZERO_ERROR;
   6868     const UChar* virules;
   6869     int32_t viruleslength;
   6870     const UChar* derules;
   6871     int32_t deruleslength;
   6872     UChar* viderules;
   6873     int32_t videruleslength;
   6874     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   6875     UChar rules[500];
   6876     uint32_t length = 0;
   6877     int32_t itemCount;
   6878     int32_t i, k;
   6879     UChar32 start;
   6880     UChar32 end;
   6881     UChar str[500];
   6882     int32_t strLength;
   6883 
   6884     uint8_t sk1[500];
   6885     uint8_t sk2[500];
   6886 
   6887     USet* tailoredSet;
   6888     USet* importTailoredSet;
   6889 
   6890     vicoll = ucol_open("vi", &status);
   6891     if(U_FAILURE(status)){
   6892         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6893         return;
   6894     }
   6895     virules = ucol_getRules(vicoll, &viruleslength);
   6896     /* decoll = ucol_open("de@collation=phonebook", &status); */
   6897     decoll = ucol_open("de-u-co-phonebk", &status);
   6898     if(U_FAILURE(status)){
   6899         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6900         return;
   6901     }
   6902 
   6903 
   6904     derules = ucol_getRules(decoll, &deruleslength);
   6905     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   6906     viderules[0] = 0;
   6907     u_strcat(viderules, virules);
   6908     u_strcat(viderules, derules);
   6909     videruleslength = viruleslength + deruleslength;
   6910     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6911 
   6912     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6913     length = u_unescape(srules, rules, 500);
   6914     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6915     if(U_FAILURE(status)){
   6916         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6917         return;
   6918     }
   6919 
   6920     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   6921     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   6922 
   6923     if(!uset_equals(tailoredSet, importTailoredSet)){
   6924         log_err("Tailored sets not equal");
   6925     }
   6926 
   6927     uset_close(importTailoredSet);
   6928 
   6929     itemCount = uset_getItemCount(tailoredSet);
   6930 
   6931     for( i = 0; i < itemCount; i++){
   6932         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6933         if(strLength < 2){
   6934             for (; start <= end; start++){
   6935                 k = 0;
   6936                 U16_APPEND_UNSAFE(str, k, start);
   6937                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   6938                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   6939                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6940                     log_err("Sort key for %s not equal\n", str);
   6941                     break;
   6942                 }
   6943             }
   6944         }else{
   6945             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   6946             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   6947             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6948                 log_err("Sort key for %s not equal\n", str);
   6949                 break;
   6950             }
   6951 
   6952         }
   6953     }
   6954 
   6955     uset_close(tailoredSet);
   6956 
   6957     uprv_free(viderules);
   6958 
   6959     ucol_close(videcoll);
   6960     ucol_close(importvidecoll);
   6961     ucol_close(vicoll);
   6962     ucol_close(decoll);
   6963 }
   6964 
/*
 * Long test strings referenced by the longUpperStrItems table.
 * The arrays carry no terminating NUL; the table records each length with MY_ARRAY_LEN.
 */

/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};

/*
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times.
 * The vowels are the precomposed Latin-1 letters U+00C1, U+00C9, U+00CD, U+00D3, U+00DA,
 * U+00C0, U+00C8, U+00CC, U+00D2, U+00D9, U+00C2 and U+00CA.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};

/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' (27 code units per row) repeated 12 times */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
   7003 
   7004 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
   7005 
   7006 typedef struct {
   7007     const UChar * longUpperStrPtr;
   7008     int32_t       longUpperStrLen;
   7009 } LongUpperStrItem;
   7010 
   7011 /* String pointers must be in reverse collation order of the corresponding strings */
   7012 static const LongUpperStrItem longUpperStrItems[] = {
   7013     { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
   7014     { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
   7015     { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
   7016     { NULL,          0                           }
   7017 };
   7018 
   7019 enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
   7020 
   7021 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
   7022 static void TestCaseLevelBufferOverflow(void)
   7023 {
   7024     UErrorCode status = U_ZERO_ERROR;
   7025     UCollator * ucol = ucol_open("root", &status);
   7026     if ( U_SUCCESS(status) ) {
   7027         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
   7028         if ( U_SUCCESS(status) ) {
   7029             const LongUpperStrItem * itemPtr;
   7030             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
   7031             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
   7032                 int32_t sortKeyLen;
   7033                 if (itemPtr > longUpperStrItems) {
   7034                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
   7035                 }
   7036                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
   7037                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
   7038                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
   7039                     break;
   7040                 }
   7041                 if ( itemPtr > longUpperStrItems ) {
   7042                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
   7043                     if (compareResult >= 0) {
   7044                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
   7045                     }
   7046                 }
   7047             }
   7048         } else {
   7049             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
   7050         }
   7051         ucol_close(ucol);
   7052     } else {
   7053         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
   7054     }
   7055 }
   7056 
   7057 
   7058 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
   7059 
   7060 void addMiscCollTest(TestNode** root)
   7061 {
   7062     TEST(TestRuleOptions);
   7063     TEST(TestBeforePrefixFailure);
   7064     TEST(TestContractionClosure);
   7065     TEST(TestPrefixCompose);
   7066     TEST(TestStrCollIdenticalPrefix);
   7067     TEST(TestPrefix);
   7068     TEST(TestNewJapanese);
   7069     /*TEST(TestLimitations);*/
   7070     TEST(TestNonChars);
   7071     TEST(TestExtremeCompression);
   7072     TEST(TestSurrogates);
   7073     /* BEGIN android-removed
   7074        To save space, Android does not include the collation tailoring rules.
   7075        We skip the tailing tests for collations. */
   7076     /* TEST(TestVariableTopSetting); */
   7077     /* END android-removed */
   7078     TEST(TestBocsuCoverage);
   7079     TEST(TestCyrillicTailoring);
   7080     TEST(TestCase);
   7081     TEST(IncompleteCntTest);
   7082     TEST(BlackBirdTest);
   7083     TEST(FunkyATest);
   7084     TEST(BillFairmanTest);
   7085     TEST(RamsRulesTest);
   7086     TEST(IsTailoredTest);
   7087     TEST(TestCollations);
   7088     TEST(TestChMove);
   7089     TEST(TestImplicitTailoring);
   7090     TEST(TestFCDProblem);
   7091     TEST(TestEmptyRule);
   7092     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
   7093     TEST(TestJ815);
   7094     /*TEST(TestJ831);*/ /* we changed lv locale */
   7095     TEST(TestBefore);
   7096     TEST(TestRedundantRules);
   7097     TEST(TestExpansionSyntax);
   7098     TEST(TestHangulTailoring);
   7099     TEST(TestUCARules);
   7100     TEST(TestIncrementalNormalize);
   7101     TEST(TestComposeDecompose);
   7102     TEST(TestCompressOverlap);
   7103     TEST(TestContraction);
   7104     TEST(TestExpansion);
   7105     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
   7106     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
   7107     TEST(TestOptimize);
   7108     TEST(TestSuppressContractions);
   7109     TEST(Alexis2);
   7110     TEST(TestHebrewUCA);
   7111     TEST(TestPartialSortKeyTermination);
   7112     TEST(TestSettings);
   7113     TEST(TestEquals);
   7114     TEST(TestJ2726);
   7115     TEST(NullRule);
   7116     TEST(TestNumericCollation);
   7117     TEST(TestTibetanConformance);
   7118     TEST(TestPinyinProblem);
   7119     TEST(TestImplicitGeneration);
   7120     TEST(TestSeparateTrees);
   7121     TEST(TestBeforePinyin);
   7122     TEST(TestBeforeTightening);
   7123     /*TEST(TestMoreBefore);*/
   7124     TEST(TestTailorNULL);
   7125     TEST(TestUpperFirstQuaternary);
   7126     TEST(TestJ4960);
   7127     TEST(TestJ5223);
   7128     TEST(TestJ5232);
   7129     TEST(TestJ5367);
   7130     TEST(TestHiragana);
   7131     TEST(TestSortKeyConsistency);
   7132     TEST(TestVI5913);  /* VI, RO tailored rules */
   7133     TEST(TestCroatianSortKey);
   7134     TEST(TestTailor6179);
   7135     TEST(TestUCAPrecontext);
   7136     TEST(TestOutOfBuffer5468);
   7137     TEST(TestSameStrengthList);
   7138 
   7139     TEST(TestSameStrengthListQuoted);
   7140     TEST(TestSameStrengthListSupplemental);
   7141     TEST(TestSameStrengthListQwerty);
   7142     TEST(TestSameStrengthListQuotedQwerty);
   7143     TEST(TestSameStrengthListRanges);
   7144     TEST(TestSameStrengthListSupplementalRanges);
   7145     TEST(TestSpecialCharacters);
   7146     TEST(TestPrivateUseCharacters);
   7147     TEST(TestPrivateUseCharactersInList);
   7148     TEST(TestPrivateUseCharactersInRange);
   7149     TEST(TestInvalidListsAndRanges);
   7150     TEST(TestImportRulesDeWithPhonebook);
   7151     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
   7152     /* TEST(TestImportRulesCJKWithUnihan); */
   7153     /* BEGIN android-removed: Due to Android does not include reverse UCA table.
   7154     TEST(TestImport);
   7155     TEST(TestImportWithType);
   7156     END android-removed */
   7157 
   7158     TEST(TestBeforeRuleWithScriptReordering);
   7159     TEST(TestNonLeadBytesDuringCollationReordering);
   7160     TEST(TestReorderingAPI);
   7161     TEST(TestReorderingAPIWithRuleCreatedCollator);
   7162     TEST(TestEquivalentReorderingScripts);
   7163     TEST(TestGreekFirstReorder);
   7164     TEST(TestGreekLastReorder);
   7165     TEST(TestNonScriptReorder);
   7166     TEST(TestHaniReorder);
   7167     TEST(TestHaniReorderWithOtherRules);
   7168     TEST(TestMultipleReorder);
   7169     TEST(TestReorderingAcrossCloning);
   7170     TEST(TestReorderWithNumericCollation);
   7171 
   7172     TEST(TestCaseLevelBufferOverflow);
   7173 }
   7174 
   7175 #endif /* #if !UCONFIG_NO_COLLATION */
   7176