Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2013, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "ucol_tok.h"
     33 #include "cmemory.h"
     34 #include "cstring.h"
     35 #include "uassert.h"
     36 #include "unicode/parseerr.h"
     37 #include "unicode/ucnv.h"
     38 #include "unicode/ures.h"
     39 #include "unicode/uscript.h"
     40 #include "unicode/utf16.h"
     41 #include "uparse.h"
     42 #include "putilimp.h"
     43 
     44 
     45 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     46 
     47 #define MAX_TOKEN_LEN 16
     48 
     49 typedef UCollationResult tst_strcoll(void *collator, const int object,
     50                         const UChar *source, const int sLen,
     51                         const UChar *target, const int tLen);
     52 
     53 
     54 
     55 const static char cnt1[][10] = {
     56 
     57   "AA",
     58   "AC",
     59   "AZ",
     60   "AQ",
     61   "AB",
     62   "ABZ",
     63   "ABQ",
     64   "Z",
     65   "ABC",
     66   "Q",
     67   "B"
     68 };
     69 
     70 const static char cnt2[][10] = {
     71   "DA",
     72   "DAD",
     73   "DAZ",
     74   "MAR",
     75   "Z",
     76   "DAVIS",
     77   "MARK",
     78   "DAV",
     79   "DAVI"
     80 };
     81 
     82 static void IncompleteCntTest(void)
     83 {
     84   UErrorCode status = U_ZERO_ERROR;
     85   UChar temp[90];
     86   UChar t1[90];
     87   UChar t2[90];
     88 
     89   UCollator *coll =  NULL;
     90   uint32_t i = 0, j = 0;
     91   uint32_t size = 0;
     92 
     93   u_uastrcpy(temp, " & Z < ABC < Q < B");
     94 
     95   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     96 
     97   if(U_SUCCESS(status)) {
     98     size = sizeof(cnt1)/sizeof(cnt1[0]);
     99     for(i = 0; i < size-1; i++) {
    100       for(j = i+1; j < size; j++) {
    101         UCollationElements *iter;
    102         u_uastrcpy(t1, cnt1[i]);
    103         u_uastrcpy(t2, cnt1[j]);
    104         doTest(coll, t1, t2, UCOL_LESS);
    105         /* synwee : added collation element iterator test */
    106         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    107         if (U_FAILURE(status)) {
    108           log_err("Creation of iterator failed\n");
    109           break;
    110         }
    111         backAndForth(iter);
    112         ucol_closeElements(iter);
    113       }
    114     }
    115   }
    116 
    117   ucol_close(coll);
    118 
    119 
    120   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    121   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    122 
    123   if(U_SUCCESS(status)) {
    124     size = sizeof(cnt2)/sizeof(cnt2[0]);
    125     for(i = 0; i < size-1; i++) {
    126       for(j = i+1; j < size; j++) {
    127         UCollationElements *iter;
    128         u_uastrcpy(t1, cnt2[i]);
    129         u_uastrcpy(t2, cnt2[j]);
    130         doTest(coll, t1, t2, UCOL_LESS);
    131 
    132         /* synwee : added collation element iterator test */
    133         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    134         if (U_FAILURE(status)) {
    135           log_err("Creation of iterator failed\n");
    136           break;
    137         }
    138         backAndForth(iter);
    139         ucol_closeElements(iter);
    140       }
    141     }
    142   }
    143 
    144   ucol_close(coll);
    145 
    146 
    147 }
    148 
    149 const static char shifted[][20] = {
    150   "black bird",
    151   "black-bird",
    152   "blackbird",
    153   "black Bird",
    154   "black-Bird",
    155   "blackBird",
    156   "black birds",
    157   "black-birds",
    158   "blackbirds"
    159 };
    160 
    161 const static UCollationResult shiftedTert[] = {
    162   UCOL_EQUAL,
    163   UCOL_EQUAL,
    164   UCOL_EQUAL,
    165   UCOL_LESS,
    166   UCOL_EQUAL,
    167   UCOL_EQUAL,
    168   UCOL_LESS,
    169   UCOL_EQUAL,
    170   UCOL_EQUAL
    171 };
    172 
    173 const static char nonignorable[][20] = {
    174   "black bird",
    175   "black Bird",
    176   "black birds",
    177   "black-bird",
    178   "black-Bird",
    179   "black-birds",
    180   "blackbird",
    181   "blackBird",
    182   "blackbirds"
    183 };
    184 
    185 static void BlackBirdTest(void) {
    186   UErrorCode status = U_ZERO_ERROR;
    187   UChar t1[90];
    188   UChar t2[90];
    189 
    190   uint32_t i = 0, j = 0;
    191   uint32_t size = 0;
    192   UCollator *coll = ucol_open("en_US", &status);
    193 
    194   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    195   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    196 
    197   if(U_SUCCESS(status)) {
    198     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    199     for(i = 0; i < size-1; i++) {
    200       for(j = i+1; j < size; j++) {
    201         u_uastrcpy(t1, nonignorable[i]);
    202         u_uastrcpy(t2, nonignorable[j]);
    203         doTest(coll, t1, t2, UCOL_LESS);
    204       }
    205     }
    206   }
    207 
    208   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    209   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    210 
    211   if(U_SUCCESS(status)) {
    212     size = sizeof(shifted)/sizeof(shifted[0]);
    213     for(i = 0; i < size-1; i++) {
    214       for(j = i+1; j < size; j++) {
    215         u_uastrcpy(t1, shifted[i]);
    216         u_uastrcpy(t2, shifted[j]);
    217         doTest(coll, t1, t2, UCOL_LESS);
    218       }
    219     }
    220   }
    221 
    222   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    223   if(U_SUCCESS(status)) {
    224     size = sizeof(shifted)/sizeof(shifted[0]);
    225     for(i = 1; i < size; i++) {
    226       u_uastrcpy(t1, shifted[i-1]);
    227       u_uastrcpy(t2, shifted[i]);
    228       doTest(coll, t1, t2, shiftedTert[i]);
    229     }
    230   }
    231 
    232   ucol_close(coll);
    233 }
    234 
    235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    236     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    237     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    238     {0x0041/*'A'*/, 0x0300, 0x0000},
    239     {0x00C0, 0x0301, 0x0000},
    240     /* this would work with forced normalization */
    241     {0x00C0, 0x0316, 0x0000}
    242 };
    243 
    244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    245     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    246     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    247     {0x00C0, 0},
    248     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    249     /* this would work with forced normalization */
    250     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    251 };
    252 
    253 const static UCollationResult results[] = {
    254     UCOL_GREATER,
    255     UCOL_EQUAL,
    256     UCOL_EQUAL,
    257     UCOL_GREATER,
    258     UCOL_EQUAL
    259 };
    260 
    261 static void FunkyATest(void)
    262 {
    263 
    264     int32_t i;
    265     UErrorCode status = U_ZERO_ERROR;
    266     UCollator  *myCollation;
    267     myCollation = ucol_open("en_US", &status);
    268     if(U_FAILURE(status)){
    269         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    270         return;
    271     }
    272     log_verbose("Testing some A letters, for some reason\n");
    273     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    274     ucol_setStrength(myCollation, UCOL_TERTIARY);
    275     for (i = 0; i < 4 ; i++)
    276     {
    277         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    278     }
    279     ucol_close(myCollation);
    280 }
    281 
    282 UColAttributeValue caseFirst[] = {
    283     UCOL_OFF,
    284     UCOL_LOWER_FIRST,
    285     UCOL_UPPER_FIRST
    286 };
    287 
    288 
    289 UColAttributeValue alternateHandling[] = {
    290     UCOL_NON_IGNORABLE,
    291     UCOL_SHIFTED
    292 };
    293 
    294 UColAttributeValue caseLevel[] = {
    295     UCOL_OFF,
    296     UCOL_ON
    297 };
    298 
    299 UColAttributeValue strengths[] = {
    300     UCOL_PRIMARY,
    301     UCOL_SECONDARY,
    302     UCOL_TERTIARY,
    303     UCOL_QUATERNARY,
    304     UCOL_IDENTICAL
    305 };
    306 
    307 #if 0
    308 static const char * strengthsC[] = {
    309     "UCOL_PRIMARY",
    310     "UCOL_SECONDARY",
    311     "UCOL_TERTIARY",
    312     "UCOL_QUATERNARY",
    313     "UCOL_IDENTICAL"
    314 };
    315 
    316 static const char * caseFirstC[] = {
    317     "UCOL_OFF",
    318     "UCOL_LOWER_FIRST",
    319     "UCOL_UPPER_FIRST"
    320 };
    321 
    322 
    323 static const char * alternateHandlingC[] = {
    324     "UCOL_NON_IGNORABLE",
    325     "UCOL_SHIFTED"
    326 };
    327 
    328 static const char * caseLevelC[] = {
    329     "UCOL_OFF",
    330     "UCOL_ON"
    331 };
    332 
    333 /* not used currently - does not test only prints */
    334 static void PrintMarkDavis(void)
    335 {
    336   UErrorCode status = U_ZERO_ERROR;
    337   UChar m[256];
    338   uint8_t sortkey[256];
    339   UCollator *coll = ucol_open("en_US", &status);
    340   uint32_t h,i,j,k, sortkeysize;
    341   uint32_t sizem = 0;
    342   char buffer[512];
    343   uint32_t len = 512;
    344 
    345   log_verbose("PrintMarkDavis");
    346 
    347   u_uastrcpy(m, "Mark Davis");
    348   sizem = u_strlen(m);
    349 
    350 
    351   m[1] = 0xe4;
    352 
    353   for(i = 0; i<sizem; i++) {
    354     fprintf(stderr, "\\u%04X ", m[i]);
    355   }
    356   fprintf(stderr, "\n");
    357 
    358   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    359     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    360     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    361 
    362     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    363       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    364       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    365 
    366       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    367         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    368         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    369 
    370         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    371           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    372           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    373           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    374           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    375         }
    376 
    377       }
    378 
    379     }
    380 
    381   }
    382 }
    383 #endif
    384 
    385 static void BillFairmanTest(void) {
    386 /*
    387 ** check for actual locale via ICU resource bundles
    388 **
    389 ** lp points to the original locale ("fr_FR_....")
    390 */
    391 
    392     UResourceBundle *lr,*cr;
    393     UErrorCode              lec = U_ZERO_ERROR;
    394     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    395 
    396     log_verbose("BillFairmanTest\n");
    397 
    398     lr = ures_open(NULL,lp,&lec);
    399     if (lr) {
    400         cr = ures_getByKey(lr,"collations",0,&lec);
    401         if (cr) {
    402             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    403             if (lp) {
    404                 if (U_SUCCESS(lec)) {
    405                     if(strcmp(lp, "fr") != 0) {
    406                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    407                     }
    408                 }
    409             }
    410             ures_close(cr);
    411         }
    412         ures_close(lr);
    413     }
    414 }
    415 
    416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    417     UChar source[256] = { '\0'};
    418     UChar target[256] = { '\0'};
    419     UChar preP = 0x31a3;
    420     UChar preQ = 0x310d;
    421 /*
    422     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
    423     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
    424 */
    425     /*log_verbose("Testing primary\n");*/
    426 
    427     doTest(col, p, q, UCOL_LESS);
    428 /*
    429     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
    430 
    431     if(result!=UCOL_LESS){
    432        aescstrdup(p,utfSource,256);
    433        aescstrdup(q,utfTarget,256);
    434        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    435     }
    436 */
    437     source[0] = preP;
    438     u_strcpy(source+1,p);
    439     target[0] = preQ;
    440     u_strcpy(target+1,q);
    441     doTest(col, source, target, UCOL_LESS);
    442 /*
    443     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
    444 */
    445 }
    446 
    447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    448     UChar source[256] = { '\0'};
    449     UChar target[256] = { '\0'};
    450 
    451     /*log_verbose("Testing secondary\n");*/
    452 
    453     doTest(col, p, q, UCOL_LESS);
    454 /*
    455     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
    456 */
    457     source[0] = 0x0053;
    458     u_strcpy(source+1,p);
    459     target[0]= 0x0073;
    460     u_strcpy(target+1,q);
    461 
    462     doTest(col, source, target, UCOL_LESS);
    463 /*
    464     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    465 */
    466 
    467 
    468     u_strcpy(source,p);
    469     source[u_strlen(p)] = 0x62;
    470     source[u_strlen(p)+1] = 0;
    471 
    472 
    473     u_strcpy(target,q);
    474     target[u_strlen(q)] = 0x61;
    475     target[u_strlen(q)+1] = 0;
    476 
    477     doTest(col, source, target, UCOL_GREATER);
    478 
    479 /*
    480     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
    481 */
    482 }
    483 
    484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    485     UChar source[256] = { '\0'};
    486     UChar target[256] = { '\0'};
    487 
    488     /*log_verbose("Testing tertiary\n");*/
    489 
    490     doTest(col, p, q, UCOL_LESS);
    491 /*
    492     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
    493 */
    494     source[0] = 0x0020;
    495     u_strcpy(source+1,p);
    496     target[0]= 0x002D;
    497     u_strcpy(target+1,q);
    498 
    499     doTest(col, source, target, UCOL_LESS);
    500 /*
    501     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
    502 */
    503 
    504     u_strcpy(source,p);
    505     source[u_strlen(p)] = 0xE0;
    506     source[u_strlen(p)+1] = 0;
    507 
    508     u_strcpy(target,q);
    509     target[u_strlen(q)] = 0x61;
    510     target[u_strlen(q)+1] = 0;
    511 
    512     doTest(col, source, target, UCOL_GREATER);
    513 
    514 /*
    515     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    516 */
    517 }
    518 
    519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    520 /*
    521     UChar source[256] = { '\0'};
    522     UChar target[256] = { '\0'};
    523 */
    524 
    525     doTest(col, p, q, UCOL_EQUAL);
    526 /*
    527     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    528 */
    529 }
    530 
    531 static void testCollator(UCollator *coll, UErrorCode *status) {
    532   const UChar *rules = NULL, *current = NULL;
    533   int32_t ruleLen = 0;
    534   uint32_t strength = 0;
    535   uint32_t chOffset = 0; uint32_t chLen = 0;
    536   uint32_t exOffset = 0; uint32_t exLen = 0;
    537   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    538   uint32_t firstEx = 0;
    539 /*  uint32_t rExpsLen = 0; */
    540   uint32_t firstLen = 0;
    541   UBool varT = FALSE; UBool top_ = TRUE;
    542   uint16_t specs = 0;
    543   UBool startOfRules = TRUE;
    544   UBool lastReset = FALSE;
    545   UBool before = FALSE;
    546   uint32_t beforeStrength = 0;
    547   UColTokenParser src;
    548   UColOptionSet opts;
    549 
    550   UChar first[256];
    551   UChar second[256];
    552   UChar tempB[256];
    553   uint32_t tempLen;
    554   UChar *rulesCopy = NULL;
    555   UParseError parseError;
    556 
    557   uprv_memset(&src, 0, sizeof(UColTokenParser));
    558 
    559   src.opts = &opts;
    560 
    561   rules = ucol_getRules(coll, &ruleLen);
    562   if(U_SUCCESS(*status) && ruleLen > 0) {
    563     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    564     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    565     src.current = src.source = rulesCopy;
    566     src.end = rulesCopy+ruleLen;
    567     src.extraCurrent = src.end;
    568     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    569     *first = *second = 0;
    570 
    571 	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    572 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    573     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
    574       strength = src.parsedToken.strength;
    575       chOffset = src.parsedToken.charsOffset;
    576       chLen = src.parsedToken.charsLen;
    577       exOffset = src.parsedToken.extensionOffset;
    578       exLen = src.parsedToken.extensionLen;
    579       prefixOffset = src.parsedToken.prefixOffset;
    580       prefixLen = src.parsedToken.prefixLen;
    581       specs = src.parsedToken.flags;
    582 
    583       startOfRules = FALSE;
    584       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    585       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    586       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
    587         second[0] = 0;
    588       } else {
    589         u_strncpy(second,src.source+chOffset, chLen);
    590         second[chLen] = 0;
    591 
    592         if(exLen > 0 && firstEx == 0) {
    593           u_strncat(first, src.source+exOffset, exLen);
    594           first[firstLen+exLen] = 0;
    595         }
    596 
    597         if(lastReset == TRUE && prefixLen != 0) {
    598           u_strncpy(first+prefixLen, first, firstLen);
    599           u_strncpy(first, src.source+prefixOffset, prefixLen);
    600           first[firstLen+prefixLen] = 0;
    601           firstLen = firstLen+prefixLen;
    602         }
    603 
    604         if(before == TRUE) { /* swap first and second */
    605           u_strcpy(tempB, first);
    606           u_strcpy(first, second);
    607           u_strcpy(second, tempB);
    608 
    609           tempLen = firstLen;
    610           firstLen = chLen;
    611           chLen = tempLen;
    612 
    613           tempLen = firstEx;
    614           firstEx = exLen;
    615           exLen = tempLen;
    616           if(beforeStrength < strength) {
    617             strength = beforeStrength;
    618           }
    619         }
    620       }
    621       lastReset = FALSE;
    622 
    623       switch(strength){
    624       case UCOL_IDENTICAL:
    625           testEquality(coll,first,second);
    626           break;
    627       case UCOL_PRIMARY:
    628           testPrimary(coll,first,second);
    629           break;
    630       case UCOL_SECONDARY:
    631           testSecondary(coll,first,second);
    632           break;
    633       case UCOL_TERTIARY:
    634           testTertiary(coll,first,second);
    635           break;
    636       case UCOL_TOK_RESET:
    637         lastReset = TRUE;
    638         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
    639         if(before) {
    640           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
    641         }
    642         break;
    643       default:
    644           break;
    645       }
    646 
    647       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
    648         before = FALSE;
    649       } else {
    650         firstLen = chLen;
    651         firstEx = exLen;
    652         u_strcpy(first, second);
    653       }
    654     }
    655     uprv_free(src.source);
    656     uprv_free(src.reorderCodes);
    657   }
    658 }
    659 
    660 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    661   UCollator *UCA = (UCollator *)collator;
    662   return ucol_strcoll(UCA, source, sLen, target, tLen);
    663 }
    664 
    665 /*
    666 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    667 #if U_PLATFORM_HAS_WIN32_API
    668   LCID lcid = (LCID)collator;
    669   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
    670 #else
    671   return 0;
    672 #endif
    673 }
    674 */
    675 
    676 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
    677                                      UChar s1, UChar s2,
    678                                      const UChar *s, const uint32_t sLen,
    679                                      const UChar *t, const uint32_t tLen) {
    680   UChar source[256] = {0};
    681   UChar target[256] = {0};
    682 
    683   source[0] = s1;
    684   u_strcpy(source+1, s);
    685   target[0] = s2;
    686   u_strcpy(target+1, t);
    687 
    688   return func(collator, opts, source, sLen+1, target, tLen+1);
    689 }
    690 
    691 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
    692                                    UChar s1, UChar s2,
    693                                    const UChar *s, const uint32_t sLen,
    694                                    const UChar *t, const uint32_t tLen) {
    695   UChar source[256] = {0};
    696   UChar target[256] = {0};
    697 
    698   u_strcpy(source, s);
    699   source[sLen] = s1;
    700   u_strcpy(target, t);
    701   target[tLen] = s2;
    702 
    703   return func(collator, opts, source, sLen+1, target, tLen+1);
    704 }
    705 
    706 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
    707                               const UChar *s, const uint32_t sLen,
    708                               const UChar *t, const uint32_t tLen,
    709                               UCollationResult result) {
    710   /*UChar fPrimary = 0x6d;*/
    711   /*UChar sPrimary = 0x6e;*/
    712   UChar fSecondary = 0x310d;
    713   UChar sSecondary = 0x31a3;
    714   UChar fTertiary = 0x310f;
    715   UChar sTertiary = 0x31b7;
    716 
    717   UCollationResult oposite;
    718   if(result == UCOL_EQUAL) {
    719     return UCOL_IDENTICAL;
    720   } else if(result == UCOL_GREATER) {
    721     oposite = UCOL_LESS;
    722   } else {
    723     oposite = UCOL_GREATER;
    724   }
    725 
    726   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    727     return UCOL_PRIMARY;
    728   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
    729     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
    730     return UCOL_SECONDARY;
    731   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
    732     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    733     return UCOL_TERTIARY;
    734   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
    735     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
    736     return UCOL_QUATERNARY;
    737   } else {
    738     return UCOL_IDENTICAL;
    739   }
    740 }
    741 
    742 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    743   uint32_t i = 0;
    744 
    745   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    746     buffer[0] = '=';
    747     buffer[1] = '=';
    748     buffer[2] = '\0';
    749   } else if(res == UCOL_GREATER) {
    750     for(i = 0; i<strength+1; i++) {
    751       buffer[i] = '>';
    752     }
    753     buffer[strength+1] = '\0';
    754   } else {
    755     for(i = 0; i<strength+1; i++) {
    756       buffer[i] = '<';
    757     }
    758     buffer[strength+1] = '\0';
    759   }
    760 
    761   return buffer;
    762 }
    763 
    764 
    765 
    766 static void logFailure (const char *platform, const char *test,
    767                         const UChar *source, const uint32_t sLen,
    768                         const UChar *target, const uint32_t tLen,
    769                         UCollationResult realRes, uint32_t realStrength,
    770                         UCollationResult expRes, uint32_t expStrength, UBool error) {
    771 
    772   uint32_t i = 0;
    773 
    774   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
    775   static int32_t maxOutputLength = 0;
    776   int32_t outputLength;
    777 
    778   *sEsc = *tEsc = *s = *t = 0;
    779   if(error == TRUE) {
    780     log_err("Difference between expected and generated order. Run test with -v for more info\n");
    781   } else if(getTestOption(VERBOSITY_OPTION) == 0) {
    782     return;
    783   }
    784   for(i = 0; i<sLen; i++) {
    785     sprintf(b, "%04X", source[i]);
    786     strcat(sEsc, "\\u");
    787     strcat(sEsc, b);
    788     strcat(s, b);
    789     strcat(s, " ");
    790     if(source[i] < 0x80) {
    791       sprintf(b, "(%c)", source[i]);
    792       strcat(sEsc, b);
    793     }
    794   }
    795   for(i = 0; i<tLen; i++) {
    796     sprintf(b, "%04X", target[i]);
    797     strcat(tEsc, "\\u");
    798     strcat(tEsc, b);
    799     strcat(t, b);
    800     strcat(t, " ");
    801     if(target[i] < 0x80) {
    802       sprintf(b, "(%c)", target[i]);
    803       strcat(tEsc, b);
    804     }
    805   }
    806 /*
    807   strcpy(output, "[[ ");
    808   strcat(output, sEsc);
    809   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    810   strcat(output, tEsc);
    811 
    812   strcat(output, " : ");
    813 
    814   strcat(output, sEsc);
    815   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    816   strcat(output, tEsc);
    817   strcat(output, " ]] ");
    818 
    819   log_verbose("%s", output);
    820 */
    821 
    822 
    823   strcpy(output, "DIFF: ");
    824 
    825   strcat(output, s);
    826   strcat(output, " : ");
    827   strcat(output, t);
    828 
    829   strcat(output, test);
    830   strcat(output, ": ");
    831 
    832   strcat(output, sEsc);
    833   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    834   strcat(output, tEsc);
    835 
    836   strcat(output, " ");
    837 
    838   strcat(output, platform);
    839   strcat(output, ": ");
    840 
    841   strcat(output, sEsc);
    842   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    843   strcat(output, tEsc);
    844 
    845   outputLength = (int32_t)strlen(output);
    846   if(outputLength > maxOutputLength) {
    847     maxOutputLength = outputLength;
    848     U_ASSERT(outputLength < sizeof(output));
    849   }
    850 
    851   log_verbose("%s\n", output);
    852 
    853 }
    854 
    855 /*
    856 static void printOutRules(const UChar *rules) {
    857   uint32_t len = u_strlen(rules);
    858   uint32_t i = 0;
    859   char toPrint;
    860   uint32_t line = 0;
    861 
    862   fprintf(stdout, "Rules:");
    863 
    864   for(i = 0; i<len; i++) {
    865     if(rules[i]<0x7f && rules[i]>=0x20) {
    866       toPrint = (char)rules[i];
    867       if(toPrint == '&') {
    868         line = 1;
    869         fprintf(stdout, "\n&");
    870       } else if(toPrint == ';') {
    871         fprintf(stdout, "<<");
    872         line+=2;
    873       } else if(toPrint == ',') {
    874         fprintf(stdout, "<<<");
    875         line+=3;
    876       } else {
    877         fprintf(stdout, "%c", toPrint);
    878         line++;
    879       }
    880     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
    881       fprintf(stdout, "\\u%04X", rules[i]);
    882       line+=6;
    883     }
    884     if(line>72) {
    885       fprintf(stdout, "\n");
    886       line = 0;
    887     }
    888   }
    889 
    890   log_verbose("\n");
    891 
    892 }
    893 */
    894 
    895 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
    896   uint32_t diffs = 0;
    897   UCollationResult realResult;
    898   uint32_t realStrength;
    899 
    900   uint32_t sLen = u_strlen(first);
    901   uint32_t tLen = u_strlen(second);
    902 
    903   realResult = func(collator, opts, first, sLen, second, tLen);
    904   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
    905 
    906   if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
    907     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
    908     diffs++;
    909   } else if(realResult != UCOL_LESS || realStrength != strength) {
    910     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
    911     diffs++;
    912   }
    913   return diffs;
    914 }
    915 
    916 
    917 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
    918   const UChar *rules = NULL, *current = NULL;
    919   int32_t ruleLen = 0;
    920   uint32_t strength = 0;
    921   uint32_t chOffset = 0; uint32_t chLen = 0;
    922   uint32_t exOffset = 0; uint32_t exLen = 0;
    923   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    924 /*  uint32_t rExpsLen = 0; */
    925   uint32_t firstLen = 0, secondLen = 0;
    926   UBool varT = FALSE; UBool top_ = TRUE;
    927   uint16_t specs = 0;
    928   UBool startOfRules = TRUE;
    929   UColTokenParser src;
    930   UColOptionSet opts;
    931 
    932   UChar first[256];
    933   UChar second[256];
    934   UChar *rulesCopy = NULL;
    935 
    936   uint32_t UCAdiff = 0;
    937   uint32_t Windiff = 1;
    938   UParseError parseError;
    939 
    940   uprv_memset(&src, 0, sizeof(UColTokenParser));
    941   src.opts = &opts;
    942 
    943   rules = ucol_getRules(coll, &ruleLen);
    944 
    945   /*printOutRules(rules);*/
    946 
    947   if(U_SUCCESS(*status) && ruleLen > 0) {
    948     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    949     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    950     src.current = src.source = rulesCopy;
    951     src.end = rulesCopy+ruleLen;
    952     src.extraCurrent = src.end;
    953     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    954     *first = *second = 0;
    955 
    956     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    957        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    958     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
    959       strength = src.parsedToken.strength;
    960       chOffset = src.parsedToken.charsOffset;
    961       chLen = src.parsedToken.charsLen;
    962       exOffset = src.parsedToken.extensionOffset;
    963       exLen = src.parsedToken.extensionLen;
    964       prefixOffset = src.parsedToken.prefixOffset;
    965       prefixLen = src.parsedToken.prefixLen;
    966       specs = src.parsedToken.flags;
    967 
    968       startOfRules = FALSE;
    969       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    970       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    971 
    972       u_strncpy(second,src.source+chOffset, chLen);
    973       second[chLen] = 0;
    974       secondLen = chLen;
    975 
    976       if(exLen > 0) {
    977         u_strncat(first, src.source+exOffset, exLen);
    978         first[firstLen+exLen] = 0;
    979         firstLen += exLen;
    980       }
    981 
    982       if(strength != UCOL_TOK_RESET) {
    983         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
    984           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
    985           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
    986         }
    987       }
    988 
    989 
    990       firstLen = chLen;
    991       u_strcpy(first, second);
    992 
    993     }
    994     if(UCAdiff != 0 && Windiff != 0) {
    995       log_verbose("\n");
    996     }
    997     if(UCAdiff == 0) {
    998       log_verbose("No immediate difference with %s!\n", refName);
    999     }
   1000     if(Windiff == 0) {
   1001       log_verbose("No immediate difference with Win32!\n");
   1002     }
   1003     uprv_free(src.source);
   1004     uprv_free(src.reorderCodes);
   1005   }
   1006 }
   1007 
   1008 /*
   1009  * Takes two CEs (lead and continuation) and
   1010  * compares them as CEs should be compared:
   1011  * primary vs. primary, secondary vs. secondary
   1012  * tertiary vs. tertiary
   1013  */
   1014 static int32_t compareCEs(uint32_t s1, uint32_t s2,
   1015                    uint32_t t1, uint32_t t2) {
   1016   uint32_t s = 0, t = 0;
   1017   if(s1 == t1 && s2 == t2) {
   1018     return 0;
   1019   }
   1020   s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
   1021   t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
   1022   if(s < t) {
   1023     return -1;
   1024   } else if(s > t) {
   1025     return 1;
   1026   } else {
   1027     s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
   1028     t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
   1029     if(s < t) {
   1030       return -1;
   1031     } else if(s > t) {
   1032       return 1;
   1033     } else {
   1034       s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
   1035       t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
   1036       if(s < t) {
   1037         return -1;
   1038       } else {
   1039         return 1;
   1040       }
   1041     }
   1042   }
   1043 }
   1044 
   1045 typedef struct {
   1046   uint32_t startCE;
   1047   uint32_t startContCE;
   1048   uint32_t limitCE;
   1049   uint32_t limitContCE;
   1050 } indirectBoundaries;
   1051 
   1052 /* these values are used for finding CE values for indirect positioning. */
   1053 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
   1054 /* values. It only works for resets and you cannot tailor indirect names */
   1055 /* An indirect name can define either an anchor point or a range. An     */
   1056 /* anchor point behaves in exactly the same way as a code point in reset */
   1057 /* would, except that it cannot be tailored. A range (we currently only  */
   1058 /* know for the [top] range will explicitly set the upper bound for      */
   1059 /* generated CEs, thus allowing for better control over how many CEs can */
   1060 /* be squeezed between in the range without performance penalty.         */
   1061 /* In that respect, we use [top] for tailoring of locales that use CJK   */
   1062 /* characters. Other indirect values are currently a pure convenience,   */
   1063 /* they can be used to assure that the CEs will be always positioned in  */
   1064 /* the same place relative to a point with known properties (e.g. first  */
   1065 /* primary ignorable). */
   1066 static indirectBoundaries ucolIndirectBoundaries[15];
   1067 static UBool indirectBoundariesSet = FALSE;
   1068 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
   1069     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
   1070     /* to initalize here. */
   1071     ucolIndirectBoundaries[indexR].startCE = start[0];
   1072     ucolIndirectBoundaries[indexR].startContCE = start[1];
   1073     if(end) {
   1074         ucolIndirectBoundaries[indexR].limitCE = end[0];
   1075         ucolIndirectBoundaries[indexR].limitContCE = end[1];
   1076     } else {
   1077         ucolIndirectBoundaries[indexR].limitCE = 0;
   1078         ucolIndirectBoundaries[indexR].limitContCE = 0;
   1079     }
   1080 }
   1081 
   1082 static void testCEs(UCollator *coll, UErrorCode *status) {
   1083     const UChar *rules = NULL, *current = NULL;
   1084     int32_t ruleLen = 0;
   1085 
   1086     uint32_t strength = 0;
   1087     uint32_t maxStrength = UCOL_IDENTICAL;
   1088     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
   1089     uint32_t lastCE;
   1090     uint32_t lastContCE;
   1091 
   1092     int32_t result = 0;
   1093     uint32_t chOffset = 0; uint32_t chLen = 0;
   1094     uint32_t exOffset = 0; uint32_t exLen = 0;
   1095     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
   1096     uint32_t oldOffset = 0;
   1097 
   1098     /* uint32_t rExpsLen = 0; */
   1099     /* uint32_t firstLen = 0; */
   1100     uint16_t specs = 0;
   1101     UBool varT = FALSE; UBool top_ = TRUE;
   1102     UBool startOfRules = TRUE;
   1103     UBool before = FALSE;
   1104     UColTokenParser src;
   1105     UColOptionSet opts;
   1106     UParseError parseError;
   1107     UChar *rulesCopy = NULL;
   1108     collIterate *c = uprv_new_collIterate(status);
   1109     UCAConstants *consts = NULL;
   1110     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
   1111         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
   1112     const char *colLoc;
   1113     UCollator *UCA = ucol_open("root", status);
   1114 
   1115     if (U_FAILURE(*status)) {
   1116         log_err("Could not open root collator %s\n", u_errorName(*status));
   1117         uprv_delete_collIterate(c);
   1118         return;
   1119     }
   1120 
   1121     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
   1122     if (U_FAILURE(*status)) {
   1123         log_err("Could not get collator name: %s\n", u_errorName(*status));
   1124         ucol_close(UCA);
   1125         uprv_delete_collIterate(c);
   1126         return;
   1127     }
   1128 
   1129     uprv_memset(&src, 0, sizeof(UColTokenParser));
   1130 
   1131     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
   1132     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
   1133     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
   1134     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
   1135     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
   1136 
   1137     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
   1138 
   1139     src.opts = &opts;
   1140 
   1141     rules = ucol_getRules(coll, &ruleLen);
   1142 
   1143     src.invUCA = ucol_initInverseUCA(status);
   1144 
   1145     if(indirectBoundariesSet == FALSE) {
   1146         /* UCOL_RESET_TOP_VALUE */
   1147         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1148         /* UCOL_FIRST_PRIMARY_IGNORABLE */
   1149         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
   1150         /* UCOL_LAST_PRIMARY_IGNORABLE */
   1151         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
   1152         /* UCOL_FIRST_SECONDARY_IGNORABLE */
   1153         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
   1154         /* UCOL_LAST_SECONDARY_IGNORABLE */
   1155         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
   1156         /* UCOL_FIRST_TERTIARY_IGNORABLE */
   1157         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
   1158         /* UCOL_LAST_TERTIARY_IGNORABLE */
   1159         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
   1160         /* UCOL_FIRST_VARIABLE */
   1161         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
   1162         /* UCOL_LAST_VARIABLE */
   1163         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
   1164         /* UCOL_FIRST_NON_VARIABLE */
   1165         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
   1166         /* UCOL_LAST_NON_VARIABLE */
   1167         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1168         /* UCOL_FIRST_IMPLICIT */
   1169         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
   1170         /* UCOL_LAST_IMPLICIT */
   1171         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
   1172         /* UCOL_FIRST_TRAILING */
   1173         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
   1174         /* UCOL_LAST_TRAILING */
   1175         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
   1176         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
   1177         indirectBoundariesSet = TRUE;
   1178     }
   1179 
   1180 
   1181     if(U_SUCCESS(*status) && ruleLen > 0) {
   1182         rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   1183         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
   1184         src.current = src.source = rulesCopy;
   1185         src.end = rulesCopy+ruleLen;
   1186         src.extraCurrent = src.end;
   1187         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1188 
   1189 	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1190 	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1191         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
   1192             strength = src.parsedToken.strength;
   1193             chOffset = src.parsedToken.charsOffset;
   1194             chLen = src.parsedToken.charsLen;
   1195             exOffset = src.parsedToken.extensionOffset;
   1196             exLen = src.parsedToken.extensionLen;
   1197             prefixOffset = src.parsedToken.prefixOffset;
   1198             prefixLen = src.parsedToken.prefixLen;
   1199             specs = src.parsedToken.flags;
   1200 
   1201             startOfRules = FALSE;
   1202             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
   1203             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
   1204 
   1205             uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
   1206 
   1207             currCE = ucol_getNextCE(coll, c, status);
   1208             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
   1209                 log_verbose("Thai prevowel detected. Will pick next CE\n");
   1210                 currCE = ucol_getNextCE(coll, c, status);
   1211             }
   1212 
   1213             currContCE = ucol_getNextCE(coll, c, status);
   1214             if(!isContinuation(currContCE)) {
   1215                 currContCE = 0;
   1216             }
   1217 
   1218             /* we need to repack CEs here */
   1219 
   1220             if(strength == UCOL_TOK_RESET) {
   1221                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
   1222                 if(top_ == TRUE) {
   1223                     int32_t tokenIndex = src.parsedToken.indirectIndex;
   1224 
   1225                     nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
   1226                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
   1227                 } else {
   1228                     nextCE = baseCE = currCE;
   1229                     nextContCE = baseContCE = currContCE;
   1230                 }
   1231                 maxStrength = UCOL_IDENTICAL;
   1232             } else {
   1233                 if(strength < maxStrength) {
   1234                     maxStrength = strength;
   1235                     if(baseCE == UCOL_RESET_TOP_VALUE) {
   1236                         log_verbose("Resetting to [top]\n");
   1237                         nextCE = UCOL_NEXT_TOP_VALUE;
   1238                         nextContCE = UCOL_NEXT_TOP_CONT;
   1239                     } else {
   1240                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
   1241                     }
   1242                     if(result < 0) {
   1243                         if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
   1244                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
   1245                             return;
   1246                         } else {
   1247                             log_err("%s: couldn't find the CE\n", colLoc);
   1248                             return;
   1249                         }
   1250                     }
   1251                 }
   1252 
   1253                 currCE &= 0xFFFFFF3F;
   1254                 currContCE &= 0xFFFFFFBF;
   1255 
   1256                 if(maxStrength == UCOL_IDENTICAL) {
   1257                     if(baseCE != currCE || baseContCE != currContCE) {
   1258                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1259                     }
   1260                 } else {
   1261                     if(strength == UCOL_IDENTICAL) {
   1262                         if(lastCE != currCE || lastContCE != currContCE) {
   1263                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1264                         }
   1265                     } else {
   1266                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
   1267                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
   1268                             log_err("%s: current CE is not less than base CE\n", colLoc);
   1269                         }
   1270                         if(!before) {
   1271                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
   1272                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1273                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1274                             }
   1275                         } else {
   1276                             before = FALSE;
   1277                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
   1278                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1279                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1280                             }
   1281                         }
   1282                     }
   1283                 }
   1284 
   1285             }
   1286 
   1287             oldOffset = chOffset;
   1288             lastCE = currCE & 0xFFFFFF3F;
   1289             lastContCE = currContCE & 0xFFFFFFBF;
   1290         }
   1291         uprv_free(src.source);
   1292         uprv_free(src.reorderCodes);
   1293     }
   1294     ucol_close(UCA);
   1295     uprv_delete_collIterate(c);
   1296 }
   1297 
   1298 #if 0
   1299 /* these locales are now picked from index RB */
   1300 static const char* localesToTest[] = {
   1301 "ar", "bg", "ca", "cs", "da",
   1302 "el", "en_BE", "en_US_POSIX",
   1303 "es", "et", "fi", "fr", "hi",
   1304 "hr", "hu", "is", "iw", "ja",
   1305 "ko", "lt", "lv", "mk", "mt",
   1306 "nb", "nn", "nn_NO", "pl", "ro",
   1307 "ru", "sh", "sk", "sl", "sq",
   1308 "sr", "sv", "th", "tr", "uk",
   1309 "vi", "zh", "zh_TW"
   1310 };
   1311 #endif
   1312 
   1313 static const char* rulesToTest[] = {
   1314   /* Funky fa rule */
   1315   "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
   1316   /*"& Z < p, P",*/
   1317     /* Cui Mins rules */
   1318     "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
   1319     "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
   1320     "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
   1321     "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
   1322     "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
   1323     "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
   1324     "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
   1325 };
   1326 
   1327 
   1328 static void TestCollations(void) {
   1329     int32_t noOfLoc = uloc_countAvailable();
   1330     int32_t i = 0, j = 0;
   1331 
   1332     UErrorCode status = U_ZERO_ERROR;
   1333     char cName[256];
   1334     UChar name[256];
   1335     int32_t nameSize;
   1336 
   1337 
   1338     const char *locName = NULL;
   1339     UCollator *coll = NULL;
   1340     UCollator *UCA = ucol_open("", &status);
   1341     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
   1342     if (U_FAILURE(status)) {
   1343         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
   1344         return;
   1345     }
   1346     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
   1347 
   1348     for(i = 0; i<noOfLoc; i++) {
   1349         status = U_ZERO_ERROR;
   1350         locName = uloc_getAvailable(i);
   1351         if(uprv_strcmp("ja", locName) == 0) {
   1352             log_verbose("Don't know how to test prefixes\n");
   1353             continue;
   1354         }
   1355         if(hasCollationElements(locName)) {
   1356             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
   1357             for(j = 0; j<nameSize; j++) {
   1358                 cName[j] = (char)name[j];
   1359             }
   1360             cName[nameSize] = 0;
   1361             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1362             coll = ucol_open(locName, &status);
   1363             if(U_SUCCESS(status)) {
   1364                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
   1365                 ucol_close(coll);
   1366             } else {
   1367                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
   1368                 status = U_ZERO_ERROR;
   1369             }
   1370         }
   1371     }
   1372     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
   1373     ucol_close(UCA);
   1374 }
   1375 
   1376 static void RamsRulesTest(void) {
   1377     UErrorCode status = U_ZERO_ERROR;
   1378     int32_t i = 0;
   1379     UCollator *coll = NULL;
   1380     UChar rule[2048];
   1381     uint32_t ruleLen;
   1382     int32_t noOfLoc = uloc_countAvailable();
   1383     const char *locName = NULL;
   1384 
   1385     log_verbose("RamsRulesTest\n");
   1386 
   1387     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
   1388         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
   1389         return;
   1390     }
   1391 
   1392     for(i = 0; i<noOfLoc; i++) {
   1393         locName = uloc_getAvailable(i);
   1394         if(hasCollationElements(locName)) {
   1395             if (uprv_strcmp("ja", locName)==0) {
   1396                 log_verbose("Don't know how to test Japanese because of prefixes\n");
   1397                 continue;
   1398             }
   1399             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
   1400                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
   1401                 continue;
   1402             }
   1403             if (uprv_strcmp("bn", locName)==0 ||
   1404                 uprv_strcmp("bs", locName)==0 ||            /* Add due to import per cldrbug 5647 */
   1405                 uprv_strcmp("bs_Cyrl", locName)==0 ||       /* Add due to import per cldrbug 5647 */
   1406                 uprv_strcmp("en_US_POSIX", locName)==0 ||
   1407                 uprv_strcmp("fa_AF", locName)==0 ||         /* Add due to import per cldrbug 5647 */
   1408                 uprv_strcmp("he", locName)==0 ||            /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
   1409                 uprv_strcmp("he_IL", locName)==0 ||         /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
   1410                 uprv_strcmp("km", locName)==0 ||
   1411                 uprv_strcmp("km_KH", locName)==0 ||
   1412                 uprv_strcmp("my", locName)==0 ||
   1413                 uprv_strcmp("si", locName)==0 ||
   1414                 uprv_strcmp("si_LK", locName)==0 ||
   1415                 uprv_strcmp("sr_Latn", locName)==0 ||       /* Add due to import per cldrbug 5647 */
   1416                 uprv_strcmp("th", locName)==0 ||
   1417                 uprv_strcmp("th_TH", locName)==0 ||
   1418                 uprv_strcmp("zh", locName)==0 ||
   1419                 uprv_strcmp("zh_Hant", locName)==0
   1420             ) {
   1421                 log_verbose("Don't know how to test %s. "
   1422                             "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
   1423                 continue;
   1424             }
   1425             log_verbose("Testing locale %s\n", locName);
   1426             status = U_ZERO_ERROR;
   1427             coll = ucol_open(locName, &status);
   1428             if(U_SUCCESS(status)) {
   1429               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
   1430                 if(coll->image->jamoSpecial == TRUE) {
   1431                   log_err("%s has special JAMOs\n", locName);
   1432                 }
   1433                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
   1434                 testCollator(coll, &status);
   1435                 testCEs(coll, &status);
   1436               } else {
   1437                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
   1438               }
   1439               ucol_close(coll);
   1440             } else {
   1441               log_err("Could not open %s: %s\n", locName, u_errorName(status));
   1442             }
   1443         }
   1444     }
   1445 
   1446     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
   1447         log_verbose("Testing rule: %s\n", rulesToTest[i]);
   1448         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
   1449         status = U_ZERO_ERROR;
   1450         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1451         if(U_SUCCESS(status)) {
   1452             testCollator(coll, &status);
   1453             testCEs(coll, &status);
   1454             ucol_close(coll);
   1455         } else {
   1456           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
   1457         }
   1458     }
   1459 
   1460 }
   1461 
   1462 static void IsTailoredTest(void) {
   1463     UErrorCode status = U_ZERO_ERROR;
   1464     uint32_t i = 0;
   1465     UCollator *coll = NULL;
   1466     UChar rule[2048];
   1467     UChar tailored[2048];
   1468     UChar notTailored[2048];
   1469     uint32_t ruleLen, tailoredLen, notTailoredLen;
   1470 
   1471     log_verbose("IsTailoredTest\n");
   1472 
   1473     u_uastrcpy(rule, "&Z < A, B, C;c < d");
   1474     ruleLen = u_strlen(rule);
   1475 
   1476     u_uastrcpy(tailored, "ABCcd");
   1477     tailoredLen = u_strlen(tailored);
   1478 
   1479     u_uastrcpy(notTailored, "ZabD");
   1480     notTailoredLen = u_strlen(notTailored);
   1481 
   1482     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1483     if(U_SUCCESS(status)) {
   1484         for(i = 0; i<tailoredLen; i++) {
   1485             if(!ucol_isTailored(coll, tailored[i], &status)) {
   1486                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
   1487             }
   1488         }
   1489         for(i = 0; i<notTailoredLen; i++) {
   1490             if(ucol_isTailored(coll, notTailored[i], &status)) {
   1491                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
   1492             }
   1493         }
   1494         ucol_close(coll);
   1495     }
   1496     else {
   1497         log_err_status(status, "Can't tailor rules\n");
   1498     }
   1499     /* Code coverage */
   1500     status = U_ZERO_ERROR;
   1501     coll = ucol_open("ja", &status);
   1502     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
   1503         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
   1504     }
   1505     ucol_close(coll);
   1506 }
   1507 
   1508 
   1509 const static char chTest[][20] = {
   1510   "c",
   1511   "C",
   1512   "ca", "cb", "cx", "cy", "CZ",
   1513   "c\\u030C", "C\\u030C",
   1514   "h",
   1515   "H",
   1516   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
   1517   "ch", "cH", "Ch", "CH",
   1518   "cha", "charly", "che", "chh", "chch", "chr",
   1519   "i", "I", "iarly",
   1520   "r", "R",
   1521   "r\\u030C", "R\\u030C",
   1522   "s",
   1523   "S",
   1524   "s\\u030C", "S\\u030C",
   1525   "z", "Z",
   1526   "z\\u030C", "Z\\u030C"
   1527 };
   1528 
   1529 static void TestChMove(void) {
   1530     UChar t1[256] = {0};
   1531     UChar t2[256] = {0};
   1532 
   1533     uint32_t i = 0, j = 0;
   1534     uint32_t size = 0;
   1535     UErrorCode status = U_ZERO_ERROR;
   1536 
   1537     UCollator *coll = ucol_open("cs", &status);
   1538 
   1539     if(U_SUCCESS(status)) {
   1540         size = sizeof(chTest)/sizeof(chTest[0]);
   1541         for(i = 0; i < size-1; i++) {
   1542             for(j = i+1; j < size; j++) {
   1543                 u_unescape(chTest[i], t1, 256);
   1544                 u_unescape(chTest[j], t2, 256);
   1545                 doTest(coll, t1, t2, UCOL_LESS);
   1546             }
   1547         }
   1548     }
   1549     else {
   1550         log_data_err("Can't open collator");
   1551     }
   1552     ucol_close(coll);
   1553 }
   1554 
   1555 
   1556 
   1557 
   1558 const static char impTest[][20] = {
   1559   "\\u4e00",
   1560     "a",
   1561     "A",
   1562     "b",
   1563     "B",
   1564     "\\u4e01"
   1565 };
   1566 
   1567 
   1568 static void TestImplicitTailoring(void) {
   1569   static const struct {
   1570     const char *rules;
   1571     const char *data[10];
   1572     const uint32_t len;
   1573   } tests[] = {
   1574       { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
   1575       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
   1576       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
   1577       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
   1578   };
   1579 
   1580   int32_t i = 0;
   1581 
   1582   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   1583       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   1584   }
   1585 
   1586 /*
   1587   UChar t1[256] = {0};
   1588   UChar t2[256] = {0};
   1589 
   1590   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
   1591 
   1592   uint32_t i = 0, j = 0;
   1593   uint32_t size = 0;
   1594   uint32_t ruleLen = 0;
   1595   UErrorCode status = U_ZERO_ERROR;
   1596   UCollator *coll = NULL;
   1597   ruleLen = u_unescape(rule, t1, 256);
   1598 
   1599   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1600 
   1601   if(U_SUCCESS(status)) {
   1602     size = sizeof(impTest)/sizeof(impTest[0]);
   1603     for(i = 0; i < size-1; i++) {
   1604       for(j = i+1; j < size; j++) {
   1605         u_unescape(impTest[i], t1, 256);
   1606         u_unescape(impTest[j], t2, 256);
   1607         doTest(coll, t1, t2, UCOL_LESS);
   1608       }
   1609     }
   1610   }
   1611   else {
   1612     log_err("Can't open collator");
   1613   }
   1614   ucol_close(coll);
   1615   */
   1616 }
   1617 
   1618 static void TestFCDProblem(void) {
   1619   UChar t1[256] = {0};
   1620   UChar t2[256] = {0};
   1621 
   1622   const char *s1 = "\\u0430\\u0306\\u0325";
   1623   const char *s2 = "\\u04D1\\u0325";
   1624 
   1625   UErrorCode status = U_ZERO_ERROR;
   1626   UCollator *coll = ucol_open("", &status);
   1627   u_unescape(s1, t1, 256);
   1628   u_unescape(s2, t2, 256);
   1629 
   1630   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
   1631   doTest(coll, t1, t2, UCOL_EQUAL);
   1632 
   1633   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   1634   doTest(coll, t1, t2, UCOL_EQUAL);
   1635 
   1636   ucol_close(coll);
   1637 }
   1638 
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC.
We're only using NFC/NFD in this test.
*/
/* Capacity, in UChars, of each normalization buffer in a tester entry. */
#define NORM_BUFFER_TEST_LEN 18
/* One test case of TestComposeDecompose: a code point and two spellings of it. */
typedef struct {
  /* The code point this entry was built from. */
  UChar32 u;
  /* Output buffer of the UNORM_NFC normalization; TestComposeDecompose
     overwrites it with the original text when that text differs from NFD. */
  UChar NFC[NORM_BUFFER_TEST_LEN];
  /* Output buffer of the UNORM_NFD normalization. */
  UChar NFD[NORM_BUFFER_TEST_LEN];
} tester;
   1649 
   1650 static void TestComposeDecompose(void) {
   1651     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
   1652     static const UChar UNICODESET_STR[] = {
   1653         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
   1654         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
   1655         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
   1656     };
   1657     int32_t noOfLoc;
   1658     int32_t i = 0, j = 0;
   1659 
   1660     UErrorCode status = U_ZERO_ERROR;
   1661     const char *locName = NULL;
   1662     uint32_t nfcSize;
   1663     uint32_t nfdSize;
   1664     tester **t;
   1665     uint32_t noCases = 0;
   1666     UCollator *coll = NULL;
   1667     UChar32 u = 0;
   1668     UChar comp[NORM_BUFFER_TEST_LEN];
   1669     uint32_t len = 0;
   1670     UCollationElements *iter;
   1671     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
   1672     int32_t charsToTestSize;
   1673 
   1674     noOfLoc = uloc_countAvailable();
   1675 
   1676     coll = ucol_open("", &status);
   1677     if (U_FAILURE(status)) {
   1678         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
   1679         return;
   1680     }
   1681     charsToTestSize = uset_size(charsToTest);
   1682     if (charsToTestSize <= 0) {
   1683         log_err("Set was zero. Missing data?\n");
   1684         return;
   1685     }
   1686     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
   1687     t[0] = (tester *)malloc(sizeof(tester));
   1688     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
   1689 
   1690     for(u = 0; u < charsToTestSize; u++) {
   1691         UChar32 ch = uset_charAt(charsToTest, u);
   1692         len = 0;
   1693         U16_APPEND_UNSAFE(comp, len, ch);
   1694         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1695         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1696 
   1697         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
   1698           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
   1699             t[noCases]->u = ch;
   1700             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
   1701                 u_strncpy(t[noCases]->NFC, comp, len);
   1702                 t[noCases]->NFC[len] = 0;
   1703             }
   1704             noCases++;
   1705             t[noCases] = (tester *)malloc(sizeof(tester));
   1706             uprv_memset(t[noCases], 0, sizeof(tester));
   1707         }
   1708     }
   1709     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
   1710     uset_close(charsToTest);
   1711     charsToTest = NULL;
   1712 
   1713     for(u=0; u<(UChar32)noCases; u++) {
   1714         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1715             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
   1716             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1717         }
   1718     }
   1719     /*
   1720     for(u = 0; u < charsToTestSize; u++) {
   1721       if(!(u&0xFFFF)) {
   1722         log_verbose("%08X ", u);
   1723       }
   1724       uprv_memset(t[noCases], 0, sizeof(tester));
   1725       t[noCases]->u = u;
   1726       len = 0;
   1727       U16_APPEND_UNSAFE(comp, len, u);
   1728       comp[len] = 0;
   1729       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1730       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1731       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
   1732       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
   1733     }
   1734     */
   1735 
   1736     ucol_close(coll);
   1737 
   1738     log_verbose("Testing locales, number of cases = %i\n", noCases);
   1739     for(i = 0; i<noOfLoc; i++) {
   1740         status = U_ZERO_ERROR;
   1741         locName = uloc_getAvailable(i);
   1742         if(hasCollationElements(locName)) {
   1743             char cName[256];
   1744             UChar name[256];
   1745             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
   1746 
   1747             for(j = 0; j<nameSize; j++) {
   1748                 cName[j] = (char)name[j];
   1749             }
   1750             cName[nameSize] = 0;
   1751             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1752 
   1753             coll = ucol_open(locName, &status);
   1754             ucol_setStrength(coll, UCOL_IDENTICAL);
   1755             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1756 
   1757             for(u=0; u<(UChar32)noCases; u++) {
   1758                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1759                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
   1760                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1761                     log_verbose("Testing NFC\n");
   1762                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
   1763                     backAndForth(iter);
   1764                     log_verbose("Testing NFD\n");
   1765                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1766                     backAndForth(iter);
   1767                 }
   1768             }
   1769             ucol_closeElements(iter);
   1770             ucol_close(coll);
   1771         }
   1772     }
   1773     for(u = 0; u <= (UChar32)noCases; u++) {
   1774         free(t[u]);
   1775     }
   1776     free(t);
   1777 }
   1778 
   1779 static void TestEmptyRule(void) {
   1780   UErrorCode status = U_ZERO_ERROR;
   1781   UChar rulez[] = { 0 };
   1782   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1783 
   1784   ucol_close(coll);
   1785 }
   1786 
   1787 static void TestUCARules(void) {
   1788   UErrorCode status = U_ZERO_ERROR;
   1789   UChar b[256];
   1790   UChar *rules = b;
   1791   uint32_t ruleLen = 0;
   1792   UCollator *UCAfromRules = NULL;
   1793   UCollator *coll = ucol_open("", &status);
   1794   if(status == U_FILE_ACCESS_ERROR) {
   1795     log_data_err("Is your data around?\n");
   1796     return;
   1797   } else if(U_FAILURE(status)) {
   1798     log_err("Error opening collator\n");
   1799     return;
   1800   }
   1801   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
   1802 
   1803   log_verbose("TestUCARules\n");
   1804   if(ruleLen > 256) {
   1805     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
   1806     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
   1807   }
   1808   log_verbose("Rules length is %d\n", ruleLen);
   1809   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1810   if(U_SUCCESS(status)) {
   1811     ucol_close(UCAfromRules);
   1812   } else {
   1813     log_verbose("Unable to create a collator from UCARules!\n");
   1814   }
   1815 /*
   1816   u_unescape(blah, b, 256);
   1817   ucol_getSortKey(coll, b, 1, res, 256);
   1818 */
   1819   ucol_close(coll);
   1820   if(rules != b) {
   1821     free(rules);
   1822   }
   1823 }
   1824 
   1825 
   1826 /* Pinyin tonal order */
   1827 /*
   1828     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
   1829           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
   1830     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
   1831     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
   1832     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
   1833     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
   1834       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
   1835 .. (\u00fc)
   1836 
   1837 However, in testing we got the following order:
   1838     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
   1839           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
   1840     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
   1841 .. (\u0113)
   1842     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
   1843     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
   1844     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
   1845 .. (\u01d8)
   1846       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
   1847 */
   1848 
   1849 static void TestBefore(void) {
   1850   const static char *data[] = {
   1851       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
   1852       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
   1853       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
   1854       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
   1855       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
   1856       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
   1857   };
   1858   genericRulesStarter(
   1859     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
   1860     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
   1861     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
   1862     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
   1863     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
   1864     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
   1865     data, sizeof(data)/sizeof(data[0]));
   1866 }
   1867 
   1868 #if 0
   1869 /* superceded by TestBeforePinyin */
   1870 static void TestJ784(void) {
   1871   const static char *data[] = {
   1872       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
   1873       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
   1874       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
   1875       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
   1876       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
   1877       "\\u00fc",
   1878            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
   1879   };
   1880   genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
   1881 }
   1882 #endif
   1883 
   1884 #if 0
   1885 /* superceded by the changes to the lv locale */
   1886 static void TestJ831(void) {
   1887   const static char *data[] = {
   1888     "I",
   1889       "i",
   1890       "Y",
   1891       "y"
   1892   };
   1893   genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
   1894 }
   1895 #endif
   1896 
   1897 static void TestJ815(void) {
   1898   const static char *data[] = {
   1899     "aa",
   1900       "Aa",
   1901       "ab",
   1902       "Ab",
   1903       "ad",
   1904       "Ad",
   1905       "ae",
   1906       "Ae",
   1907       "\\u00e6",
   1908       "\\u00c6",
   1909       "af",
   1910       "Af",
   1911       "b",
   1912       "B"
   1913   };
   1914   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
   1915   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
   1916 }
   1917 
   1918 
   1919 /*
   1920 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
   1921 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
   1922 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
   1923 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
   1924 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
   1925 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
   1926 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
   1927 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
   1928 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
   1929 */
   1930 static void TestRedundantRules(void) {
   1931   int32_t i;
   1932 
   1933   static const struct {
   1934       const char *rules;
   1935       const char *expectedRules;
   1936       const char *testdata[8];
   1937       uint32_t testdatalen;
   1938   } tests[] = {
   1939     /* this test conflicts with positioning of CODAN placeholder */
   1940        /*{
   1941         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
   1942         "&\\u2089<<<x",
   1943         {"\\u2089", "x"}, 2
   1944        }, */
   1945     /* this test conflicts with the [before x] syntax tightening */
   1946       /*{
   1947         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
   1948         "&\\u0252<<<x",
   1949         {"\\u0252", "x"}, 2
   1950       }, */
   1951     /* this test conflicts with the [before x] syntax tightening */
   1952       /*{
   1953          "& a < b <<< c << d <<< e& [before 1] e <<< x",
   1954          "& a <<< x < b <<< c << d <<< e",
   1955         {"a", "x", "b", "c", "d", "e"}, 6
   1956       }, */
   1957       {
   1958         "& a < b < c < d& [before 1] c < m",
   1959         "& a < b < m < c < d",
   1960         {"a", "b", "m", "c", "d"}, 5
   1961       },
   1962       {
   1963         "& a < b <<< c << d <<< e& [before 3] e <<< x",
   1964         "& a < b <<< c << d <<< x <<< e",
   1965         {"a", "b", "c", "d", "x", "e"}, 6
   1966       },
   1967     /* this test conflicts with the [before x] syntax tightening */
   1968       /* {
   1969         "& a < b <<< c << d <<< e& [before 2] e <<< x",
   1970         "& a < b <<< c <<< x << d <<< e",
   1971         {"a", "b", "c", "x", "d", "e"},, 6
   1972       }, */
   1973       {
   1974         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
   1975         "& a < b <<< c << d <<< e <<< f < x < g",
   1976         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
   1977       },
   1978       {
   1979         "& a <<< b << c < d& a < m",
   1980         "& a <<< b << c < m < d",
   1981         {"a", "b", "c", "m", "d"}, 5
   1982       },
   1983       {
   1984         "&a<b<<b\\u0301 &z<b",
   1985         "&a<b\\u0301 &z<b",
   1986         {"a", "b\\u0301", "z", "b"}, 4
   1987       },
   1988       {
   1989         "&z<m<<<q<<<m",
   1990         "&z<q<<<m",
   1991         {"z", "q", "m"},3
   1992       },
   1993       {
   1994         "&z<<<m<q<<<m",
   1995         "&z<q<<<m",
   1996         {"z", "q", "m"}, 3
   1997       },
   1998       {
   1999         "& a < b < c < d& r < c",
   2000         "& a < b < d& r < c",
   2001         {"a", "b", "d"}, 3
   2002       },
   2003       {
   2004         "& a < b < c < d& r < c",
   2005         "& a < b < d& r < c",
   2006         {"r", "c"}, 2
   2007       },
   2008       {
   2009         "& a < b < c < d& c < m",
   2010         "& a < b < c < m < d",
   2011         {"a", "b", "c", "m", "d"}, 5
   2012       },
   2013       {
   2014         "& a < b < c < d& a < m",
   2015         "& a < m < b < c < d",
   2016         {"a", "m", "b", "c", "d"}, 5
   2017       }
   2018   };
   2019 
   2020 
   2021   UCollator *credundant = NULL;
   2022   UCollator *cresulting = NULL;
   2023   UErrorCode status = U_ZERO_ERROR;
   2024   UChar rlz[2048] = { 0 };
   2025   uint32_t rlen = 0;
   2026 
   2027   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
   2028     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
   2029     rlen = u_unescape(tests[i].rules, rlz, 2048);
   2030 
   2031     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2032     if(status == U_FILE_ACCESS_ERROR) {
   2033       log_data_err("Is your data around?\n");
   2034       return;
   2035     } else if(U_FAILURE(status)) {
   2036       log_err("Error opening collator\n");
   2037       return;
   2038     }
   2039 
   2040     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
   2041     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2042 
   2043     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
   2044 
   2045     ucol_close(credundant);
   2046     ucol_close(cresulting);
   2047 
   2048     log_verbose("testing using data\n");
   2049 
   2050     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
   2051   }
   2052 
   2053 }
   2054 
   2055 static void TestExpansionSyntax(void) {
   2056   int32_t i;
   2057 
   2058   const static char *rules[] = {
   2059     "&AE <<< a << b <<< c &d <<< f",
   2060     "&AE <<< a <<< b << c << d < e < f <<< g",
   2061     "&AE <<< B <<< C / D <<< F"
   2062   };
   2063 
   2064   const static char *expectedRules[] = {
   2065     "&A <<< a / E << b / E <<< c /E  &d <<< f",
   2066     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
   2067     "&A <<< B / E <<< C / ED <<< F / E"
   2068   };
   2069 
   2070   const static char *testdata[][8] = {
   2071     {"AE", "a", "b", "c"},
   2072     {"AE", "a", "b", "c", "d", "e", "f", "g"},
   2073     {"AE", "B", "C"} /* / ED <<< F / E"},*/
   2074   };
   2075 
   2076   const static uint32_t testdatalen[] = {
   2077       4,
   2078       8,
   2079       3
   2080   };
   2081 
   2082 
   2083 
   2084   UCollator *credundant = NULL;
   2085   UCollator *cresulting = NULL;
   2086   UErrorCode status = U_ZERO_ERROR;
   2087   UChar rlz[2048] = { 0 };
   2088   uint32_t rlen = 0;
   2089 
   2090   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
   2091     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
   2092     rlen = u_unescape(rules[i], rlz, 2048);
   2093 
   2094     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2095     if(status == U_FILE_ACCESS_ERROR) {
   2096       log_data_err("Is your data around?\n");
   2097       return;
   2098     } else if(U_FAILURE(status)) {
   2099       log_err("Error opening collator\n");
   2100       return;
   2101     }
   2102     rlen = u_unescape(expectedRules[i], rlz, 2048);
   2103     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2104 
   2105     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
   2106     /* as a hard error test, but only in information mode */
   2107     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
   2108 
   2109     ucol_close(credundant);
   2110     ucol_close(cresulting);
   2111 
   2112     log_verbose("testing using data\n");
   2113 
   2114     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
   2115   }
   2116 }
   2117 
   2118 static void TestCase(void)
   2119 {
   2120     const static UChar gRules[MAX_TOKEN_LEN] =
   2121     /*" & 0 < 1,\u2461<a,A"*/
   2122     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
   2123 
   2124     const static UChar testCase[][MAX_TOKEN_LEN] =
   2125     {
   2126         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
   2127         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
   2128         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
   2129         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
   2130     };
   2131 
   2132     const static UCollationResult caseTestResults[][9] =
   2133     {
   2134         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2135         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
   2136         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2137         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
   2138     };
   2139 
   2140     const static UColAttributeValue caseTestAttributes[][2] =
   2141     {
   2142         { UCOL_LOWER_FIRST, UCOL_OFF},
   2143         { UCOL_UPPER_FIRST, UCOL_OFF},
   2144         { UCOL_LOWER_FIRST, UCOL_ON},
   2145         { UCOL_UPPER_FIRST, UCOL_ON}
   2146     };
   2147     int32_t i,j,k;
   2148     UErrorCode status = U_ZERO_ERROR;
   2149     UCollationElements *iter;
   2150     UCollator  *myCollation;
   2151     myCollation = ucol_open("en_US", &status);
   2152 
   2153     if(U_FAILURE(status)){
   2154         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2155         return;
   2156     }
   2157     log_verbose("Testing different case settings\n");
   2158     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2159 
   2160     for(k = 0; k<4; k++) {
   2161       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2162       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2163       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
   2164       for (i = 0; i < 3 ; i++) {
   2165         for(j = i+1; j<4; j++) {
   2166           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2167         }
   2168       }
   2169     }
   2170     ucol_close(myCollation);
   2171 
   2172     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   2173     if(U_FAILURE(status)){
   2174         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2175         return;
   2176     }
   2177     log_verbose("Testing different case settings with custom rules\n");
   2178     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2179 
   2180     for(k = 0; k<4; k++) {
   2181       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2182       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2183       for (i = 0; i < 3 ; i++) {
   2184         for(j = i+1; j<4; j++) {
   2185           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
   2186           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2187           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
   2188           backAndForth(iter);
   2189           ucol_closeElements(iter);
   2190           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
   2191           backAndForth(iter);
   2192           ucol_closeElements(iter);
   2193         }
   2194       }
   2195     }
   2196     ucol_close(myCollation);
   2197     {
   2198       const static char *lowerFirst[] = {
   2199         "h",
   2200         "H",
   2201         "ch",
   2202         "Ch",
   2203         "CH",
   2204         "cha",
   2205         "chA",
   2206         "Cha",
   2207         "ChA",
   2208         "CHa",
   2209         "CHA",
   2210         "i",
   2211         "I"
   2212       };
   2213 
   2214       const static char *upperFirst[] = {
   2215         "H",
   2216         "h",
   2217         "CH",
   2218         "Ch",
   2219         "ch",
   2220         "CHA",
   2221         "CHa",
   2222         "ChA",
   2223         "Cha",
   2224         "chA",
   2225         "cha",
   2226         "I",
   2227         "i"
   2228       };
   2229       log_verbose("mixed case test\n");
   2230       log_verbose("lower first, case level off\n");
   2231       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2232       log_verbose("upper first, case level off\n");
   2233       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2234       log_verbose("lower first, case level on\n");
   2235       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2236       log_verbose("upper first, case level on\n");
   2237       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2238     }
   2239 
   2240 }
   2241 
   2242 static void TestIncrementalNormalize(void) {
   2243 
   2244     /*UChar baseA     =0x61;*/
   2245     UChar baseA     =0x41;
   2246 /*    UChar baseB     = 0x42;*/
   2247     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
   2248     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
   2249     /*
   2250         0x316 is combining grave accent below, cc=220
   2251         0x321 is combining palatalized hook below, cc=202
   2252         0x300 is combining grave accent, cc=230
   2253     */
   2254 
   2255 #define MAXSLEN 2000
   2256     /*int          maxSLen   = 64000;*/
   2257     int          sLen;
   2258     int          i;
   2259 
   2260     UCollator        *coll;
   2261     UErrorCode       status = U_ZERO_ERROR;
   2262     UCollationResult result;
   2263 
   2264     int32_t myQ = getTestOption(QUICK_OPTION);
   2265 
   2266     if(getTestOption(QUICK_OPTION) < 0) {
   2267         setTestOption(QUICK_OPTION, 1);
   2268     }
   2269 
   2270     {
   2271         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
   2272         /*          most buffers along the way.*/
   2273         UChar            strA[MAXSLEN+1];
   2274         UChar            strB[MAXSLEN+1];
   2275 
   2276         coll = ucol_open("en_US", &status);
   2277         if(status == U_FILE_ACCESS_ERROR) {
   2278           log_data_err("Is your data around?\n");
   2279           return;
   2280         } else if(U_FAILURE(status)) {
   2281           log_err("Error opening collator\n");
   2282           return;
   2283         }
   2284         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2285 
   2286         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   2287         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   2288         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   2289         for (sLen = 500; sLen<501; sLen++) {
   2290         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   2291             strA[0] = baseA;
   2292             strB[0] = baseA;
   2293             for (i=1; i<=sLen-1; i++) {
   2294                 strA[i] = ccMix[i % 3];
   2295                 strB[sLen-i] = ccMix[i % 3];
   2296             }
   2297             strA[sLen]   = 0;
   2298             strB[sLen]   = 0;
   2299 
   2300             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   2301             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   2302             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   2303             doTest(coll, strA, strB, UCOL_EQUAL);
   2304         }
   2305     }
   2306 
   2307     setTestOption(QUICK_OPTION, myQ);
   2308 
   2309 
   2310     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   2311     /*         of the string.  Checks a couple of edge cases.*/
   2312 
   2313     {
   2314         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   2315         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   2316         ucol_setStrength(coll, UCOL_TERTIARY);
   2317         doTest(coll, strA, strB, UCOL_EQUAL);
   2318     }
   2319 
   2320     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   2321 
   2322     {
   2323       /* New UCA  3.1.1.
   2324        * test below used a code point from Desseret, which sorts differently
   2325        * than d800 dc00
   2326        */
   2327         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   2328         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   2329         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   2330         ucol_setStrength(coll, UCOL_TERTIARY);
   2331         doTest(coll, strA, strB, UCOL_GREATER);
   2332     }
   2333 
   2334     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   2335 
   2336     {
   2337         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   2338         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   2339         char  sortKeyA[50];
   2340         char  sortKeyAz[50];
   2341         char  sortKeyB[50];
   2342         char  sortKeyBz[50];
   2343         int   r;
   2344 
   2345         /* there used to be -3 here. Hmmmm.... */
   2346         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   2347         result = ucol_strcoll(coll, strA, 3, strB, 3);
   2348         if (result != UCOL_GREATER) {
   2349             log_err("ERROR 1 in test 4\n");
   2350         }
   2351         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2352         if (result != UCOL_EQUAL) {
   2353             log_err("ERROR 2 in test 4\n");
   2354         }
   2355 
   2356         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2357         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2358         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2359         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2360 
   2361         r = strcmp(sortKeyA, sortKeyAz);
   2362         if (r <= 0) {
   2363             log_err("Error 3 in test 4\n");
   2364         }
   2365         r = strcmp(sortKeyA, sortKeyB);
   2366         if (r <= 0) {
   2367             log_err("Error 4 in test 4\n");
   2368         }
   2369         r = strcmp(sortKeyAz, sortKeyBz);
   2370         if (r != 0) {
   2371             log_err("Error 5 in test 4\n");
   2372         }
   2373 
   2374         ucol_setStrength(coll, UCOL_IDENTICAL);
   2375         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2376         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2377         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2378         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2379 
   2380         r = strcmp(sortKeyA, sortKeyAz);
   2381         if (r <= 0) {
   2382             log_err("Error 6 in test 4\n");
   2383         }
   2384         r = strcmp(sortKeyA, sortKeyB);
   2385         if (r <= 0) {
   2386             log_err("Error 7 in test 4\n");
   2387         }
   2388         r = strcmp(sortKeyAz, sortKeyBz);
   2389         if (r != 0) {
   2390             log_err("Error 8 in test 4\n");
   2391         }
   2392         ucol_setStrength(coll, UCOL_TERTIARY);
   2393     }
   2394 
   2395 
   2396     /*  Test 5:  Null characters in non-normal source strings.*/
   2397 
   2398     {
   2399         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   2400         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   2401         char  sortKeyA[50];
   2402         char  sortKeyAz[50];
   2403         char  sortKeyB[50];
   2404         char  sortKeyBz[50];
   2405         int   r;
   2406 
   2407         result = ucol_strcoll(coll, strA, 6, strB, 6);
   2408         if (result != UCOL_GREATER) {
   2409             log_err("ERROR 1 in test 5\n");
   2410         }
   2411         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2412         if (result != UCOL_EQUAL) {
   2413             log_err("ERROR 2 in test 5\n");
   2414         }
   2415 
   2416         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2417         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2418         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2419         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2420 
   2421         r = strcmp(sortKeyA, sortKeyAz);
   2422         if (r <= 0) {
   2423             log_err("Error 3 in test 5\n");
   2424         }
   2425         r = strcmp(sortKeyA, sortKeyB);
   2426         if (r <= 0) {
   2427             log_err("Error 4 in test 5\n");
   2428         }
   2429         r = strcmp(sortKeyAz, sortKeyBz);
   2430         if (r != 0) {
   2431             log_err("Error 5 in test 5\n");
   2432         }
   2433 
   2434         ucol_setStrength(coll, UCOL_IDENTICAL);
   2435         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2436         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2437         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2438         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2439 
   2440         r = strcmp(sortKeyA, sortKeyAz);
   2441         if (r <= 0) {
   2442             log_err("Error 6 in test 5\n");
   2443         }
   2444         r = strcmp(sortKeyA, sortKeyB);
   2445         if (r <= 0) {
   2446             log_err("Error 7 in test 5\n");
   2447         }
   2448         r = strcmp(sortKeyAz, sortKeyBz);
   2449         if (r != 0) {
   2450             log_err("Error 8 in test 5\n");
   2451         }
   2452         ucol_setStrength(coll, UCOL_TERTIARY);
   2453     }
   2454 
   2455 
   2456     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   2457 
   2458     {
   2459         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   2460         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   2461 
   2462         result = ucol_strcoll(coll, strA, 5, strB, 5);
   2463         if (result != UCOL_LESS) {
   2464             log_err("Error 1 in test 6\n");
   2465         }
   2466         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2467         if (result != UCOL_EQUAL) {
   2468             log_err("Error 2 in test 6\n");
   2469         }
   2470     }
   2471 
   2472     ucol_close(coll);
   2473 }
   2474 
   2475 
   2476 
   2477 #if 0
   2478 static void TestGetCaseBit(void) {
   2479   static const char *caseBitData[] = {
   2480     "a", "A", "ch", "Ch", "CH",
   2481       "\\uFF9E", "\\u0009"
   2482   };
   2483 
   2484   static const uint8_t results[] = {
   2485     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   2486       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   2487   };
   2488 
   2489   uint32_t i, blen = 0;
   2490   UChar b[256] = {0};
   2491   UErrorCode status = U_ZERO_ERROR;
   2492   UCollator *UCA = ucol_open("", &status);
   2493   uint8_t res = 0;
   2494 
   2495   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   2496     blen = u_unescape(caseBitData[i], b, 256);
   2497     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   2498     if(results[i] != res) {
   2499       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   2500     }
   2501   }
   2502 }
   2503 #endif
   2504 
   2505 static void TestHangulTailoring(void) {
   2506     static const char *koreanData[] = {
   2507         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   2508             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   2509             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   2510             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   2511             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   2512             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   2513     };
   2514 
   2515     const char *rules =
   2516         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   2517         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   2518         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   2519         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   2520         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   2521         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   2522 
   2523 
   2524   UErrorCode status = U_ZERO_ERROR;
   2525   UChar rlz[2048] = { 0 };
   2526   uint32_t rlen = u_unescape(rules, rlz, 2048);
   2527 
   2528   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   2529   if(status == U_FILE_ACCESS_ERROR) {
   2530     log_data_err("Is your data around?\n");
   2531     return;
   2532   } else if(U_FAILURE(status)) {
   2533     log_err("Error opening collator\n");
   2534     return;
   2535   }
   2536 
   2537   log_verbose("Using start of korean rules\n");
   2538 
   2539   if(U_SUCCESS(status)) {
   2540     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2541   } else {
   2542     log_err("Unable to open collator with rules %s\n", rules);
   2543   }
   2544 
   2545   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
   2546   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
   2547   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2548 
   2549   ucol_close(coll);
   2550 
   2551   log_verbose("Using ko__LOTUS locale\n");
   2552   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2553 }
   2554 
   2555 static void TestCompressOverlap(void) {
   2556     UChar       secstr[150];
   2557     UChar       tertstr[150];
   2558     UErrorCode  status = U_ZERO_ERROR;
   2559     UCollator  *coll;
   2560     char        result[200];
   2561     uint32_t    resultlen;
   2562     int         count = 0;
   2563     char       *tempptr;
   2564 
   2565     coll = ucol_open("", &status);
   2566 
   2567     if (U_FAILURE(status)) {
   2568         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   2569         return;
   2570     }
   2571     while (count < 149) {
   2572         secstr[count] = 0x0020; /* [06, 05, 05] */
   2573         tertstr[count] = 0x0020;
   2574         count ++;
   2575     }
   2576 
   2577     /* top down compression ----------------------------------- */
   2578     secstr[count] = 0x0332; /* [, 87, 05] */
   2579     tertstr[count] = 0x3000; /* [06, 05, 07] */
   2580 
   2581     /* no compression secstr should have 150 secondary bytes, tertstr should
   2582     have 150 tertiary bytes.
   2583     with correct overlapping compression, secstr should have 4 secondary
   2584     bytes, tertstr should have > 2 tertiary bytes */
   2585     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2586     tempptr = uprv_strchr(result, 1) + 1;
   2587     while (*(tempptr + 1) != 1) {
   2588         /* the last secondary collation element is not checked since it is not
   2589         part of the compression */
   2590         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
   2591             log_err("Secondary compression overlapped\n");
   2592         }
   2593         tempptr ++;
   2594     }
   2595 
   2596     /* tertiary top/bottom/common for en_US is similar to the secondary
   2597     top/bottom/common */
   2598     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2599     tempptr = uprv_strrchr(result, 1) + 1;
   2600     while (*(tempptr + 1) != 0) {
   2601         /* the last secondary collation element is not checked since it is not
   2602         part of the compression */
   2603         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
   2604             log_err("Tertiary compression overlapped\n");
   2605         }
   2606         tempptr ++;
   2607     }
   2608 
   2609     /* bottom up compression ------------------------------------- */
   2610     secstr[count] = 0;
   2611     tertstr[count] = 0;
   2612     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2613     tempptr = uprv_strchr(result, 1) + 1;
   2614     while (*(tempptr + 1) != 1) {
   2615         /* the last secondary collation element is not checked since it is not
   2616         part of the compression */
   2617         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
   2618             log_err("Secondary compression overlapped\n");
   2619         }
   2620         tempptr ++;
   2621     }
   2622 
   2623     /* tertiary top/bottom/common for en_US is similar to the secondary
   2624     top/bottom/common */
   2625     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2626     tempptr = uprv_strrchr(result, 1) + 1;
   2627     while (*(tempptr + 1) != 0) {
   2628         /* the last secondary collation element is not checked since it is not
   2629         part of the compression */
   2630         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
   2631             log_err("Tertiary compression overlapped\n");
   2632         }
   2633         tempptr ++;
   2634     }
   2635 
   2636     ucol_close(coll);
   2637 }
   2638 
   2639 static void TestCyrillicTailoring(void) {
   2640   static const char *test[] = {
   2641     "\\u0410b",
   2642       "\\u0410\\u0306a",
   2643       "\\u04d0A"
   2644   };
   2645 
   2646     /* Russian overrides contractions, so this test is not valid anymore */
   2647     /*genericLocaleStarter("ru", test, 3);*/
   2648 
   2649     genericLocaleStarter("root", test, 3);
   2650     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   2651     genericRulesStarter("&Z < \\u0410", test, 3);
   2652     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   2653     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   2654     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   2655     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   2656 }
   2657 
   2658 static void TestSuppressContractions(void) {
   2659 
   2660   static const char *testNoCont2[] = {
   2661       "\\u0410\\u0302a",
   2662       "\\u0410\\u0306b",
   2663       "\\u0410c"
   2664   };
   2665   static const char *testNoCont[] = {
   2666       "a\\u0410",
   2667       "A\\u0410\\u0306",
   2668       "\\uFF21\\u0410\\u0302"
   2669   };
   2670 
   2671   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   2672   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   2673 }
   2674 
   2675 static void TestContraction(void) {
   2676     const static char *testrules[] = {
   2677         "&A = AB / B",
   2678         "&A = A\\u0306/\\u0306",
   2679         "&c = ch / h"
   2680     };
   2681     const static UChar testdata[][2] = {
   2682         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   2683         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   2684         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   2685     };
   2686     const static UChar testdata2[][2] = {
   2687         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   2688         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   2689         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   2690     };
   2691     const static char *testrules3[] = {
   2692         "&z < xyz &xyzw << B",
   2693         "&z < xyz &xyz << B / w",
   2694         "&z < ch &achm << B",
   2695         "&z < ch &a << B / chm",
   2696         "&\\ud800\\udc00w << B",
   2697         "&\\ud800\\udc00 << B / w",
   2698         "&a\\ud800\\udc00m << B",
   2699         "&a << B / \\ud800\\udc00m",
   2700     };
   2701 
   2702     UErrorCode  status   = U_ZERO_ERROR;
   2703     UCollator  *coll;
   2704     UChar       rule[256] = {0};
   2705     uint32_t    rlen     = 0;
   2706     int         i;
   2707 
   2708     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2709         UCollationElements *iter1;
   2710         int j = 0;
   2711         log_verbose("Rule %s for testing\n", testrules[i]);
   2712         rlen = u_unescape(testrules[i], rule, 32);
   2713         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2714         if (U_FAILURE(status)) {
   2715             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2716             return;
   2717         }
   2718         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   2719         if (U_FAILURE(status)) {
   2720             log_err("Collation iterator creation failed\n");
   2721             return;
   2722         }
   2723         while (j < 2) {
   2724             UCollationElements *iter2 = ucol_openElements(coll,
   2725                                                          &(testdata[i][j]),
   2726                                                          1, &status);
   2727             uint32_t ce;
   2728             if (U_FAILURE(status)) {
   2729                 log_err("Collation iterator creation failed\n");
   2730                 return;
   2731             }
   2732             ce = ucol_next(iter2, &status);
   2733             while (ce != UCOL_NULLORDER) {
   2734                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   2735                     log_err("Collation elements in contraction split does not match\n");
   2736                     return;
   2737                 }
   2738                 ce = ucol_next(iter2, &status);
   2739             }
   2740             j ++;
   2741             ucol_closeElements(iter2);
   2742         }
   2743         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   2744             log_err("Collation elements not exhausted\n");
   2745             return;
   2746         }
   2747         ucol_closeElements(iter1);
   2748         ucol_close(coll);
   2749     }
   2750 
   2751     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   2752     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2753     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   2754         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2755                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   2756                 testdata2[1][1]);
   2757         return;
   2758     }
   2759     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   2760         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2761                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   2762                 testdata2[2][1]);
   2763         return;
   2764     }
   2765     ucol_close(coll);
   2766 
   2767     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   2768         UCollator          *coll1,
   2769                            *coll2;
   2770         UCollationElements *iter1,
   2771                            *iter2;
   2772         UChar               ch = 0x0042 /* 'B' */;
   2773         uint32_t            ce;
   2774         rlen = u_unescape(testrules3[i], rule, 32);
   2775         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2776         rlen = u_unescape(testrules3[i + 1], rule, 32);
   2777         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2778         if (U_FAILURE(status)) {
   2779             log_err("Collator creation failed %s\n", testrules[i]);
   2780             return;
   2781         }
   2782         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   2783         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   2784         if (U_FAILURE(status)) {
   2785             log_err("Collation iterator creation failed\n");
   2786             return;
   2787         }
   2788         ce = ucol_next(iter1, &status);
   2789         if (U_FAILURE(status)) {
   2790             log_err("Retrieving ces failed\n");
   2791             return;
   2792         }
   2793         while (ce != UCOL_NULLORDER) {
   2794             if (ce != (uint32_t)ucol_next(iter2, &status)) {
   2795                 log_err("CEs does not match\n");
   2796                 return;
   2797             }
   2798             ce = ucol_next(iter1, &status);
   2799             if (U_FAILURE(status)) {
   2800                 log_err("Retrieving ces failed\n");
   2801                 return;
   2802             }
   2803         }
   2804         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   2805             log_err("CEs not exhausted\n");
   2806             return;
   2807         }
   2808         ucol_closeElements(iter1);
   2809         ucol_closeElements(iter2);
   2810         ucol_close(coll1);
   2811         ucol_close(coll2);
   2812     }
   2813 }
   2814 
   2815 static void TestExpansion(void) {
   2816     const static char *testrules[] = {
   2817         "&J << K / B & K << M",
   2818         "&J << K / B << M"
   2819     };
   2820     const static UChar testdata[][3] = {
   2821         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   2822         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   2823         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   2824         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   2825         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   2826         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   2827     };
   2828 
   2829     UErrorCode  status   = U_ZERO_ERROR;
   2830     UCollator  *coll;
   2831     UChar       rule[256] = {0};
   2832     uint32_t    rlen     = 0;
   2833     int         i;
   2834 
   2835     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2836         int j = 0;
   2837         log_verbose("Rule %s for testing\n", testrules[i]);
   2838         rlen = u_unescape(testrules[i], rule, 32);
   2839         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2840         if (U_FAILURE(status)) {
   2841             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2842             return;
   2843         }
   2844 
   2845         for (j = 0; j < 5; j ++) {
   2846             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   2847         }
   2848         ucol_close(coll);
   2849     }
   2850 }
   2851 
   2852 #if 0
   2853 /* this test tests the current limitations of the engine */
   2854 /* it always fail, so it is disabled by default */
   2855 static void TestLimitations(void) {
   2856   /* recursive expansions */
   2857   {
   2858     static const char *rule = "&a=b/c&d=c/e";
   2859     static const char *tlimit01[] = {"add","b","adf"};
   2860     static const char *tlimit02[] = {"aa","b","af"};
   2861     log_verbose("recursive expansions\n");
   2862     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2863     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2864   }
   2865   /* contractions spanning expansions */
   2866   {
   2867     static const char *rule = "&a<<<c/e&g<<<eh";
   2868     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   2869     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   2870     log_verbose("contractions spanning expansions\n");
   2871     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2872     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2873   }
   2874   /* normalization: nulls in contractions */
   2875   {
   2876     static const char *rule = "&a<<<\\u0000\\u0302";
   2877     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2878     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2879     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2880     static const UColAttributeValue valOn[] = { UCOL_ON };
   2881     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2882 
   2883     log_verbose("NULL in contractions\n");
   2884     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2885     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2886     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2887     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2888 
   2889   }
   2890   /* normalization: contractions spanning normalization */
   2891   {
   2892     static const char *rule = "&a<<<\\u0000\\u0302";
   2893     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2894     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2895     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2896     static const UColAttributeValue valOn[] = { UCOL_ON };
   2897     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2898 
   2899     log_verbose("contractions spanning normalization\n");
   2900     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2901     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2902     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2903     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2904 
   2905   }
   2906   /* variable top:  */
   2907   {
   2908     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   2909     static const char *rule = "&\\u2010<x<[variable top]=z";
   2910     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   2911     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   2912     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   2913     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   2914     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   2915     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   2916     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   2917 
   2918     log_verbose("variable top\n");
   2919     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2920     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2921     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2922     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2923     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2924 
   2925   }
   2926   /* case level */
   2927   {
   2928     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   2929     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   2930     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   2931     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   2932     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   2933     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   2934     log_verbose("case level\n");
   2935     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2936     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2937     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2938     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2939   }
   2940 
   2941 }
   2942 #endif
   2943 
   2944 static void TestBocsuCoverage(void) {
   2945   UErrorCode status = U_ZERO_ERROR;
   2946   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   2947   UChar       test[256] = {0};
   2948   uint32_t    tlen     = u_unescape(testString, test, 32);
   2949   uint8_t key[256]     = {0};
   2950   uint32_t klen         = 0;
   2951 
   2952   UCollator *coll = ucol_open("", &status);
   2953   if(U_SUCCESS(status)) {
   2954   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   2955 
   2956   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   2957 
   2958   ucol_close(coll);
   2959   } else {
   2960     log_data_err("Couldn't open UCA\n");
   2961   }
   2962 }
   2963 
   2964 static void TestVariableTopSetting(void) {
   2965   UErrorCode status = U_ZERO_ERROR;
   2966   const UChar *current = NULL;
   2967   uint32_t varTopOriginal = 0, varTop1, varTop2;
   2968   UCollator *coll = ucol_open("", &status);
   2969   if(U_SUCCESS(status)) {
   2970 
   2971   uint32_t strength = 0;
   2972   uint16_t specs = 0;
   2973   uint32_t chOffset = 0;
   2974   uint32_t chLen = 0;
   2975   uint32_t exOffset = 0;
   2976   uint32_t exLen = 0;
   2977   uint32_t oldChOffset = 0;
   2978   uint32_t oldChLen = 0;
   2979   uint32_t oldExOffset = 0;
   2980   uint32_t oldExLen = 0;
   2981   uint32_t prefixOffset = 0;
   2982   uint32_t prefixLen = 0;
   2983 
   2984   UBool startOfRules = TRUE;
   2985   UColTokenParser src;
   2986   UColOptionSet opts;
   2987 
   2988   UChar *rulesCopy = NULL;
   2989   uint32_t rulesLen;
   2990 
   2991   UCollationResult result;
   2992 
   2993   UChar first[256] = { 0 };
   2994   UChar second[256] = { 0 };
   2995   UParseError parseError;
   2996   int32_t myQ = getTestOption(QUICK_OPTION);
   2997 
   2998   uprv_memset(&src, 0, sizeof(UColTokenParser));
   2999 
   3000   src.opts = &opts;
   3001 
   3002   if(getTestOption(QUICK_OPTION) <= 0) {
   3003     setTestOption(QUICK_OPTION, 1);
   3004   }
   3005 
   3006   /* this test will fail when normalization is turned on */
   3007   /* therefore we always turn off exhaustive mode for it */
   3008   { /* QUICK > 0*/
   3009     log_verbose("Slide variable top over UCARules\n");
   3010     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
   3011     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   3012     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
   3013 
   3014     if(U_SUCCESS(status) && rulesLen > 0) {
   3015       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   3016       src.current = src.source = rulesCopy;
   3017       src.end = rulesCopy+rulesLen;
   3018       src.extraCurrent = src.end;
   3019       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   3020 
   3021 	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   3022 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   3023       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   3024         strength = src.parsedToken.strength;
   3025         chOffset = src.parsedToken.charsOffset;
   3026         chLen = src.parsedToken.charsLen;
   3027         exOffset = src.parsedToken.extensionOffset;
   3028         exLen = src.parsedToken.extensionLen;
   3029         prefixOffset = src.parsedToken.prefixOffset;
   3030         prefixLen = src.parsedToken.prefixLen;
   3031         specs = src.parsedToken.flags;
   3032 
   3033         startOfRules = FALSE;
   3034         {
   3035           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
   3036         }
   3037         if(strength == UCOL_PRIMARY) {
   3038           status = U_ZERO_ERROR;
   3039           varTopOriginal = ucol_getVariableTop(coll, &status);
   3040           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
   3041           if(U_FAILURE(status)) {
   3042             char buffer[256];
   3043             char *buf = buffer;
   3044             uint32_t i = 0, j;
   3045             uint32_t CE = UCOL_NO_MORE_CES;
   3046 
   3047             /* before we start screaming, let's see if there is a problem with the rules */
   3048             UErrorCode collIterateStatus = U_ZERO_ERROR;
   3049             collIterate *s = uprv_new_collIterate(&collIterateStatus);
   3050             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
   3051 
   3052             CE = ucol_getNextCE(coll, s, &status);
   3053 
   3054             for(i = 0; i < oldChLen; i++) {
   3055               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
   3056               buf += j;
   3057             }
   3058             if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3059               log_verbose("= Expected failure for %s =", buffer);
   3060             } else {
   3061               if(uprv_collIterateAtEnd(s)) {
   3062                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
   3063                   oldChOffset, u_errorName(status), buffer);
   3064               } else {
   3065                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
   3066                   buffer);
   3067               }
   3068             }
   3069             uprv_delete_collIterate(s);
   3070           }
   3071           varTop2 = ucol_getVariableTop(coll, &status);
   3072           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
   3073             log_err("cannot retrieve set varTop value!\n");
   3074             continue;
   3075           }
   3076 
   3077           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
   3078 
   3079             u_strncpy(first, src.source+oldChOffset, oldChLen);
   3080             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
   3081             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
   3082             first[2*oldChLen+chLen] = 0;
   3083 
   3084             if(oldExLen == 0) {
   3085               u_strncpy(second, src.source+chOffset, chLen);
   3086               second[chLen] = 0;
   3087             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
   3088               u_strncpy(second, src.source+oldExOffset, oldExLen);
   3089               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
   3090               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
   3091               second[2*oldExLen+chLen] = 0;
   3092             }
   3093             result = ucol_strcoll(coll, first, -1, second, -1);
   3094             if(result == UCOL_EQUAL) {
   3095               doTest(coll, first, second, UCOL_EQUAL);
   3096             } else {
   3097               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
   3098             }
   3099           }
   3100         }
   3101         if(strength != UCOL_TOK_RESET) {
   3102           oldChOffset = chOffset;
   3103           oldChLen = chLen;
   3104           oldExOffset = exOffset;
   3105           oldExLen = exLen;
   3106         }
   3107       }
   3108       status = U_ZERO_ERROR;
   3109     }
   3110     else {
   3111       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
   3112       return;
   3113     }
   3114     if (U_FAILURE(status)) {
   3115         log_err("Error parsing rules %s\n", u_errorName(status));
   3116         return;
   3117     }
   3118     status = U_ZERO_ERROR;
   3119   }
   3120 
   3121   setTestOption(QUICK_OPTION, myQ);
   3122 
   3123   log_verbose("Testing setting variable top to contractions\n");
   3124   {
   3125     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
   3126     int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
   3127     while(*conts != 0) {
   3128       /*
   3129        * A continuation is NUL-terminated and NUL-padded
   3130        * except if it has the maximum length.
   3131        */
   3132       int32_t contractionLength = maxUCAContractionLength;
   3133       while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
   3134         --contractionLength;
   3135       }
   3136       if(*(conts+1)==0) { /* pre-context */
   3137         varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
   3138       } else {
   3139         varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
   3140       }
   3141       if(U_FAILURE(status)) {
   3142         if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3143           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
   3144            * therefore it is not an error when it complains about them. */
   3145           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
   3146                       *conts, *(conts+1), *(conts+2));
   3147         } else {
   3148           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
   3149                   *conts, *(conts+1), *(conts+2), u_errorName(status));
   3150         }
   3151         status = U_ZERO_ERROR;
   3152       }
   3153       conts+=maxUCAContractionLength;
   3154     }
   3155 
   3156     status = U_ZERO_ERROR;
   3157 
   3158     first[0] = 0x0040;
   3159     first[1] = 0x0050;
   3160     first[2] = 0x0000;
   3161 
   3162     ucol_setVariableTop(coll, first, -1, &status);
   3163 
   3164     if(U_SUCCESS(status)) {
   3165       log_err("Invalid contraction succeded in setting variable top!\n");
   3166     }
   3167 
   3168   }
   3169 
   3170   log_verbose("Test restoring variable top\n");
   3171 
   3172   status = U_ZERO_ERROR;
   3173   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   3174   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   3175     log_err("Couldn't restore old variable top\n");
   3176   }
   3177 
   3178   log_verbose("Testing calling with error set\n");
   3179 
   3180   status = U_INTERNAL_PROGRAM_ERROR;
   3181   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
   3182   varTop2 = ucol_getVariableTop(coll, &status);
   3183   ucol_restoreVariableTop(coll, varTop2, &status);
   3184   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
   3185   varTop2 = ucol_getVariableTop(NULL, &status);
   3186   ucol_restoreVariableTop(NULL, varTop2, &status);
   3187   if(status != U_INTERNAL_PROGRAM_ERROR) {
   3188     log_err("Bad reaction to passed error!\n");
   3189   }
   3190   uprv_free(src.source);
   3191   ucol_close(coll);
   3192   } else {
   3193     log_data_err("Couldn't open UCA collator\n");
   3194   }
   3195 
   3196 }
   3197 
   3198 static void TestNonChars(void) {
   3199   static const char *test[] = {
   3200       "\\u0000",  /* ignorable */
   3201       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   3202       "\\uFDD0", "\\uFDEF",
   3203       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   3204       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   3205       "\\U0003FFFE", "\\U0003FFFF",
   3206       "\\U0004FFFE", "\\U0004FFFF",
   3207       "\\U0005FFFE", "\\U0005FFFF",
   3208       "\\U0006FFFE", "\\U0006FFFF",
   3209       "\\U0007FFFE", "\\U0007FFFF",
   3210       "\\U0008FFFE", "\\U0008FFFF",
   3211       "\\U0009FFFE", "\\U0009FFFF",
   3212       "\\U000AFFFE", "\\U000AFFFF",
   3213       "\\U000BFFFE", "\\U000BFFFF",
   3214       "\\U000CFFFE", "\\U000CFFFF",
   3215       "\\U000DFFFE", "\\U000DFFFF",
   3216       "\\U000EFFFE", "\\U000EFFFF",
   3217       "\\U000FFFFE", "\\U000FFFFF",
   3218       "\\U0010FFFE", "\\U0010FFFF",
   3219       "\\uFFFF"  /* special character with maximum primary weight */
   3220   };
   3221   UErrorCode status = U_ZERO_ERROR;
   3222   UCollator *coll = ucol_open("en_US", &status);
   3223 
   3224   log_verbose("Test non characters\n");
   3225 
   3226   if(U_SUCCESS(status)) {
   3227     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   3228   } else {
   3229     log_err_status(status, "Unable to open collator\n");
   3230   }
   3231 
   3232   ucol_close(coll);
   3233 }
   3234 
   3235 static void TestExtremeCompression(void) {
   3236   static char *test[4];
   3237   int32_t j = 0, i = 0;
   3238 
   3239   for(i = 0; i<4; i++) {
   3240     test[i] = (char *)malloc(2048*sizeof(char));
   3241   }
   3242 
   3243   for(j = 20; j < 500; j++) {
   3244     for(i = 0; i<4; i++) {
   3245       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3246       test[i][j-1] = (char)('a'+i);
   3247       test[i][j] = 0;
   3248     }
   3249     genericLocaleStarter("en_US", (const char **)test, 4);
   3250   }
   3251 
   3252 
   3253   for(i = 0; i<4; i++) {
   3254     free(test[i]);
   3255   }
   3256 }
   3257 
#if 0
/*
 * Disabled earlier variant of TestExtremeCompression (excluded from the build
 * by the surrounding #if 0). It fills four 2048-byte buffers for lengths
 * 10..2047 and calls genericLocaleStarter() once, after the fill loop.
 * NOTE(review): ucol_open() is given `status` rather than `&status` here, and
 * `coll` is neither used nor closed; fix both before re-enabling.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Second pass only rewrites test[0] (the inner loop runs for i == 0 only). */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
   3287 
   3288 static void TestSurrogates(void) {
   3289   static const char *test[] = {
   3290     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   3291        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   3292        "\\ud800\\udc00f",  "\\ud800\\udc00",
   3293        "\\ud800\\udc00c", "\\ud800\\udc00b",
   3294        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   3295        "\\ud800\\udc00a",
   3296        "c", "b"
   3297   };
   3298 
   3299   static const char *rule =
   3300     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   3301        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   3302        "< \\ud800\\udc00f  << \\ud800\\udc00"
   3303        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   3304        "< \\ud800\\udc00a  < c < b" ;
   3305 
   3306   genericRulesStarter(rule, test, 14);
   3307 }
   3308 
   3309 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   3310 static void TestPrefix(void) {
   3311   uint32_t i;
   3312 
   3313   static const struct {
   3314     const char *rules;
   3315     const char *data[50];
   3316     const uint32_t len;
   3317   } tests[] = {
   3318     { "&z <<< z|a",
   3319       {"zz", "za"}, 2 },
   3320 
   3321     { "&z <<< z|   a",
   3322       {"zz", "za"}, 2 },
   3323     { "[strength I]"
   3324       "&a=\\ud900\\udc25"
   3325       "&z<<<\\ud900\\udc25|a",
   3326       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   3327   };
   3328 
   3329 
   3330   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3331     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3332   }
   3333 }
   3334 
   3335 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   3336 /* JIS X 4061 collation order implementation                                   */
   3337 static void TestNewJapanese(void) {
   3338 
   3339   static const char * const test1[] = {
   3340       "\\u30b7\\u30e3\\u30fc\\u30ec",
   3341       "\\u30b7\\u30e3\\u30a4",
   3342       "\\u30b7\\u30e4\\u30a3",
   3343       "\\u30b7\\u30e3\\u30ec",
   3344       "\\u3061\\u3087\\u3053",
   3345       "\\u3061\\u3088\\u3053",
   3346       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   3347       "\\u3066\\u30fc\\u305f",
   3348       "\\u30c6\\u30fc\\u30bf",
   3349       "\\u30c6\\u30a7\\u30bf",
   3350       "\\u3066\\u3048\\u305f",
   3351       "\\u3067\\u30fc\\u305f",
   3352       "\\u30c7\\u30fc\\u30bf",
   3353       "\\u30c7\\u30a7\\u30bf",
   3354       "\\u3067\\u3048\\u305f",
   3355       "\\u3066\\u30fc\\u305f\\u30fc",
   3356       "\\u30c6\\u30fc\\u30bf\\u30a1",
   3357       "\\u30c6\\u30a7\\u30bf\\u30fc",
   3358       "\\u3066\\u3047\\u305f\\u3041",
   3359       "\\u3066\\u3048\\u305f\\u30fc",
   3360       "\\u3067\\u30fc\\u305f\\u30fc",
   3361       "\\u30c7\\u30fc\\u30bf\\u30a1",
   3362       "\\u3067\\u30a7\\u305f\\u30a1",
   3363       "\\u30c7\\u3047\\u30bf\\u3041",
   3364       "\\u30c7\\u30a8\\u30bf\\u30a2",
   3365       "\\u3072\\u3086",
   3366       "\\u3073\\u3085\\u3042",
   3367       "\\u3074\\u3085\\u3042",
   3368       "\\u3073\\u3085\\u3042\\u30fc",
   3369       "\\u30d3\\u30e5\\u30a2\\u30fc",
   3370       "\\u3074\\u3085\\u3042\\u30fc",
   3371       "\\u30d4\\u30e5\\u30a2\\u30fc",
   3372       "\\u30d2\\u30e5\\u30a6",
   3373       "\\u30d2\\u30e6\\u30a6",
   3374       "\\u30d4\\u30e5\\u30a6\\u30a2",
   3375       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   3376       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   3377       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   3378       "\\u3072\\u3085\\u3093",
   3379       "\\u3074\\u3085\\u3093",
   3380       "\\u3075\\u30fc\\u308a",
   3381       "\\u30d5\\u30fc\\u30ea",
   3382       "\\u3075\\u3045\\u308a",
   3383       "\\u3075\\u30a5\\u308a",
   3384       "\\u3075\\u30a5\\u30ea",
   3385       "\\u30d5\\u30a6\\u30ea",
   3386       "\\u3076\\u30fc\\u308a",
   3387       "\\u30d6\\u30fc\\u30ea",
   3388       "\\u3076\\u3045\\u308a",
   3389       "\\u30d6\\u30a5\\u308a",
   3390       "\\u3077\\u3046\\u308a",
   3391       "\\u30d7\\u30a6\\u30ea",
   3392       "\\u3075\\u30fc\\u308a\\u30fc",
   3393       "\\u30d5\\u30a5\\u30ea\\u30fc",
   3394       "\\u3075\\u30a5\\u308a\\u30a3",
   3395       "\\u30d5\\u3045\\u308a\\u3043",
   3396       "\\u30d5\\u30a6\\u30ea\\u30fc",
   3397       "\\u3075\\u3046\\u308a\\u3043",
   3398       "\\u30d6\\u30a6\\u30ea\\u30a4",
   3399       "\\u3077\\u30fc\\u308a\\u30fc",
   3400       "\\u3077\\u30a5\\u308a\\u30a4",
   3401       "\\u3077\\u3046\\u308a\\u30fc",
   3402       "\\u30d7\\u30a6\\u30ea\\u30a4",
   3403       "\\u30d5\\u30fd",
   3404       "\\u3075\\u309e",
   3405       "\\u3076\\u309d",
   3406       "\\u3076\\u3075",
   3407       "\\u3076\\u30d5",
   3408       "\\u30d6\\u3075",
   3409       "\\u30d6\\u30d5",
   3410       "\\u3076\\u309e",
   3411       "\\u3076\\u3077",
   3412       "\\u30d6\\u3077",
   3413       "\\u3077\\u309d",
   3414       "\\u30d7\\u30fd",
   3415       "\\u3077\\u3075",
   3416 };
   3417 
   3418   static const char *test2[] = {
   3419     "\\u306f\\u309d", /* H\\u309d */
   3420     "\\u30cf\\u30fd", /* K\\u30fd */
   3421     "\\u306f\\u306f", /* HH */
   3422     "\\u306f\\u30cf", /* HK */
   3423     "\\u30cf\\u30cf", /* KK */
   3424     "\\u306f\\u309e", /* H\\u309e */
   3425     "\\u30cf\\u30fe", /* K\\u30fe */
   3426     "\\u306f\\u3070", /* HH\\u309b */
   3427     "\\u30cf\\u30d0", /* KK\\u309b */
   3428     "\\u306f\\u3071", /* HH\\u309c */
   3429     "\\u30cf\\u3071", /* KH\\u309c */
   3430     "\\u30cf\\u30d1", /* KK\\u309c */
   3431     "\\u3070\\u309d", /* H\\u309b\\u309d */
   3432     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   3433     "\\u3070\\u306f", /* H\\u309bH */
   3434     "\\u30d0\\u30cf", /* K\\u309bK */
   3435     "\\u3070\\u309e", /* H\\u309b\\u309e */
   3436     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   3437     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   3438     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   3439     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   3440     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   3441     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   3442     "\\u3071\\u309d", /* H\\u309c\\u309d */
   3443     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   3444     "\\u3071\\u306f", /* H\\u309cH */
   3445     "\\u30d1\\u30cf", /* K\\u309cK */
   3446     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   3447     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   3448     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   3449     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   3450     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   3451   };
   3452   /*
   3453   static const char *test3[] = {
   3454     "\\u221er\\u221e",
   3455     "\\u221eR#",
   3456     "\\u221et\\u221e",
   3457     "#r\\u221e",
   3458     "#R#",
   3459     "#t%",
   3460     "#T%",
   3461     "8t\\u221e",
   3462     "8T\\u221e",
   3463     "8t#",
   3464     "8T#",
   3465     "8t%",
   3466     "8T%",
   3467     "8t8",
   3468     "8T8",
   3469     "\\u03c9r\\u221e",
   3470     "\\u03a9R%",
   3471     "rr\\u221e",
   3472     "rR\\u221e",
   3473     "Rr\\u221e",
   3474     "RR\\u221e",
   3475     "RT%",
   3476     "rt8",
   3477     "tr\\u221e",
   3478     "tr8",
   3479     "TR8",
   3480     "tt8",
   3481     "\\u30b7\\u30e3\\u30fc\\u30ec",
   3482   };
   3483   */
   3484   static const UColAttribute att[] = { UCOL_STRENGTH };
   3485   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   3486 
   3487   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   3488   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   3489 
   3490   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   3491   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   3492   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   3493   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   3494   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   3495 }
   3496 
   3497 static void TestStrCollIdenticalPrefix(void) {
   3498   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   3499   const char* test[] = {
   3500     "ab\\ud9b0\\udc70",
   3501     "ab\\ud9b0\\udc71"
   3502   };
   3503   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   3504 }
   3505 /* Contractions should have all their canonically equivalent */
   3506 /* strings included */
   3507 static void TestContractionClosure(void) {
   3508   static const struct {
   3509     const char *rules;
   3510     const char *data[10];
   3511     const uint32_t len;
   3512   } tests[] = {
   3513     {   "&b=\\u00e4\\u00e4",
   3514       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   3515     {   "&b=\\u00C5",
   3516       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   3517   };
   3518   uint32_t i;
   3519 
   3520 
   3521   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3522     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   3523   }
   3524 }
   3525 
   3526 /* This tests also fails*/
   3527 static void TestBeforePrefixFailure(void) {
   3528   static const struct {
   3529     const char *rules;
   3530     const char *data[10];
   3531     const uint32_t len;
   3532   } tests[] = {
   3533     { "&g <<< a"
   3534       "&[before 3]\\uff41 <<< x",
   3535       {"x", "\\uff41"}, 2 },
   3536     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3537         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3538         "&[before 3]\\u30a7<<<\\u30a9",
   3539       {"\\u30a9", "\\u30a7"}, 2 },
   3540     {   "&[before 3]\\u30a7<<<\\u30a9"
   3541         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3542         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   3543       {"\\u30a9", "\\u30a7"}, 2 },
   3544   };
   3545   uint32_t i;
   3546 
   3547 
   3548   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3549     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3550   }
   3551 
   3552 #if 0
   3553   const char* rule1 =
   3554         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3555         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3556         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   3557   const char* rule2 =
   3558         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   3559         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3560         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   3561   const char* test[] = {
   3562       "\\u30c6\\u30fc\\u30bf",
   3563       "\\u30c6\\u30a7\\u30bf",
   3564   };
   3565   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   3566   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   3567 /* this piece of code should be in some sort of verbose mode     */
   3568 /* it gets the collation elements for elements and prints them   */
   3569 /* This is useful when trying to see whether the problem is      */
   3570   {
   3571     UErrorCode status = U_ZERO_ERROR;
   3572     uint32_t i = 0;
   3573     UCollationElements *it = NULL;
   3574     uint32_t CE;
   3575     UChar string[256];
   3576     uint32_t uStringLen;
   3577     UCollator *coll = NULL;
   3578 
   3579     uStringLen = u_unescape(rule1, string, 256);
   3580 
   3581     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3582 
   3583     /*coll = ucol_open("ja_JP_JIS", &status);*/
   3584     it = ucol_openElements(coll, string, 0, &status);
   3585 
   3586     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   3587       log_verbose("%s\n", test[i]);
   3588       uStringLen = u_unescape(test[i], string, 256);
   3589       ucol_setText(it, string, uStringLen, &status);
   3590 
   3591       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   3592         log_verbose("%08X\n", CE);
   3593       }
   3594       log_verbose("\n");
   3595 
   3596     }
   3597 
   3598     ucol_closeElements(it);
   3599     ucol_close(coll);
   3600   }
   3601 #endif
   3602 }
   3603 
   3604 static void TestPrefixCompose(void) {
   3605   const char* rule1 =
   3606         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   3607   /*
   3608   const char* test[] = {
   3609       "\\u30c6\\u30fc\\u30bf",
   3610       "\\u30c6\\u30a7\\u30bf",
   3611   };
   3612   */
   3613   {
   3614     UErrorCode status = U_ZERO_ERROR;
   3615     /*uint32_t i = 0;*/
   3616     /*UCollationElements *it = NULL;*/
   3617 /*    uint32_t CE;*/
   3618     UChar string[256];
   3619     uint32_t uStringLen;
   3620     UCollator *coll = NULL;
   3621 
   3622     uStringLen = u_unescape(rule1, string, 256);
   3623 
   3624     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3625     ucol_close(coll);
   3626   }
   3627 
   3628 
   3629 }
   3630 
   3631 /*
   3632 [last variable] last variable value
   3633 [last primary ignorable] largest CE for primary ignorable
   3634 [last secondary ignorable] largest CE for secondary ignorable
   3635 [last tertiary ignorable] largest CE for tertiary ignorable
   3636 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   3637 */
   3638 
   3639 static void TestRuleOptions(void) {
   3640   /* values here are hardcoded and are correct for the current UCA
   3641    * when the UCA changes, one might be forced to change these
   3642    * values.
   3643    */
   3644 
   3645   /*
   3646    * These strings contain the last character before [variable top]
   3647    * and the first and second characters (by primary weights) after it.
   3648    * See FractionalUCA.txt. For example:
   3649       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   3650       [variable top = 0C FE]
   3651       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   3652      and
   3653       00B4; [0D 0C, 05, 05]
   3654    *
   3655    * Note: Starting with UCA 6.0, the [variable top] collation element
   3656    * is not the weight of any character or string,
   3657    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   3658    */
   3659 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   3660 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   3661 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   3662 
   3663   /*
   3664    * This string has to match the character that has the [last regular] weight
   3665    * which changes with each UCA version.
   3666    * See the bottom of FractionalUCA.txt which says something like
   3667       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   3668    *
   3669    * Note: Starting with UCA 6.0, the [last regular] collation element
   3670    * is not the weight of any character or string,
   3671    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   3672    */
   3673 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   3674 
   3675   static const struct {
   3676     const char *rules;
   3677     const char *data[10];
   3678     const uint32_t len;
   3679   } tests[] = {
   3680     /* - all befores here amount to zero */
   3681     { "&[before 3][first tertiary ignorable]<<<a",
   3682         { "\\u0000", "a"}, 2
   3683     }, /* you cannot go before first tertiary ignorable */
   3684 
   3685     { "&[before 3][last tertiary ignorable]<<<a",
   3686         { "\\u0000", "a"}, 2
   3687     }, /* you cannot go before last tertiary ignorable */
   3688 
   3689     { "&[before 3][first secondary ignorable]<<<a",
   3690         { "\\u0000", "a"}, 2
   3691     }, /* you cannot go before first secondary ignorable */
   3692 
   3693     { "&[before 3][last secondary ignorable]<<<a",
   3694         { "\\u0000", "a"}, 2
   3695     }, /* you cannot go before first secondary ignorable */
   3696 
   3697     /* 'normal' befores */
   3698 
   3699     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
   3700         {  "c", "b", "\\u0332", "a" }, 4
   3701     },
   3702 
   3703     /* we don't have a code point that corresponds to
   3704      * the last primary ignorable
   3705      */
   3706     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
   3707         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   3708     },
   3709 
   3710     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   3711         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   3712     },
   3713 
   3714     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   3715         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   3716     },
   3717 
   3718     { "&[first regular]<a"
   3719       "&[before 1][first regular]<b",
   3720       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   3721     },
   3722 
   3723     { "&[before 1][last regular]<b"
   3724       "&[last regular]<a",
   3725         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   3726     },
   3727 
   3728     { "&[before 1][first implicit]<b"
   3729       "&[first implicit]<a",
   3730         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   3731     },
   3732 
   3733     { "&[before 1][last implicit]<b"
   3734       "&[last implicit]<a",
   3735         { "b", "\\U0010FFFD", "a" }, 3
   3736     },
   3737 
   3738     { "&[last variable]<z"
   3739       "&[last primary ignorable]<x"
   3740       "&[last secondary ignorable]<<y"
   3741       "&[last tertiary ignorable]<<<w"
   3742       "&[top]<u",
   3743       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   3744     }
   3745 
   3746   };
   3747   uint32_t i;
   3748 
   3749   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3750     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3751   }
   3752 }
   3753 
   3754 
   3755 static void TestOptimize(void) {
   3756   /* this is not really a test - just trying out
   3757    * whether copying of UCA contents will fail
   3758    * Cannot really test, since the functionality
   3759    * remains the same.
   3760    */
   3761   static const struct {
   3762     const char *rules;
   3763     const char *data[10];
   3764     const uint32_t len;
   3765   } tests[] = {
   3766     /* - all befores here amount to zero */
   3767     { "[optimize [\\uAC00-\\uD7FF]]",
   3768     { "a", "b"}, 2}
   3769   };
   3770   uint32_t i;
   3771 
   3772   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3773     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3774   }
   3775 }
   3776 
   3777 /*
   3778 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   3779 weiv    ucol_strcollIter?
   3780 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   3781 weiv    these are the input strings?
   3782 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   3783 weiv    will check - could be a problem with utf-8 iterator
   3784 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   3785 weiv    hmmm
   3786 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   3787 weiv    that doesn't sound right
   3788 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   3789 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   3790 cycheng (at) ca.ibm.c... yes
   3791 weiv    and then do the comparison
   3792 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   3793 weiv    utf-16 strings look like a little endian ones in the example you sent me
   3794 weiv    It could be a bug - let me try to test it out
   3795 cycheng (at) ca.ibm.c... ok
   3796 cycheng (at) ca.ibm.c... we can wait till the conf. call
   3797 cycheng (at) ca.ibm.c... next weke
   3798 weiv    that would be great
   3799 weiv    hmmm
   3800 weiv    I might be wrong
   3801 weiv    let me play with it some more
   3802 cycheng (at) ca.ibm.c... ok
   3803 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   3804 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   3805 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   3806 weiv    ok
   3807 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   3808 weiv    thanks
   3809 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   3810 */
#if 0
/*
 * Disabled reproduction (excluded from the build by #if 0) of the report
 * quoted in the comment above: the same two strings, given once as UTF-16BE
 * bytes and once as UTF-8 bytes, are compared through ucol_strcollIter()
 * with normalization on, and an error is logged if the two results differ.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* d800 0021 and fffc 0062 as big-endian byte sequences */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the UTF-8 byte sequences given for the same strings in the report */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
   3851 
   3852 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   3853 static void Alexis2(void) {
   3854   UErrorCode status = U_ZERO_ERROR;
   3855   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3856   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3857   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3858   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   3859 
   3860   UConverter *conv = NULL;
   3861 
   3862   UCharIterator U16BEItS, U16BEItT;
   3863   UCharIterator U8ItS, U8ItT;
   3864 
   3865   UCollationResult resU16, resU16BE, resU8;
   3866 
   3867   static const char* const pairs[][2] = {
   3868     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   3869     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   3870     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   3871     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   3872     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   3873     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   3874     { "\\u0020", "\\u0020\\u0000"}
   3875 /*
   3876 5F20 (my result here)
   3877 5F204E008E3F
   3878 5F20 (your result here)
   3879 */
   3880   };
   3881 
   3882   int32_t i = 0;
   3883 
   3884   UCollator *coll = ucol_open("", &status);
   3885   if(status == U_FILE_ACCESS_ERROR) {
   3886     log_data_err("Is your data around?\n");
   3887     return;
   3888   } else if(U_FAILURE(status)) {
   3889     log_err("Error opening collator\n");
   3890     return;
   3891   }
   3892   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3893   conv = ucnv_open("UTF16BE", &status);
   3894   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   3895     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3896     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3897 
   3898     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   3899 
   3900     log_verbose("Result of strcoll is %i\n", resU16);
   3901 
   3902     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   3903     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   3904 
   3905     /* use the original sizes, as the result from converter is in bytes */
   3906     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   3907     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   3908 
   3909     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   3910 
   3911     log_verbose("Result of U16BE is %i\n", resU16BE);
   3912 
   3913     if(resU16 != resU16BE) {
   3914       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   3915     }
   3916 
   3917     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   3918     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   3919 
   3920     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   3921     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   3922 
   3923     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   3924 
   3925     if(resU16 != resU8) {
   3926       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   3927     }
   3928 
   3929   }
   3930 
   3931   ucol_close(coll);
   3932   ucnv_close(conv);
   3933 }
   3934 
   3935 static void TestHebrewUCA(void) {
   3936   UErrorCode status = U_ZERO_ERROR;
   3937   static const char *first[] = {
   3938     "d790d6b8d79cd795d6bcd7a9",
   3939     "d790d79cd79ed7a7d799d799d7a1",
   3940     "d790d6b4d79ed795d6bcd7a9",
   3941   };
   3942 
   3943   char utf8String[3][256];
   3944   UChar utf16String[3][256];
   3945 
   3946   int32_t i = 0, j = 0;
   3947   int32_t sizeUTF8[3];
   3948   int32_t sizeUTF16[3];
   3949 
   3950   UCollator *coll = ucol_open("", &status);
   3951   if (U_FAILURE(status)) {
   3952       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   3953       return;
   3954   }
   3955   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   3956 
   3957   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   3958     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   3959     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   3960     log_verbose("%i: ");
   3961     for(j = 0; j < sizeUTF16[i]; j++) {
   3962       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   3963       log_verbose("%04X", utf16String[i][j]);
   3964     }
   3965     log_verbose("\n");
   3966   }
   3967   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   3968     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   3969       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   3970     }
   3971   }
   3972 
   3973   ucol_close(coll);
   3974 
   3975 }
   3976 
   3977 static void TestPartialSortKeyTermination(void) {
   3978   static const char* cases[] = {
   3979     "\\u1234\\u1234\\udc00",
   3980     "\\udc00\\ud800\\ud800"
   3981   };
   3982 
   3983   int32_t i = sizeof(UCollator);
   3984 
   3985   UErrorCode status = U_ZERO_ERROR;
   3986 
   3987   UCollator *coll = ucol_open("", &status);
   3988 
   3989   UCharIterator iter;
   3990 
   3991   UChar currCase[256];
   3992   int32_t length = 0;
   3993   int32_t pKeyLen = 0;
   3994 
   3995   uint8_t key[256];
   3996 
   3997   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   3998     uint32_t state[2] = {0, 0};
   3999     length = u_unescape(cases[i], currCase, 256);
   4000     uiter_setString(&iter, currCase, length);
   4001     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   4002 
   4003     log_verbose("Done\n");
   4004 
   4005   }
   4006   ucol_close(coll);
   4007 }
   4008 
   4009 static void TestSettings(void) {
   4010   static const char* cases[] = {
   4011     "apple",
   4012       "Apple"
   4013   };
   4014 
   4015   static const char* locales[] = {
   4016     "",
   4017       "en"
   4018   };
   4019 
   4020   UErrorCode status = U_ZERO_ERROR;
   4021 
   4022   int32_t i = 0, j = 0;
   4023 
   4024   UChar source[256], target[256];
   4025   int32_t sLen = 0, tLen = 0;
   4026 
   4027   UCollator *collateObject = NULL;
   4028   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   4029     collateObject = ucol_open(locales[i], &status);
   4030     ucol_setStrength(collateObject, UCOL_PRIMARY);
   4031     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   4032     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   4033       sLen = u_unescape(cases[j-1], source, 256);
   4034       source[sLen] = 0;
   4035       tLen = u_unescape(cases[j], target, 256);
   4036       source[tLen] = 0;
   4037       doTest(collateObject, source, target, UCOL_EQUAL);
   4038     }
   4039     ucol_close(collateObject);
   4040   }
   4041 }
   4042 
   4043 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   4044     UErrorCode status = U_ZERO_ERROR;
   4045     int32_t errorNo = 0;
   4046     /*const UChar *sourceRules = NULL;*/
   4047     /*int32_t sourceRulesLen = 0;*/
   4048     UColAttributeValue french = UCOL_OFF;
   4049     int32_t cloneSize = 0;
   4050 
   4051     if(!ucol_equals(source, target)) {
   4052         log_err("Same collators, different address not equal\n");
   4053         errorNo++;
   4054     }
   4055     ucol_close(target);
   4056     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   4057         /* currently, safeClone is implemented through getRules/openRules
   4058         * so it is the same as the test below - I will comment that test out.
   4059         */
   4060         /* real thing */
   4061         target = ucol_safeClone(source, NULL, &cloneSize, &status);
   4062         if(U_FAILURE(status)) {
   4063             log_err("Error creating clone\n");
   4064             errorNo++;
   4065             return errorNo;
   4066         }
   4067         if(!ucol_equals(source, target)) {
   4068             log_err("Collator different from it's clone\n");
   4069             errorNo++;
   4070         }
   4071         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   4072         if(french == UCOL_ON) {
   4073             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   4074         } else {
   4075             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   4076         }
   4077         if(U_FAILURE(status)) {
   4078             log_err("Error setting attributes\n");
   4079             errorNo++;
   4080             return errorNo;
   4081         }
   4082         if(ucol_equals(source, target)) {
   4083             log_err("Collators same even when options changed\n");
   4084             errorNo++;
   4085         }
   4086         ucol_close(target);
   4087         /* commented out since safeClone uses exactly the same technique */
   4088         /*
   4089         sourceRules = ucol_getRules(source, &sourceRulesLen);
   4090         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4091         if(U_FAILURE(status)) {
   4092         log_err("Error instantiating target from rules\n");
   4093         errorNo++;
   4094         return errorNo;
   4095         }
   4096         if(!ucol_equals(source, target)) {
   4097         log_err("Collator different from collator that was created from the same rules\n");
   4098         errorNo++;
   4099         }
   4100         ucol_close(target);
   4101         */
   4102     }
   4103     return errorNo;
   4104 }
   4105 
   4106 
   4107 static void TestEquals(void) {
   4108     /* ucol_equals is not currently a public API. There is a chance that it will become
   4109     * something like this, but currently it is only used by RuleBasedCollator::operator==
   4110     */
   4111     /* test whether the two collators instantiated from the same locale are equal */
   4112     UErrorCode status = U_ZERO_ERROR;
   4113     UParseError parseError;
   4114     int32_t noOfLoc = uloc_countAvailable();
   4115     const char *locName = NULL;
   4116     UCollator *source = NULL, *target = NULL;
   4117     int32_t i = 0;
   4118 
   4119     const char* rules[] = {
   4120         "&l < lj <<< Lj <<< LJ",
   4121         "&n < nj <<< Nj <<< NJ",
   4122         "&ae <<< \\u00e4",
   4123         "&AE <<< \\u00c4"
   4124     };
   4125     /*
   4126     const char* badRules[] = {
   4127     "&l <<< Lj",
   4128     "&n < nj <<< nJ <<< NJ",
   4129     "&a <<< \\u00e4",
   4130     "&AE <<< \\u00c4 <<< x"
   4131     };
   4132     */
   4133 
   4134     UChar sourceRules[1024], targetRules[1024];
   4135     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   4136     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   4137 
   4138     for(i = 0; i < rulesSize; i++) {
   4139         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   4140         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   4141     }
   4142 
   4143     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4144     if(status == U_FILE_ACCESS_ERROR) {
   4145         log_data_err("Is your data around?\n");
   4146         return;
   4147     } else if(U_FAILURE(status)) {
   4148         log_err("Error opening collator\n");
   4149         return;
   4150     }
   4151     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4152     if(!ucol_equals(source, target)) {
   4153         log_err("Equivalent collators not equal!\n");
   4154     }
   4155     ucol_close(source);
   4156     ucol_close(target);
   4157 
   4158     source = ucol_open("root", &status);
   4159     target = ucol_open("root", &status);
   4160     log_verbose("Testing root\n");
   4161     if(!ucol_equals(source, source)) {
   4162         log_err("Same collator not equal\n");
   4163     }
   4164     if(TestEqualsForCollator(locName, source, target)) {
   4165         log_err("Errors for root\n", locName);
   4166     }
   4167     ucol_close(source);
   4168 
   4169     for(i = 0; i<noOfLoc; i++) {
   4170         status = U_ZERO_ERROR;
   4171         locName = uloc_getAvailable(i);
   4172         /*if(hasCollationElements(locName)) {*/
   4173         log_verbose("Testing equality for locale %s\n", locName);
   4174         source = ucol_open(locName, &status);
   4175         target = ucol_open(locName, &status);
   4176         if (U_FAILURE(status)) {
   4177             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   4178             continue;
   4179         }
   4180         if(TestEqualsForCollator(locName, source, target)) {
   4181             log_err("Errors for locale %s\n", locName);
   4182         }
   4183         ucol_close(source);
   4184         /*}*/
   4185     }
   4186 }
   4187 
   4188 static void TestJ2726(void) {
   4189     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   4190     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   4191     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   4192     UErrorCode status = U_ZERO_ERROR;
   4193     UCollator *coll = ucol_open("en", &status);
   4194     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   4195     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4196     doTest(coll, a, aSpace, UCOL_EQUAL);
   4197     doTest(coll, aSpace, a, UCOL_EQUAL);
   4198     doTest(coll, a, spaceA, UCOL_EQUAL);
   4199     doTest(coll, spaceA, a, UCOL_EQUAL);
   4200     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   4201     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   4202     ucol_close(coll);
   4203 }
   4204 
   4205 static void NullRule(void) {
   4206     UChar r[3] = {0};
   4207     UErrorCode status = U_ZERO_ERROR;
   4208     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4209     if(U_SUCCESS(status)) {
   4210         log_err("This should have been an error!\n");
   4211         ucol_close(coll);
   4212     } else {
   4213         status = U_ZERO_ERROR;
   4214     }
   4215     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4216     if(U_FAILURE(status)) {
   4217         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   4218     } else {
   4219         ucol_close(coll);
   4220     }
   4221 }
   4222 
   4223 /**
   4224  * Test for CollationElementIterator previous and next for the whole set of
   4225  * unicode characters with normalization on.
   4226  */
   4227 static void TestNumericCollation(void)
   4228 {
   4229     UErrorCode status = U_ZERO_ERROR;
   4230 
   4231     const static char *basicTestStrings[]={
   4232     "hello1",
   4233     "hello2",
   4234     "hello2002",
   4235     "hello2003",
   4236     "hello123456",
   4237     "hello1234567",
   4238     "hello10000000",
   4239     "hello100000000",
   4240     "hello1000000000",
   4241     "hello10000000000",
   4242     };
   4243 
   4244     const static char *preZeroTestStrings[]={
   4245     "avery10000",
   4246     "avery010000",
   4247     "avery0010000",
   4248     "avery00010000",
   4249     "avery000010000",
   4250     "avery0000010000",
   4251     "avery00000010000",
   4252     "avery000000010000",
   4253     };
   4254 
   4255     const static char *thirtyTwoBitNumericStrings[]={
   4256     "avery42949672960",
   4257     "avery42949672961",
   4258     "avery42949672962",
   4259     "avery429496729610"
   4260     };
   4261 
   4262      const static char *longNumericStrings[]={
   4263      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   4264         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   4265         are treated as multiple collation elements. */
   4266     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   4267     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   4268     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   4269     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   4270     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   4271     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   4272     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   4273     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   4274     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   4275     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   4276     };
   4277 
   4278     const static char *supplementaryDigits[] = {
   4279       "\\uD835\\uDFCE", /* 0 */
   4280       "\\uD835\\uDFCF", /* 1 */
   4281       "\\uD835\\uDFD0", /* 2 */
   4282       "\\uD835\\uDFD1", /* 3 */
   4283       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   4284       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   4285       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   4286       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   4287       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   4288       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   4289     };
   4290 
   4291     const static char *foreignDigits[] = {
   4292       "\\u0661",
   4293         "\\u0662",
   4294         "\\u0663",
   4295       "\\u0661\\u0660",
   4296       "\\u0661\\u0662",
   4297       "\\u0661\\u0663",
   4298       "\\u0662\\u0660",
   4299       "\\u0662\\u0662",
   4300       "\\u0662\\u0663",
   4301       "\\u0663\\u0660",
   4302       "\\u0663\\u0662",
   4303       "\\u0663\\u0663"
   4304     };
   4305 
   4306     const static char *evenZeroes[] = {
   4307       "2000",
   4308       "2001",
   4309         "2002",
   4310         "2003"
   4311     };
   4312 
   4313     UColAttribute att = UCOL_NUMERIC_COLLATION;
   4314     UColAttributeValue val = UCOL_ON;
   4315 
   4316     /* Open our collator. */
   4317     UCollator* coll = ucol_open("root", &status);
   4318     if (U_FAILURE(status)){
   4319         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   4320               myErrorName(status));
   4321         return;
   4322     }
   4323     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   4324     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   4325     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   4326     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   4327     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   4328     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   4329 
   4330     /* Setting up our collator to do digits. */
   4331     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   4332     if (U_FAILURE(status)){
   4333         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   4334               myErrorName(status));
   4335         return;
   4336     }
   4337 
   4338     /*
   4339        Testing that prepended zeroes still yield the correct collation behavior.
   4340        We expect that every element in our strings array will be equal.
   4341     */
   4342     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   4343 
   4344     ucol_close(coll);
   4345 }
   4346 
   4347 static void TestTibetanConformance(void)
   4348 {
   4349     const char* test[] = {
   4350         "\\u0FB2\\u0591\\u0F71\\u0061",
   4351         "\\u0FB2\\u0F71\\u0061"
   4352     };
   4353 
   4354     UErrorCode status = U_ZERO_ERROR;
   4355     UCollator *coll = ucol_open("", &status);
   4356     UChar source[100];
   4357     UChar target[100];
   4358     int result;
   4359     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4360     if (U_SUCCESS(status)) {
   4361         u_unescape(test[0], source, 100);
   4362         u_unescape(test[1], target, 100);
   4363         doTest(coll, source, target, UCOL_EQUAL);
   4364         result = ucol_strcoll(coll, source, -1,   target, -1);
   4365         log_verbose("result %d\n", result);
   4366         if (UCOL_EQUAL != result) {
   4367             log_err("Tibetan comparison error\n");
   4368         }
   4369     }
   4370     ucol_close(coll);
   4371 
   4372     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   4373 }
   4374 
   4375 static void TestPinyinProblem(void) {
   4376     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   4377     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   4378 }
   4379 
   4380 #define TST_UCOL_MAX_INPUT 0x220001
   4381 #define topByte 0xFF000000;
   4382 #define bottomByte 0xFF;
   4383 #define fourBytes 0xFFFFFFFF;
   4384 
   4385 
   4386 static void showImplicit(UChar32 i) {
   4387     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
   4388         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
   4389     }
   4390 }
   4391 
   4392 static void TestImplicitGeneration(void) {
   4393     UErrorCode status = U_ZERO_ERROR;
   4394     UChar32 last = 0;
   4395     UChar32 current;
   4396     UChar32 i = 0, j = 0;
   4397     UChar32 roundtrip = 0;
   4398     UChar32 lastBottom = 0;
   4399     UChar32 currentBottom = 0;
   4400     UChar32 lastTop = 0;
   4401     UChar32 currentTop = 0;
   4402 
   4403     UCollator *coll = ucol_open("root", &status);
   4404     if(U_FAILURE(status)) {
   4405         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4406         return;
   4407     }
   4408 
   4409     uprv_uca_getRawFromImplicit(0xE20303E7);
   4410 
   4411     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
   4412         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
   4413 
   4414         /* check that it round-trips AND that all intervening ones are illegal*/
   4415         roundtrip = uprv_uca_getRawFromImplicit(current);
   4416         if (roundtrip != i) {
   4417             log_err("No roundtrip %08X\n", i);
   4418         }
   4419         if (last != 0) {
   4420             for (j = last + 1; j < current; ++j) {
   4421                 roundtrip = uprv_uca_getRawFromImplicit(j);
   4422                 /* raise an error if it *doesn't* find an error*/
   4423                 if (roundtrip != -1) {
   4424                     log_err("Fails to recognize illegal %08X\n", j);
   4425                 }
   4426             }
   4427         }
   4428         /* now do other consistency checks*/
   4429         lastBottom = last & bottomByte;
   4430         currentBottom = current & bottomByte;
   4431         lastTop = last & topByte;
   4432         currentTop = current & topByte;
   4433 
   4434         /* print out some values for spot-checking*/
   4435         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
   4436             showImplicit(i-3);
   4437             showImplicit(i-2);
   4438             showImplicit(i-1);
   4439             showImplicit(i);
   4440             showImplicit(i+1);
   4441             showImplicit(i+2);
   4442         }
   4443         last = current;
   4444 
   4445         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
   4446             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
   4447         }
   4448     }
   4449     showImplicit(TST_UCOL_MAX_INPUT-2);
   4450     showImplicit(TST_UCOL_MAX_INPUT-1);
   4451     showImplicit(TST_UCOL_MAX_INPUT);
   4452     ucol_close(coll);
   4453 }
   4454 
   4455 /**
   4456  * Iterate through the given iterator, checking to see that all the strings
   4457  * in the expected array are present.
   4458  * @param expected array of strings we expect to see, or NULL
   4459  * @param expectedCount number of elements of expected, or 0
   4460  */
   4461 static int32_t checkUEnumeration(const char* msg,
   4462                                  UEnumeration* iter,
   4463                                  const char** expected,
   4464                                  int32_t expectedCount) {
   4465     UErrorCode ec = U_ZERO_ERROR;
   4466     int32_t i = 0, n, j, bit;
   4467     int32_t seenMask = 0;
   4468 
   4469     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   4470     n = uenum_count(iter, &ec);
   4471     if (!assertSuccess("count", &ec)) return -1;
   4472     log_verbose("%s = [", msg);
   4473     for (;; ++i) {
   4474         const char* s = uenum_next(iter, NULL, &ec);
   4475         if (!assertSuccess("snext", &ec) || s == NULL) break;
   4476         if (i != 0) log_verbose(",");
   4477         log_verbose("%s", s);
   4478         /* check expected list */
   4479         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4480             if ((seenMask&bit) == 0 &&
   4481                 uprv_strcmp(s, expected[j]) == 0) {
   4482                 seenMask |= bit;
   4483                 break;
   4484             }
   4485         }
   4486     }
   4487     log_verbose("] (%d)\n", i);
   4488     assertTrue("count verified", i==n);
   4489     /* did we see all expected strings? */
   4490     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4491         if ((seenMask&bit)!=0) {
   4492             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   4493         } else {
   4494             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   4495         }
   4496     }
   4497     return n;
   4498 }
   4499 
   4500 /**
   4501  * Test new API added for separate collation tree.
   4502  */
   4503 static void TestSeparateTrees(void) {
   4504     UErrorCode ec = U_ZERO_ERROR;
   4505     UEnumeration *e = NULL;
   4506     int32_t n = -1;
   4507     UBool isAvailable;
   4508     char loc[256];
   4509 
   4510     static const char* AVAIL[] = { "en", "de" };
   4511 
   4512     static const char* KW[] = { "collation" };
   4513 
   4514     static const char* KWVAL[] = { "phonebook", "stroke" };
   4515 
   4516 #if !UCONFIG_NO_SERVICE
   4517     e = ucol_openAvailableLocales(&ec);
   4518     if (e != NULL) {
   4519         assertSuccess("ucol_openAvailableLocales", &ec);
   4520         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   4521         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   4522         /* Don't need to check n because we check list */
   4523         uenum_close(e);
   4524     } else {
   4525         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   4526     }
   4527 #endif
   4528 
   4529     e = ucol_getKeywords(&ec);
   4530     if (e != NULL) {
   4531         assertSuccess("ucol_getKeywords", &ec);
   4532         assertTrue("ucol_getKeywords!=0", e!=0);
   4533         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   4534         /* Don't need to check n because we check list */
   4535         uenum_close(e);
   4536     } else {
   4537         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   4538     }
   4539 
   4540     e = ucol_getKeywordValues(KW[0], &ec);
   4541     if (e != NULL) {
   4542         assertSuccess("ucol_getKeywordValues", &ec);
   4543         assertTrue("ucol_getKeywordValues!=0", e!=0);
   4544         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   4545         /* Don't need to check n because we check list */
   4546         uenum_close(e);
   4547     } else {
   4548         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   4549     }
   4550 
   4551     /* Try setting a warning before calling ucol_getKeywordValues */
   4552     ec = U_USING_FALLBACK_WARNING;
   4553     e = ucol_getKeywordValues(KW[0], &ec);
   4554     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   4555         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   4556         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   4557         /* Don't need to check n because we check list */
   4558         uenum_close(e);
   4559     }
   4560 
   4561     /*
   4562 U_DRAFT int32_t U_EXPORT2
   4563 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   4564                              const char* locale, UBool* isAvailable,
   4565                              UErrorCode* status);
   4566 }
   4567 */
   4568     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   4569                                      &isAvailable, &ec);
   4570     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4571         assertEquals("getFunctionalEquivalent(de)", "de", loc);
   4572         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   4573                    isAvailable == TRUE);
   4574     }
   4575 
   4576     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   4577                                      &isAvailable, &ec);
   4578     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4579         assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
   4580         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
   4581                    isAvailable == TRUE);
   4582     }
   4583 }
   4584 
   4585 /* supercedes TestJ784 */
   4586 static void TestBeforePinyin(void) {
   4587     const static char rules[] = {
   4588         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   4589         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   4590         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   4591         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   4592         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   4593         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   4594     };
   4595 
   4596     const static char *test[] = {
   4597         "l\\u0101",
   4598         "la",
   4599         "l\\u0101n",
   4600         "lan ",
   4601         "l\\u0113",
   4602         "le",
   4603         "l\\u0113n",
   4604         "len"
   4605     };
   4606 
   4607     const static char *test2[] = {
   4608         "x\\u0101",
   4609         "x\\u0100",
   4610         "X\\u0101",
   4611         "X\\u0100",
   4612         "x\\u00E1",
   4613         "x\\u00C1",
   4614         "X\\u00E1",
   4615         "X\\u00C1",
   4616         "x\\u01CE",
   4617         "x\\u01CD",
   4618         "X\\u01CE",
   4619         "X\\u01CD",
   4620         "x\\u00E0",
   4621         "x\\u00C0",
   4622         "X\\u00E0",
   4623         "X\\u00C0",
   4624         "xa",
   4625         "xA",
   4626         "Xa",
   4627         "XA",
   4628         "x\\u0101x",
   4629         "x\\u0100x",
   4630         "x\\u00E1x",
   4631         "x\\u00C1x",
   4632         "x\\u01CEx",
   4633         "x\\u01CDx",
   4634         "x\\u00E0x",
   4635         "x\\u00C0x",
   4636         "xax",
   4637         "xAx"
   4638     };
   4639 
   4640     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4641     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   4642     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   4643     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   4644 }
   4645 
   4646 static void TestBeforeTightening(void) {
   4647     static const struct {
   4648         const char *rules;
   4649         UErrorCode expectedStatus;
   4650     } tests[] = {
   4651         { "&[before 1]a<x", U_ZERO_ERROR },
   4652         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   4653         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   4654         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   4655         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   4656         { "&[before 2]a<<x",U_ZERO_ERROR },
   4657         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   4658         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   4659         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   4660         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   4661         { "&[before 3]a<<<x",U_ZERO_ERROR },
   4662         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   4663         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   4664     };
   4665 
   4666     int32_t i = 0;
   4667 
   4668     UErrorCode status = U_ZERO_ERROR;
   4669     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4670     uint32_t rlen = 0;
   4671 
   4672     UCollator *coll = NULL;
   4673 
   4674 
   4675     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4676         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   4677         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4678         if(status != tests[i].expectedStatus) {
   4679             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   4680                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   4681         }
   4682         ucol_close(coll);
   4683         status = U_ZERO_ERROR;
   4684     }
   4685 
   4686 }
   4687 
   4688 /*
   4689 &m < a
   4690 &[before 1] a < x <<< X << q <<< Q < z
   4691 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   4692 
   4693 &m < a
   4694 &[before 2] a << x <<< X << q <<< Q < z
   4695 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   4696 
   4697 &m < a
   4698 &[before 3] a <<< x <<< X << q <<< Q < z
   4699 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   4700 
   4701 
   4702 &m << a
   4703 &[before 1] a < x <<< X << q <<< Q < z
   4704 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   4705 
   4706 &m << a
   4707 &[before 2] a << x <<< X << q <<< Q < z
   4708 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   4709 
   4710 &m << a
   4711 &[before 3] a <<< x <<< X << q <<< Q < z
   4712 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   4713 
   4714 
   4715 &m <<< a
   4716 &[before 1] a < x <<< X << q <<< Q < z
   4717 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   4718 
   4719 &m <<< a
   4720 &[before 2] a << x <<< X << q <<< Q < z
   4721 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   4722 
   4723 &m <<< a
   4724 &[before 3] a <<< x <<< X << q <<< Q < z
   4725 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   4726 
   4727 
   4728 &[before 1] s < x <<< X << q <<< Q < z
   4729 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   4730 
   4731 &[before 2] s << x <<< X << q <<< Q < z
   4732 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   4733 
   4734 &[before 3] s <<< x <<< X << q <<< Q < z
   4735 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   4736 
   4737 
   4738 &[before 1] \u24DC < x <<< X << q <<< Q < z
   4739 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   4740 
   4741 &[before 2] \u24DC << x <<< X << q <<< Q < z
   4742 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   4743 
   4744 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   4745 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   4746 */
   4747 
   4748 
   4749 #if 0
   4750 /* requires features not yet supported */
   4751 static void TestMoreBefore(void) {
   4752     static const struct {
   4753         const char* rules;
   4754         const char* order[16];
   4755         int32_t size;
   4756     } tests[] = {
   4757         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   4758         { "m","M","x","X","q","Q","z","a","n" }, 9},
   4759         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   4760         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4761         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   4762         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4763         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   4764         { "x","X","q","Q","z","m","M","a","n" }, 9},
   4765         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   4766         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4767         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   4768         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4769         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   4770         { "x","X","q","Q","z","n","m","a","M" }, 9},
   4771         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   4772         { "x","X","q","Q","m","a","M","z","n" }, 9},
   4773         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   4774         { "m","x","X","a","M","q","Q","z","n" }, 9},
   4775         { "&[before 1] s < x <<< X << q <<< Q < z",
   4776         { "r","R","x","X","q","Q","z","s","n" }, 9},
   4777         { "&[before 2] s << x <<< X << q <<< Q < z",
   4778         { "r","R","x","X","q","Q","s","z","n" }, 9},
   4779         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   4780         { "r","R","x","X","s","q","Q","z","n" }, 9},
   4781         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   4782         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   4783         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   4784         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   4785         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   4786         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   4787     };
   4788 
   4789     int32_t i = 0;
   4790 
   4791     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4792         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   4793     }
   4794 }
   4795 #endif
   4796 
   4797 static void TestTailorNULL( void ) {
   4798     const static char* rule = "&a <<< '\\u0000'";
   4799     UErrorCode status = U_ZERO_ERROR;
   4800     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4801     uint32_t rlen = 0;
   4802     UChar a = 1, null = 0;
   4803     UCollationResult res = UCOL_EQUAL;
   4804 
   4805     UCollator *coll = NULL;
   4806 
   4807 
   4808     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   4809     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4810 
   4811     if(U_FAILURE(status)) {
   4812         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   4813     } else {
   4814         res = ucol_strcoll(coll, &a, 1, &null, 1);
   4815 
   4816         if(res != UCOL_LESS) {
   4817             log_err("NULL was not tailored properly!\n");
   4818         }
   4819     }
   4820 
   4821     ucol_close(coll);
   4822 }
   4823 
   4824 static void
   4825 TestUpperFirstQuaternary(void)
   4826 {
   4827   const char* tests[] = { "B", "b", "Bb", "bB" };
   4828   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   4829   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   4830   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4831 }
   4832 
   4833 static void
   4834 TestJ4960(void)
   4835 {
   4836   const char* tests[] = { "\\u00e2T", "aT" };
   4837   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   4838   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   4839   const char* tests2[] = { "a", "A" };
   4840   const char* rule = "&[first tertiary ignorable]=A=a";
   4841   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   4842   UColAttributeValue attVals2[] = { UCOL_ON };
   4843   /* Test whether we correctly ignore primary ignorables on case level when */
   4844   /* we have only primary & case level */
   4845   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   4846   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   4847   /* and case level */
   4848   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4849   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   4850   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   4851 }
   4852 
   4853 static void
   4854 TestJ5223(void)
   4855 {
   4856   static const char *test = "this is a test string";
   4857   UChar ustr[256];
   4858   int32_t ustr_length = u_unescape(test, ustr, 256);
   4859   unsigned char sortkey[256];
   4860   int32_t sortkey_length;
   4861   UErrorCode status = U_ZERO_ERROR;
   4862   static UCollator *coll = NULL;
   4863   coll = ucol_open("root", &status);
   4864   if(U_FAILURE(status)) {
   4865     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4866     return;
   4867   }
   4868   ucol_setStrength(coll, UCOL_PRIMARY);
   4869   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4870   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4871   if (U_FAILURE(status)) {
   4872     log_err("Failed setting atributes\n");
   4873     return;
   4874   }
   4875   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   4876   if (sortkey_length > 256) return;
   4877 
   4878   /* we mark the position where the null byte should be written in advance */
   4879   sortkey[sortkey_length-1] = 0xAA;
   4880 
   4881   /* we set the buffer size one byte higher than needed */
   4882   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4883     sortkey_length+1);
   4884 
   4885   /* no error occurs (for me) */
   4886   if (sortkey[sortkey_length-1] == 0xAA) {
   4887     log_err("Hit bug at first try\n");
   4888   }
   4889 
   4890   /* we mark the position where the null byte should be written again */
   4891   sortkey[sortkey_length-1] = 0xAA;
   4892 
   4893   /* this time we set the buffer size to the exact amount needed */
   4894   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4895     sortkey_length);
   4896 
   4897   /* now the trailing null byte is not written */
   4898   if (sortkey[sortkey_length-1] == 0xAA) {
   4899     log_err("Hit bug at second try\n");
   4900   }
   4901 
   4902   ucol_close(coll);
   4903 }
   4904 
   4905 /* Regression test for Thai partial sort key problem */
   4906 static void
   4907 TestJ5232(void)
   4908 {
   4909     const static char *test[] = {
   4910         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   4911         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   4912     };
   4913 
   4914     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   4915 }
   4916 
   4917 static void
   4918 TestJ5367(void)
   4919 {
   4920     const static char *test[] = { "a", "y" };
   4921     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   4922     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4923 }
   4924 
   4925 static void
   4926 TestVI5913(void)
   4927 {
   4928     UErrorCode status = U_ZERO_ERROR;
   4929     int32_t i, j;
   4930     UCollator *coll =NULL;
   4931     uint8_t  resColl[100], expColl[100];
   4932     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   4933     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
   4934     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   4935     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
   4936     static const UChar tData[][20]={
   4937         {0x1EAC, 0},
   4938         {0x0041, 0x0323, 0x0302, 0},
   4939         {0x1EA0, 0x0302, 0},
   4940         {0x00C2, 0x0323, 0},
   4941         {0x1ED8, 0},  /* O with dot and circumflex */
   4942         {0x1ECC, 0x0302, 0},
   4943         {0x1EB7, 0},
   4944         {0x1EA1, 0x0306, 0},
   4945     };
   4946     static const UChar tailorData[][20]={
   4947         {0x1FA2, 0},  /* Omega with 3 combining marks */
   4948         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   4949         {0x1FF3, 0x0313, 0x0300, 0},
   4950         {0x1F60, 0x0300, 0x0345, 0},
   4951         {0x1F62, 0x0345, 0},
   4952         {0x1FA0, 0x0300, 0},
   4953     };
   4954     static const UChar tailorData2[][20]={
   4955         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   4956         {0x0073, 0x0323, 0x030C, 0},
   4957         {0x0073, 0x030C, 0x0323, 0},
   4958     };
   4959     static const UChar tailorData3[][20]={
   4960         {0x007a, 0},  /*  z */
   4961         {0x0061, 0x0065, 0},  /*  a + e */
   4962         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   4963         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   4964         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   4965         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   4966         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   4967         {0x00EA, 0},  /* e with circumflex  */
   4968     };
   4969 
   4970     /* Test Vietnamese sort. */
   4971     coll = ucol_open("vi", &status);
   4972     if(U_FAILURE(status)) {
   4973         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   4974         return;
   4975     }
   4976     log_verbose("\n\nVI collation:");
   4977     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   4978         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4979     }
   4980     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   4981         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   4982     }
   4983     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   4984         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   4985     }
   4986     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   4987         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   4988     }
   4989 
   4990     for (j=0; j<8; j++) {
   4991         tLen = u_strlen(tData[j]);
   4992         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   4993         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   4994         for(i = 0; i<rLen; i++) {
   4995             log_verbose(" %02X", resColl[i]);
   4996         }
   4997     }
   4998 
   4999     ucol_close(coll);
   5000 
   5001     /* Test Romanian sort. */
   5002     coll = ucol_open("ro", &status);
   5003     log_verbose("\n\nRO collation:");
   5004     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   5005         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   5006     }
   5007     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   5008         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   5009     }
   5010     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   5011         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   5012     }
   5013 
   5014     for (j=4; j<8; j++) {
   5015         tLen = u_strlen(tData[j]);
   5016         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   5017         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   5018         for(i = 0; i<rLen; i++) {
   5019             log_verbose(" %02X", resColl[i]);
   5020         }
   5021     }
   5022     ucol_close(coll);
   5023 
   5024     /* Test the precomposed Greek character with 3 combining marks. */
   5025     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   5026     ruleLen = u_strlen(rule);
   5027     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5028     if (U_FAILURE(status)) {
   5029         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   5030         return;
   5031     }
   5032     sLen = u_strlen(tailorData[0]);
   5033     for (j=1; j<6; j++) {
   5034         tLen = u_strlen(tailorData[j]);
   5035         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   5036             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   5037         }
   5038     }
   5039     /* Test getSortKey. */
   5040     tLen = u_strlen(tailorData[0]);
   5041     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   5042     for (j=0; j<6; j++) {
   5043         tLen = u_strlen(tailorData[j]);
   5044         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   5045         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5046             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5047             for(i = 0; i<rLen; i++) {
   5048                 log_err(" %02X", resColl[i]);
   5049             }
   5050         }
   5051     }
   5052     ucol_close(coll);
   5053 
   5054     log_verbose("\n\nTailoring test for s with caron:");
   5055     ruleLen = u_strlen(rule2);
   5056     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5057     tLen = u_strlen(tailorData2[0]);
   5058     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   5059     for (j=1; j<3; j++) {
   5060         tLen = u_strlen(tailorData2[j]);
   5061         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   5062         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5063             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5064             for(i = 0; i<rLen; i++) {
   5065                 log_err(" %02X", resColl[i]);
   5066             }
   5067         }
   5068     }
   5069     ucol_close(coll);
   5070 
   5071     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   5072     ruleLen = u_strlen(rule3);
   5073     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5074     tLen = u_strlen(tailorData3[3]);
   5075     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   5076     for (j=4; j<6; j++) {
   5077         tLen = u_strlen(tailorData3[j]);
   5078         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   5079 
   5080         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5081             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5082             for(i = 0; i<rLen; i++) {
   5083                 log_err(" %02X", resColl[i]);
   5084             }
   5085         }
   5086 
   5087         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5088          for(i = 0; i<rLen; i++) {
   5089              log_verbose(" %02X", resColl[i]);
   5090          }
   5091     }
   5092     ucol_close(coll);
   5093 }
   5094 
   5095 static void
   5096 TestTailor6179(void)
   5097 {
   5098     UErrorCode status = U_ZERO_ERROR;
   5099     int32_t i;
   5100     UCollator *coll =NULL;
   5101     uint8_t  resColl[100];
   5102     int32_t  rLen, tLen, ruleLen;
   5103     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   5104     static const UChar rule1[]={
   5105             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   5106             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   5107             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   5108             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   5109     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   5110     static const UChar rule2[]={
   5111             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   5112             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   5113             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   5114             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   5115             0x3C,0x3C,0x20,0x62,0};
   5116 
   5117     static const UChar tData1[][4]={
   5118         {0x61, 0},
   5119         {0x62, 0},
   5120         { 0xFDD0,0x009E, 0}
   5121     };
   5122     static const UChar tData2[][4]={
   5123         {0x61, 0},
   5124         {0x62, 0},
   5125         { 0xFDD0,0x009E, 0}
   5126      };
   5127 
   5128     /*
   5129      * These values from FractionalUCA.txt will change,
   5130      * and need to be updated here.
   5131      */
   5132     static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
   5133     static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
   5134     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
   5135     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
   5136 
   5137     /* Test [Last Primary ignorable] */
   5138 
   5139     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
   5140     ruleLen = u_strlen(rule1);
   5141     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5142     if (U_FAILURE(status)) {
   5143         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   5144         return;
   5145     }
   5146     tLen = u_strlen(tData1[0]);
   5147     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   5148     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
   5149         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   5150         for(i = 0; i<rLen; i++) {
   5151             log_err(" %02X", resColl[i]);
   5152         }
   5153         log_err("\n");
   5154     }
   5155     tLen = u_strlen(tData1[1]);
   5156     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   5157     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
   5158         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   5159         for(i = 0; i<rLen; i++) {
   5160             log_err(" %02X", resColl[i]);
   5161         }
   5162         log_err("\n");
   5163     }
   5164     ucol_close(coll);
   5165 
   5166 
   5167     /* Test [Last Secondary ignorable] */
   5168     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
   5169     ruleLen = u_strlen(rule1);
   5170     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5171     if (U_FAILURE(status)) {
   5172         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
   5173         return;
   5174     }
   5175     tLen = u_strlen(tData2[0]);
   5176     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   5177     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
   5178         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5179         for(i = 0; i<rLen; i++) {
   5180             log_err(" %02X", resColl[i]);
   5181         }
   5182         log_err("\n");
   5183     }
   5184 if(isICUVersionAtLeast(52, 0, 1)) {  /* TODO: debug & fix, see ticket #8982 */
   5185     tLen = u_strlen(tData2[1]);
   5186     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   5187     if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
   5188         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5189         for(i = 0; i<rLen; i++) {
   5190             log_err(" %02X", resColl[i]);
   5191         }
   5192         log_err("\n");
   5193     }
   5194 }
   5195     ucol_close(coll);
   5196 }
   5197 
   5198 static void
   5199 TestUCAPrecontext(void)
   5200 {
   5201     UErrorCode status = U_ZERO_ERROR;
   5202     int32_t i, j;
   5203     UCollator *coll =NULL;
   5204     uint8_t  resColl[100], prevColl[100];
   5205     int32_t  rLen, tLen, ruleLen;
   5206     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   5207     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   5208     /* & l middle-dot << a  a is an expansion. */
   5209 
   5210     UChar tData1[][20]={
   5211             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   5212             { 0x387, 0}, /* standalone middle dot(0x387) */
   5213             { 0x61, 0},  /* a */
   5214             { 0x6C, 0},  /* l */
   5215             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   5216             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   5217             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   5218             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   5219             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   5220             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   5221             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   5222      };
   5223 
   5224     log_verbose("\n\nEN collation:");
   5225     coll = ucol_open("en", &status);
   5226     if (U_FAILURE(status)) {
   5227         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   5228         return;
   5229     }
   5230     for (j=0; j<11; j++) {
   5231         tLen = u_strlen(tData1[j]);
   5232         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5233         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5234             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5235                     j, tData1[j]);
   5236         }
   5237         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5238         for(i = 0; i<rLen; i++) {
   5239             log_verbose(" %02X", resColl[i]);
   5240         }
   5241         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5242      }
   5243      ucol_close(coll);
   5244 
   5245 
   5246      log_verbose("\n\nJA collation:");
   5247      coll = ucol_open("ja", &status);
   5248      if (U_FAILURE(status)) {
   5249          log_err("Tailoring test: &z <<a|- failed!");
   5250          return;
   5251      }
   5252      for (j=0; j<11; j++) {
   5253          tLen = u_strlen(tData1[j]);
   5254          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5255          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5256              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5257                      j, tData1[j]);
   5258          }
   5259          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5260          for(i = 0; i<rLen; i++) {
   5261              log_verbose(" %02X", resColl[i]);
   5262          }
   5263          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5264       }
   5265       ucol_close(coll);
   5266 
   5267 
   5268       log_verbose("\n\nTailoring test: & middle dot < a ");
   5269       ruleLen = u_strlen(rule1);
   5270       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5271       if (U_FAILURE(status)) {
   5272           log_err("Tailoring test: & middle dot < a failed!");
   5273           return;
   5274       }
   5275       for (j=0; j<11; j++) {
   5276           tLen = u_strlen(tData1[j]);
   5277           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5278           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5279               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5280                       j, tData1[j]);
   5281           }
   5282           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5283           for(i = 0; i<rLen; i++) {
   5284               log_verbose(" %02X", resColl[i]);
   5285           }
   5286           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5287        }
   5288        ucol_close(coll);
   5289 
   5290 
   5291        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   5292        ruleLen = u_strlen(rule2);
   5293        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5294        if (U_FAILURE(status)) {
   5295            log_err("Tailoring test: & l middle-dot << a failed!");
   5296            return;
   5297        }
   5298        for (j=0; j<11; j++) {
   5299            tLen = u_strlen(tData1[j]);
   5300            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5301            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5302                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5303                        j, tData1[j]);
   5304            }
   5305            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   5306                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   5307                        j, tData1[j]);
   5308            }
   5309            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5310            for(i = 0; i<rLen; i++) {
   5311                log_verbose(" %02X", resColl[i]);
   5312            }
   5313            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5314         }
   5315         ucol_close(coll);
   5316 }
   5317 
   5318 static void
   5319 TestOutOfBuffer5468(void)
   5320 {
   5321     static const char *test = "\\u4e00";
   5322     UChar ustr[256];
   5323     int32_t ustr_length = u_unescape(test, ustr, 256);
   5324     unsigned char shortKeyBuf[1];
   5325     int32_t sortkey_length;
   5326     UErrorCode status = U_ZERO_ERROR;
   5327     static UCollator *coll = NULL;
   5328 
   5329     coll = ucol_open("root", &status);
   5330     if(U_FAILURE(status)) {
   5331       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   5332       return;
   5333     }
   5334     ucol_setStrength(coll, UCOL_PRIMARY);
   5335     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   5336     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5337     if (U_FAILURE(status)) {
   5338       log_err("Failed setting atributes\n");
   5339       return;
   5340     }
   5341 
   5342     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   5343     if (sortkey_length != 4) {
   5344         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   5345     }
   5346     log_verbose("length of sortKey is %d", sortkey_length);
   5347     ucol_close(coll);
   5348 }
   5349 
   5350 #define TSKC_DATA_SIZE 5
   5351 #define TSKC_BUF_SIZE  50
   5352 static void
   5353 TestSortKeyConsistency(void)
   5354 {
   5355     UErrorCode icuRC = U_ZERO_ERROR;
   5356     UCollator* ucol;
   5357     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   5358 
   5359     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5360     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5361     int32_t i, j, i2;
   5362 
   5363     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   5364     if (U_FAILURE(icuRC))
   5365     {
   5366         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   5367         return;
   5368     }
   5369 
   5370     for (i = 0; i < TSKC_DATA_SIZE; i++)
   5371     {
   5372         UCharIterator uiter;
   5373         uint32_t state[2] = { 0, 0 };
   5374         int32_t dataLen = i+1;
   5375         for (j=0; j<TSKC_BUF_SIZE; j++)
   5376             bufFull[i][j] = bufPart[i][j] = 0;
   5377 
   5378         /* Full sort key */
   5379         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   5380 
   5381         /* Partial sort key */
   5382         uiter_setString(&uiter, data, dataLen);
   5383         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   5384         if (U_FAILURE(icuRC))
   5385         {
   5386             log_err("ucol_nextSortKeyPart failed\n");
   5387             ucol_close(ucol);
   5388             return;
   5389         }
   5390 
   5391         for (i2=0; i2<i; i2++)
   5392         {
   5393             UBool fullMatch = TRUE;
   5394             UBool partMatch = TRUE;
   5395             for (j=0; j<TSKC_BUF_SIZE; j++)
   5396             {
   5397                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   5398                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   5399             }
   5400             if (fullMatch != partMatch) {
   5401                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   5402                                   : "partial key was consistent, but full key changed\n");
   5403                 ucol_close(ucol);
   5404                 return;
   5405             }
   5406         }
   5407     }
   5408 
   5409     /*=============================================*/
   5410    ucol_close(ucol);
   5411 }
   5412 
   5413 /* ticket: 6101 */
   5414 static void TestCroatianSortKey(void) {
   5415     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   5416     UErrorCode status = U_ZERO_ERROR;
   5417     UCollator *ucol;
   5418     UCharIterator iter;
   5419 
   5420     static const UChar text[] = { 0x0044, 0xD81A };
   5421 
   5422     size_t length = sizeof(text)/sizeof(*text);
   5423 
   5424     uint8_t textSortKey[32];
   5425     size_t lenSortKey = 32;
   5426     size_t actualSortKeyLen;
   5427     uint32_t uStateInfo[2] = { 0, 0 };
   5428 
   5429     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   5430     if (U_FAILURE(status)) {
   5431         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   5432         return;
   5433     }
   5434 
   5435     uiter_setString(&iter, text, length);
   5436 
   5437     actualSortKeyLen = ucol_nextSortKeyPart(
   5438         ucol, &iter, (uint32_t*)uStateInfo,
   5439         textSortKey, lenSortKey, &status
   5440         );
   5441 
   5442     if (actualSortKeyLen == lenSortKey) {
   5443         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   5444     }
   5445 
   5446     ucol_close(ucol);
   5447 }
   5448 
   5449 /* ticket: 6140 */
   5450 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   5451  * they are both Hiragana and Katakana
   5452  */
   5453 #define SORTKEYLEN 50
   5454 static void TestHiragana(void) {
   5455     UErrorCode status = U_ZERO_ERROR;
   5456     UCollator* ucol;
   5457     UCollationResult strcollresult;
   5458     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   5459     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   5460     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   5461     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   5462     int32_t i, j;
   5463     uint8_t sortKey1[SORTKEYLEN];
   5464     uint8_t sortKey2[SORTKEYLEN];
   5465 
   5466     UCharIterator uiter1;
   5467     UCharIterator uiter2;
   5468     uint32_t state1[2] = { 0, 0 };
   5469     uint32_t state2[2] = { 0, 0 };
   5470     int32_t keySize1;
   5471     int32_t keySize2;
   5472 
   5473     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   5474             &status);
   5475     if (U_FAILURE(status)) {
   5476         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   5477         return;
   5478     }
   5479 
   5480     /* Start of full sort keys */
   5481     /* Full sort key1 */
   5482     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   5483     /* Full sort key2 */
   5484     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   5485     if (keySize1 == keySize2) {
   5486         for (i = 0; i < keySize1; i++) {
   5487             if (sortKey1[i] != sortKey2[i]) {
   5488                 log_err("Full sort keys are different. Should be equal.");
   5489             }
   5490         }
   5491     } else {
   5492         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   5493     }
   5494     /* End of full sort keys */
   5495 
   5496     /* Start of partial sort keys */
   5497     /* Partial sort key1 */
   5498     uiter_setString(&uiter1, data1, data1Len);
   5499     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   5500     /* Partial sort key2 */
   5501     uiter_setString(&uiter2, data2, data2Len);
   5502     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   5503     if (U_SUCCESS(status) && keySize1 == keySize2) {
   5504         for (j = 0; j < keySize1; j++) {
   5505             if (sortKey1[j] != sortKey2[j]) {
   5506                 log_err("Partial sort keys are different. Should be equal");
   5507             }
   5508         }
   5509     } else {
   5510         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   5511     }
   5512     /* End of partial sort keys */
   5513 
   5514     /* Start of strcoll */
   5515     /* Use ucol_strcoll() to determine ordering */
   5516     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   5517     if (strcollresult != UCOL_EQUAL) {
   5518         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   5519     }
   5520 
   5521     ucol_close(ucol);
   5522 }
   5523 
   5524 /* Convenient struct for running collation tests */
   5525 typedef struct {
   5526   const UChar source[MAX_TOKEN_LEN];  /* String on left */
   5527   const UChar target[MAX_TOKEN_LEN];  /* String on right */
   5528   UCollationResult result;            /* -1, 0 or +1, depending on collation */
   5529 } OneTestCase;
   5530 
   5531 /*
   5532  * Utility function to test one collation test case.
   5533  * @param testcases Array of test cases.
   5534  * @param n_testcases Size of the array testcases.
   5535  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   5536  * @param n_rules Size of the array str_rules.
   5537  */
   5538 static void doTestOneTestCase(const OneTestCase testcases[],
   5539                               int n_testcases,
   5540                               const char* str_rules[],
   5541                               int n_rules)
   5542 {
   5543   int rule_no, testcase_no;
   5544   UChar rule[500];
   5545   int32_t length = 0;
   5546   UErrorCode status = U_ZERO_ERROR;
   5547   UParseError parse_error;
   5548   UCollator  *myCollation;
   5549 
   5550   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5551 
   5552     length = u_unescape(str_rules[rule_no], rule, 500);
   5553     if (length == 0) {
   5554         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5555         return;
   5556     }
   5557     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5558     if(U_FAILURE(status)){
   5559         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5560         return;
   5561     }
   5562     log_verbose("Testing the <<* syntax\n");
   5563     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5564     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5565     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   5566       doTest(myCollation,
   5567              testcases[testcase_no].source,
   5568              testcases[testcase_no].target,
   5569              testcases[testcase_no].result
   5570              );
   5571     }
   5572     ucol_close(myCollation);
   5573   }
   5574 }
   5575 
   5576 const static OneTestCase rangeTestcases[] = {
   5577   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
   5578   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
   5579   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
   5580 
   5581   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
   5582   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
   5583   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
   5584   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
   5585   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
   5586 
   5587   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
   5588   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
   5589   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
   5590   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
   5591 
   5592   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
   5593   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
   5594   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
   5595   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
   5596   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
   5597   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
   5598   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
   5599   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
   5600 };
   5601 
   5602 static int nRangeTestcases = LEN(rangeTestcases);
   5603 
   5604 const static OneTestCase rangeTestcasesSupplemental[] = {
   5605   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
   5606   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
   5607   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5608   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5609   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5610   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5611   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5612 };
   5613 
   5614 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
   5615 
   5616 const static OneTestCase rangeTestcasesQwerty[] = {
   5617   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
   5618   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
   5619 
   5620   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
   5621   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
   5622 
   5623   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
   5624   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
   5625 
   5626   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
   5627   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
   5628 
   5629   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
   5630     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
   5631   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
   5632     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
   5633 };
   5634 
   5635 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
   5636 
   5637 static void TestSameStrengthList(void)
   5638 {
   5639   const char* strRules[] = {
   5640     /* Normal */
   5641     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   5642 
   5643     /* Lists */
   5644     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   5645   };
   5646   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5647 }
   5648 
   5649 static void TestSameStrengthListQuoted(void)
   5650 {
   5651   const char* strRules[] = {
   5652     /* Lists with quoted characters */
   5653     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   5654     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   5655 
   5656     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   5657     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   5658 
   5659     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   5660     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   5661   };
   5662   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5663 }
   5664 
   5665 static void TestSameStrengthListSupplemental(void)
   5666 {
   5667   const char* strRules[] = {
   5668     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
   5669     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   5670     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
   5671     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   5672   };
   5673   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5674 }
   5675 
   5676 static void TestSameStrengthListQwerty(void)
   5677 {
   5678   const char* strRules[] = {
   5679     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5680     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5681     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   5682     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   5683     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   5684 
   5685     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   5686     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   5687 
   5688     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   5689     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   5690 
   5691  };
   5692   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5693 }
   5694 
   5695 static void TestSameStrengthListQuotedQwerty(void)
   5696 {
   5697   const char* strRules[] = {
   5698     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5699     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5700     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   5701 
   5702     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   5703     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   5704    };
   5705   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5706 }
   5707 
   5708 static void TestSameStrengthListRanges(void)
   5709 {
   5710   const char* strRules[] = {
   5711     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   5712   };
   5713   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5714 }
   5715 
   5716 static void TestSameStrengthListSupplementalRanges(void)
   5717 {
   5718   const char* strRules[] = {
   5719     "&\\ufffe<*\\uffff-\\U00010002",
   5720   };
   5721   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5722 }
   5723 
   5724 static void TestSpecialCharacters(void)
   5725 {
   5726   const char* strRules[] = {
   5727     /* Normal */
   5728     "&';'<'+'<','<'-'<'&'<'*'",
   5729 
   5730     /* List */
   5731     "&';'<*'+,-&*'",
   5732 
   5733     /* Range */
   5734     "&';'<*'+'-'-&*'",
   5735   };
   5736 
   5737   const static OneTestCase specialCharacterStrings[] = {
   5738     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   5739     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   5740     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   5741     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   5742   };
   5743   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
   5744 }
   5745 
   5746 static void TestPrivateUseCharacters(void)
   5747 {
   5748   const char* strRules[] = {
   5749     /* Normal */
   5750     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   5751     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   5752   };
   5753 
   5754   const static OneTestCase privateUseCharacterStrings[] = {
   5755     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5756     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5757     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5758     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5759     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5760     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5761   };
   5762   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5763 }
   5764 
   5765 static void TestPrivateUseCharactersInList(void)
   5766 {
   5767   const char* strRules[] = {
   5768     /* List */
   5769     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   5770     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   5771     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   5772   };
   5773 
   5774   const static OneTestCase privateUseCharacterStrings[] = {
   5775     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5776     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5777     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5778     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5779     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5780     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5781   };
   5782   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5783 }
   5784 
   5785 static void TestPrivateUseCharactersInRange(void)
   5786 {
   5787   const char* strRules[] = {
   5788     /* Range */
   5789     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   5790     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   5791     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   5792   };
   5793 
   5794   const static OneTestCase privateUseCharacterStrings[] = {
   5795     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5796     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5797     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5798     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5799     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5800     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5801   };
   5802   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5803 }
   5804 
   5805 static void TestInvalidListsAndRanges(void)
   5806 {
   5807   const char* invalidRules[] = {
   5808     /* Range not in starred expression */
   5809     "&\\ufffe<\\uffff-\\U00010002",
   5810 
   5811     /* Range without start */
   5812     "&a<*-c",
   5813 
   5814     /* Range without end */
   5815     "&a<*b-",
   5816 
   5817     /* More than one hyphen */
   5818     "&a<*b-g-l",
   5819 
   5820     /* Range in the wrong order */
   5821     "&a<*k-b",
   5822 
   5823   };
   5824 
   5825   UChar rule[500];
   5826   UErrorCode status = U_ZERO_ERROR;
   5827   UParseError parse_error;
   5828   int n_rules = LEN(invalidRules);
   5829   int rule_no;
   5830   int length;
   5831   UCollator  *myCollation;
   5832 
   5833   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5834 
   5835     length = u_unescape(invalidRules[rule_no], rule, 500);
   5836     if (length == 0) {
   5837         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5838         return;
   5839     }
   5840     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5841     if(!U_FAILURE(status)){
   5842       log_err("ERROR: Could not cause a failure as expected: \n");
   5843     }
   5844     status = U_ZERO_ERROR;
   5845   }
   5846 }
   5847 
   5848 /*
   5849  * This test ensures that characters placed before a character in a different script have the same lead byte
   5850  * in their collation key before and after script reordering.
   5851  */
   5852 static void TestBeforeRuleWithScriptReordering(void)
   5853 {
   5854     UParseError error;
   5855     UErrorCode status = U_ZERO_ERROR;
   5856     UCollator  *myCollation;
   5857     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   5858     UChar rules[500];
   5859     uint32_t rulesLength = 0;
   5860     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5861     UCollationResult collResult;
   5862 
   5863     uint8_t baseKey[256];
   5864     uint32_t baseKeyLength;
   5865     uint8_t beforeKey[256];
   5866     uint32_t beforeKeyLength;
   5867 
   5868     UChar base[] = { 0x03b1 }; /* base */
   5869     int32_t baseLen = sizeof(base)/sizeof(*base);
   5870 
   5871     UChar before[] = { 0x0e01 }; /* ko kai */
   5872     int32_t beforeLen = sizeof(before)/sizeof(*before);
   5873 
   5874     /*UChar *data[] = { before, base };
   5875     genericRulesStarter(srules, data, 2);*/
   5876 
   5877     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   5878 
   5879 
   5880     /* build collator */
   5881     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   5882 
   5883     rulesLength = u_unescape(srules, rules, LEN(rules));
   5884     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5885     if(U_FAILURE(status)) {
   5886         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5887         return;
   5888     }
   5889 
   5890     /* check collation results - before rule applied but not script reordering */
   5891     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5892     if (collResult != UCOL_GREATER) {
   5893         log_err("Collation result not correct before script reordering = %d\n", collResult);
   5894     }
   5895 
   5896     /* check the lead byte of the collation keys before script reordering */
   5897     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5898     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5899     if (baseKey[0] != beforeKey[0]) {
   5900       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5901    }
   5902 
   5903     /* reorder the scripts */
   5904     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   5905     if(U_FAILURE(status)) {
   5906         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5907         return;
   5908     }
   5909 
   5910     /* check collation results - before rule applied and after script reordering */
   5911     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5912     if (collResult != UCOL_GREATER) {
   5913         log_err("Collation result not correct after script reordering = %d\n", collResult);
   5914     }
   5915 
   5916     /* check the lead byte of the collation keys after script reordering */
   5917     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5918     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5919     if (baseKey[0] != beforeKey[0]) {
   5920         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5921     }
   5922 
   5923     ucol_close(myCollation);
   5924 }
   5925 
   5926 /*
   5927  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   5928  */
   5929 static void TestNonLeadBytesDuringCollationReordering(void)
   5930 {
   5931     UErrorCode status = U_ZERO_ERROR;
   5932     UCollator  *myCollation;
   5933     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5934 
   5935     uint8_t baseKey[256];
   5936     uint32_t baseKeyLength;
   5937     uint8_t reorderKey[256];
   5938     uint32_t reorderKeyLength;
   5939 
   5940     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   5941 
   5942     uint32_t i;
   5943 
   5944 
   5945     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5946 
   5947     /* build collator tertiary */
   5948     myCollation = ucol_open("", &status);
   5949     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5950     if(U_FAILURE(status)) {
   5951         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5952         return;
   5953     }
   5954     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5955 
   5956     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5957     if(U_FAILURE(status)) {
   5958         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5959         return;
   5960     }
   5961     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5962 
   5963     if (baseKeyLength != reorderKeyLength) {
   5964         log_err("Key lengths not the same during reordering.\n");
   5965         return;
   5966     }
   5967 
   5968     for (i = 1; i < baseKeyLength; i++) {
   5969         if (baseKey[i] != reorderKey[i]) {
   5970             log_err("Collation key bytes not the same at position %d.\n", i);
   5971             return;
   5972         }
   5973     }
   5974     ucol_close(myCollation);
   5975 
   5976     /* build collator quaternary */
   5977     myCollation = ucol_open("", &status);
   5978     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   5979     if(U_FAILURE(status)) {
   5980         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5981         return;
   5982     }
   5983     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5984 
   5985     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5986     if(U_FAILURE(status)) {
   5987         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5988         return;
   5989     }
   5990     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5991 
   5992     if (baseKeyLength != reorderKeyLength) {
   5993         log_err("Key lengths not the same during reordering.\n");
   5994         return;
   5995     }
   5996 
   5997     for (i = 1; i < baseKeyLength; i++) {
   5998         if (baseKey[i] != reorderKey[i]) {
   5999             log_err("Collation key bytes not the same at position %d.\n", i);
   6000             return;
   6001         }
   6002     }
   6003     ucol_close(myCollation);
   6004 }
   6005 
   6006 /*
   6007  * Test reordering API.
   6008  */
   6009 static void TestReorderingAPI(void)
   6010 {
   6011     UErrorCode status = U_ZERO_ERROR;
   6012     UCollator  *myCollation;
   6013     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6014     int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
   6015     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6016     UCollationResult collResult;
   6017     int32_t retrievedReorderCodesLength;
   6018     int32_t retrievedReorderCodes[10];
   6019     UChar greekString[] = { 0x03b1 };
   6020     UChar punctuationString[] = { 0x203e };
   6021     int loopIndex;
   6022 
   6023     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6024 
   6025     /* build collator tertiary */
   6026     myCollation = ucol_open("", &status);
   6027     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6028     if(U_FAILURE(status)) {
   6029         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6030         return;
   6031     }
   6032 
   6033     /* set the reorderding */
   6034     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6035     if (U_FAILURE(status)) {
   6036         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6037         return;
   6038     }
   6039 
   6040     /* get the reordering */
   6041     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6042     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6043         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6044         return;
   6045     }
   6046     status = U_ZERO_ERROR;
   6047     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6048         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6049         return;
   6050     }
   6051     /* now let's really get it */
   6052     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6053     if (U_FAILURE(status)) {
   6054         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6055         return;
   6056     }
   6057     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6058         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6059         return;
   6060     }
   6061     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6062         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6063             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6064             return;
   6065         }
   6066     }
   6067     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6068     if (collResult != UCOL_LESS) {
   6069         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6070         return;
   6071     }
   6072 
   6073     /* clear the reordering */
   6074     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6075     if (U_FAILURE(status)) {
   6076         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6077         return;
   6078     }
   6079 
   6080     /* get the reordering again */
   6081     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6082     if (retrievedReorderCodesLength != 0) {
   6083         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6084         return;
   6085     }
   6086 
   6087     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6088     if (collResult != UCOL_GREATER) {
   6089         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6090         return;
   6091     }
   6092 
   6093     /* test for error condition on duplicate reorder codes */
   6094     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
   6095     if (!U_FAILURE(status)) {
   6096         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
   6097         return;
   6098     }
   6099 
   6100     status = U_ZERO_ERROR;
   6101     /* test for reorder codes after a reset code */
   6102     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
   6103     if (!U_FAILURE(status)) {
   6104         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
   6105         return;
   6106     }
   6107 
   6108     ucol_close(myCollation);
   6109 }
   6110 
   6111 /*
   6112  * Test reordering API.
   6113  */
   6114 static void TestReorderingAPIWithRuleCreatedCollator(void)
   6115 {
   6116     UErrorCode status = U_ZERO_ERROR;
   6117     UCollator  *myCollation;
   6118     UChar rules[90];
   6119     int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
   6120     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6121     UCollationResult collResult;
   6122     int32_t retrievedReorderCodesLength;
   6123     int32_t retrievedReorderCodes[10];
   6124     UChar greekString[] = { 0x03b1 };
   6125     UChar punctuationString[] = { 0x203e };
   6126     UChar hanString[] = { 0x65E5, 0x672C };
   6127     int loopIndex;
   6128 
   6129     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6130 
   6131     /* build collator from rules */
   6132     u_uastrcpy(rules, "[reorder Hani Grek]");
   6133     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
   6134     if(U_FAILURE(status)) {
   6135         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6136         return;
   6137     }
   6138 
   6139     /* get the reordering */
   6140     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6141     if (U_FAILURE(status)) {
   6142         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6143         return;
   6144     }
   6145     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   6146         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   6147         return;
   6148     }
   6149     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6150         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   6151             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6152             return;
   6153         }
   6154     }
   6155     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
   6156     if (collResult != UCOL_GREATER) {
   6157         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6158         return;
   6159     }
   6160 
   6161 
   6162     /* set the reorderding */
   6163     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6164     if (U_FAILURE(status)) {
   6165         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6166         return;
   6167     }
   6168 
   6169     /* get the reordering */
   6170     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6171     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6172         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6173         return;
   6174     }
   6175     status = U_ZERO_ERROR;
   6176     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6177         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6178         return;
   6179     }
   6180     /* now let's really get it */
   6181     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6182     if (U_FAILURE(status)) {
   6183         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6184         return;
   6185     }
   6186     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6187         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6188         return;
   6189     }
   6190     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6191         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6192             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6193             return;
   6194         }
   6195     }
   6196     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6197     if (collResult != UCOL_LESS) {
   6198         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6199         return;
   6200     }
   6201 
   6202     /* clear the reordering */
   6203     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6204     if (U_FAILURE(status)) {
   6205         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6206         return;
   6207     }
   6208 
   6209     /* get the reordering again */
   6210     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6211     if (retrievedReorderCodesLength != 0) {
   6212         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6213         return;
   6214     }
   6215 
   6216     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6217     if (collResult != UCOL_GREATER) {
   6218         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6219         return;
   6220     }
   6221 
   6222     ucol_close(myCollation);
   6223 }
   6224 
   6225 static int compareUScriptCodes(const void * a, const void * b)
   6226 {
   6227   return ( *(int32_t*)a - *(int32_t*)b );
   6228 }
   6229 
   6230 static void TestEquivalentReorderingScripts(void) {
   6231     UErrorCode status = U_ZERO_ERROR;
   6232     int32_t equivalentScripts[50];
   6233     int32_t equivalentScriptsLength;
   6234     int loopIndex;
   6235     int32_t equivalentScriptsResult[] = {
   6236         USCRIPT_BOPOMOFO,
   6237         USCRIPT_LISU,
   6238         USCRIPT_LYCIAN,
   6239         USCRIPT_CARIAN,
   6240         USCRIPT_LYDIAN,
   6241         USCRIPT_YI,
   6242         USCRIPT_OLD_ITALIC,
   6243         USCRIPT_GOTHIC,
   6244         USCRIPT_DESERET,
   6245         USCRIPT_SHAVIAN,
   6246         USCRIPT_OSMANYA,
   6247         USCRIPT_LINEAR_B,
   6248         USCRIPT_CYPRIOT,
   6249         USCRIPT_OLD_SOUTH_ARABIAN,
   6250         USCRIPT_AVESTAN,
   6251         USCRIPT_IMPERIAL_ARAMAIC,
   6252         USCRIPT_INSCRIPTIONAL_PARTHIAN,
   6253         USCRIPT_INSCRIPTIONAL_PAHLAVI,
   6254         USCRIPT_UGARITIC,
   6255         USCRIPT_OLD_PERSIAN,
   6256         USCRIPT_CUNEIFORM,
   6257         USCRIPT_EGYPTIAN_HIEROGLYPHS,
   6258         USCRIPT_PHONETIC_POLLARD,
   6259         USCRIPT_SORA_SOMPENG,
   6260         USCRIPT_MEROITIC_CURSIVE,
   6261         USCRIPT_MEROITIC_HIEROGLYPHS
   6262     };
   6263 
   6264     qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
   6265 
   6266     /* UScript.GOTHIC */
   6267     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
   6268     if (U_FAILURE(status)) {
   6269         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   6270         return;
   6271     }
   6272     /*
   6273     fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
   6274     fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
   6275     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6276         fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
   6277     }
   6278     */
   6279     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   6280         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   6281         return;
   6282     }
   6283     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6284         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   6285             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   6286             return;
   6287         }
   6288     }
   6289 
   6290     /* UScript.SHAVIAN */
   6291     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
   6292     if (U_FAILURE(status)) {
   6293         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   6294         return;
   6295     }
   6296     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   6297         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   6298         return;
   6299     }
   6300     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6301         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   6302             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   6303             return;
   6304         }
   6305     }
   6306 }
   6307 
   6308 static void TestReorderingAcrossCloning(void)
   6309 {
   6310     UErrorCode status = U_ZERO_ERROR;
   6311     UCollator  *myCollation;
   6312     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6313     UCollator *clonedCollation;
   6314     int32_t bufferSize;
   6315     int32_t retrievedReorderCodesLength;
   6316     int32_t retrievedReorderCodes[10];
   6317     int loopIndex;
   6318 
   6319     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6320 
   6321     /* build collator tertiary */
   6322     myCollation = ucol_open("", &status);
   6323     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6324     if(U_FAILURE(status)) {
   6325         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6326         return;
   6327     }
   6328 
   6329     /* set the reorderding */
   6330     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6331     if (U_FAILURE(status)) {
   6332         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6333         return;
   6334     }
   6335 
   6336     /* clone the collator */
   6337     clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
   6338     if (U_FAILURE(status)) {
   6339         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
   6340         return;
   6341     }
   6342 
   6343     /* get the reordering */
   6344     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6345     if (U_FAILURE(status)) {
   6346         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6347         return;
   6348     }
   6349     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6350         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6351         return;
   6352     }
   6353     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6354         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6355             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6356             return;
   6357         }
   6358     }
   6359 
   6360     /*uprv_free(buffer);*/
   6361     ucol_close(myCollation);
   6362     ucol_close(clonedCollation);
   6363 }
   6364 
   6365 /*
   6366  * Utility function to test one collation reordering test case set.
   6367  * @param testcases Array of test cases.
   6368  * @param n_testcases Size of the array testcases.
   6369  * @param reorderTokens Array of reordering codes.
   6370  * @param reorderTokensLen Size of the array reorderTokens.
   6371  */
   6372 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   6373 {
   6374     uint32_t testCaseNum;
   6375     UErrorCode status = U_ZERO_ERROR;
   6376     UCollator  *myCollation;
   6377 
   6378     myCollation = ucol_open("", &status);
   6379     if (U_FAILURE(status)) {
   6380         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6381         return;
   6382     }
   6383     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   6384     if(U_FAILURE(status)) {
   6385         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   6386         return;
   6387     }
   6388 
   6389     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   6390         doTest(myCollation,
   6391             testCases[testCaseNum].source,
   6392             testCases[testCaseNum].target,
   6393             testCases[testCaseNum].result
   6394         );
   6395     }
   6396     ucol_close(myCollation);
   6397 }
   6398 
   6399 static void TestGreekFirstReorder(void)
   6400 {
   6401     const char* strRules[] = {
   6402         "[reorder Grek]"
   6403     };
   6404 
   6405     const int32_t apiRules[] = {
   6406         USCRIPT_GREEK
   6407     };
   6408 
   6409     const static OneTestCase privateUseCharacterStrings[] = {
   6410         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6411         { {0x0041}, {0x0391}, UCOL_GREATER },
   6412         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   6413         { {0x0060}, {0x0391}, UCOL_LESS },
   6414         { {0x0391}, {0xe2dc}, UCOL_LESS },
   6415         { {0x0391}, {0x0060}, UCOL_GREATER },
   6416     };
   6417 
   6418     /* Test rules creation */
   6419     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6420 
   6421     /* Test collation reordering API */
   6422     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6423 }
   6424 
   6425 static void TestGreekLastReorder(void)
   6426 {
   6427     const char* strRules[] = {
   6428         "[reorder Zzzz Grek]"
   6429     };
   6430 
   6431     const int32_t apiRules[] = {
   6432         USCRIPT_UNKNOWN, USCRIPT_GREEK
   6433     };
   6434 
   6435     const static OneTestCase privateUseCharacterStrings[] = {
   6436         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6437         { {0x0041}, {0x0391}, UCOL_LESS },
   6438         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   6439         { {0x0060}, {0x0391}, UCOL_LESS },
   6440         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   6441     };
   6442 
   6443     /* Test rules creation */
   6444     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6445 
   6446     /* Test collation reordering API */
   6447     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6448 }
   6449 
   6450 static void TestNonScriptReorder(void)
   6451 {
   6452     const char* strRules[] = {
   6453         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   6454     };
   6455 
   6456     const int32_t apiRules[] = {
   6457         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   6458         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   6459         UCOL_REORDER_CODE_CURRENCY
   6460     };
   6461 
   6462     const static OneTestCase privateUseCharacterStrings[] = {
   6463         { {0x0391}, {0x0041}, UCOL_LESS },
   6464         { {0x0041}, {0x0391}, UCOL_GREATER },
   6465         { {0x0060}, {0x0041}, UCOL_LESS },
   6466         { {0x0060}, {0x0391}, UCOL_GREATER },
   6467         { {0x0024}, {0x0041}, UCOL_GREATER },
   6468     };
   6469 
   6470     /* Test rules creation */
   6471     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6472 
   6473     /* Test collation reordering API */
   6474     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6475 }
   6476 
   6477 static void TestHaniReorder(void)
   6478 {
   6479     const char* strRules[] = {
   6480         "[reorder Hani]"
   6481     };
   6482     const int32_t apiRules[] = {
   6483         USCRIPT_HAN
   6484     };
   6485 
   6486     const static OneTestCase privateUseCharacterStrings[] = {
   6487         { {0x4e00}, {0x0041}, UCOL_LESS },
   6488         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6489         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6490         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6491         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6492         { {0xfa27}, {0x0041}, UCOL_LESS },
   6493         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6494     };
   6495 
   6496     /* Test rules creation */
   6497     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6498 
   6499     /* Test collation reordering API */
   6500     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6501 }
   6502 
   6503 static void TestHaniReorderWithOtherRules(void)
   6504 {
   6505     const char* strRules[] = {
   6506         "[reorder Hani] &b<a"
   6507     };
   6508     /*const int32_t apiRules[] = {
   6509         USCRIPT_HAN
   6510     };*/
   6511 
   6512     const static OneTestCase privateUseCharacterStrings[] = {
   6513         { {0x4e00}, {0x0041}, UCOL_LESS },
   6514         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6515         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6516         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6517         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6518         { {0xfa27}, {0x0041}, UCOL_LESS },
   6519         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6520         { {0x0062}, {0x0061}, UCOL_LESS },
   6521     };
   6522 
   6523     /* Test rules creation */
   6524     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6525 }
   6526 
   6527 static void TestMultipleReorder(void)
   6528 {
   6529     const char* strRules[] = {
   6530         "[reorder Grek Zzzz DIGIT Latn Hani]"
   6531     };
   6532 
   6533     const int32_t apiRules[] = {
   6534         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
   6535     };
   6536 
   6537     const static OneTestCase collationTestCases[] = {
   6538         { {0x0391}, {0x0041}, UCOL_LESS},
   6539         { {0x0031}, {0x0041}, UCOL_LESS},
   6540         { {0x0041}, {0x4e00}, UCOL_LESS},
   6541     };
   6542 
   6543     /* Test rules creation */
   6544     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
   6545 
   6546     /* Test collation reordering API */
   6547     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
   6548 }
   6549 
   6550 /*
   6551  * Test that covers issue reported in ticket 8814
   6552  */
   6553 static void TestReorderWithNumericCollation(void)
   6554 {
   6555     UErrorCode status = U_ZERO_ERROR;
   6556     UCollator  *myCollation;
   6557     UCollator  *myReorderCollation;
   6558     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
   6559     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
   6560     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
   6561     UChar fortyS[] = { 0x0053 };
   6562     UChar fortyThreeP[] = { 0x0050 };
   6563     uint8_t fortyS_sortKey[128];
   6564     int32_t fortyS_sortKey_Length;
   6565     uint8_t fortyThreeP_sortKey[128];
   6566     int32_t fortyThreeP_sortKey_Length;
   6567     uint8_t fortyS_sortKey_reorder[128];
   6568     int32_t fortyS_sortKey_reorder_Length;
   6569     uint8_t fortyThreeP_sortKey_reorder[128];
   6570     int32_t fortyThreeP_sortKey_reorder_Length;
   6571     UCollationResult collResult;
   6572     UCollationResult collResultReorder;
   6573 
   6574     log_verbose("Testing reordering with and without numeric collation\n");
   6575 
   6576     /* build collator tertiary with numeric */
   6577     myCollation = ucol_open("", &status);
   6578     /*
   6579     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6580     */
   6581     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   6582     if(U_FAILURE(status)) {
   6583         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6584         return;
   6585     }
   6586 
   6587     /* build collator tertiary with numeric and reordering */
   6588     myReorderCollation = ucol_open("", &status);
   6589     /*
   6590     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
   6591     */
   6592     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   6593     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
   6594     if(U_FAILURE(status)) {
   6595         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6596         return;
   6597     }
   6598 
   6599     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
   6600     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
   6601     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
   6602     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
   6603 
   6604     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
   6605         log_err_status(status, "ERROR: couldn't generate sort keys\n");
   6606         return;
   6607     }
   6608     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   6609     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   6610     /*
   6611     fprintf(stderr, "\tcollResult = %x\n", collResult);
   6612     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
   6613     fprintf(stderr, "\nfortyS\n");
   6614     for (i = 0; i < fortyS_sortKey_Length; i++) {
   6615         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
   6616     }
   6617     fprintf(stderr, "\nfortyThreeP\n");
   6618     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
   6619         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
   6620     }
   6621     */
   6622     if (collResult != collResultReorder) {
   6623         log_err_status(status, "ERROR: collation results should have been the same.\n");
   6624         return;
   6625     }
   6626 
   6627     ucol_close(myCollation);
   6628     ucol_close(myReorderCollation);
   6629 }
   6630 
   6631 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   6632 {
   6633   for (; *a == *b; ++a, ++b) {
   6634     if (*a == 0) {
   6635       return 0;
   6636     }
   6637   }
   6638   return (*a < *b ? -1 : 1);
   6639 }
   6640 
   6641 static void TestImportRulesDeWithPhonebook(void)
   6642 {
   6643   const char* normalRules[] = {
   6644     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
   6645     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
   6646     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
   6647   };
   6648   const OneTestCase normalTests[] = {
   6649     { {0x00e6}, {0x00c6}, UCOL_LESS},
   6650     { {0x00fc}, {0x00dc}, UCOL_GREATER},
   6651   };
   6652 
   6653   const char* importRules[] = {
   6654     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
   6655     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   6656     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   6657   };
   6658   const OneTestCase importTests[] = {
   6659     { {0x00e6}, {0x00c6}, UCOL_LESS},
   6660     { {0x00fc}, {0x00dc}, UCOL_LESS},
   6661   };
   6662 
   6663   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
   6664   // BEGIN Android-remove. Android does not use rule-based collation0
   6665   // doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
   6666   // END Android-remove
   6667 }
   6668 
#if 0
/*
 * Disabled test: exercises [import] of the European Ordering Rules (EOR) and
 * of the Finnish standard tailoring, first separately and then combined.
 * Its registration in addMiscCollTest() is commented out with the note
 * "EOR rules removed from CLDR 21", so this code is kept for reference only.
 *
 * NOTE(review): each OneTestCase below appears to be
 * {source, target, expected comparison result}; OneTestCase and
 * doTestOneTestCase() are defined earlier in this file - confirm there.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRules[] = {
    "[import root-u-co-eor]",
  };

  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard tailoring on its own. */
  const char* fiStdRules[] = {
    "[import fi-u-co-standard]",
  };

  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  /* Only used by the call that is still commented out at the end of this function. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
  /* The explicit two-import rule is checked against the expectations for fi-u-co-eor. */
  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */

}
#endif
   6737 
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 *
 * The single test pair (U+3402 vs. U+4E1E) is expected to compare GREATER
 * under the dummy rule and LESS once the unihan rules are imported.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  const OneTestCase defaultTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Unihan rules, imported from the "ko" locale's unihan collation type. */
  const char* unihanRules[] = {
    "[import ko-u-co-unihan]",
  };

  const OneTestCase unihanTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));

}
#endif
   6769 
   6770 static void TestImport(void)
   6771 {
   6772     UCollator* vicoll;
   6773     UCollator* escoll;
   6774     UCollator* viescoll;
   6775     UCollator* importviescoll;
   6776     UParseError error;
   6777     UErrorCode status = U_ZERO_ERROR;
   6778     UChar* virules;
   6779     int32_t viruleslength;
   6780     UChar* esrules;
   6781     int32_t esruleslength;
   6782     UChar* viesrules;
   6783     int32_t viesruleslength;
   6784     char srules[500] = "[import vi][import es]";
   6785     UChar rules[500];
   6786     uint32_t length = 0;
   6787     int32_t itemCount;
   6788     int32_t i, k;
   6789     UChar32 start;
   6790     UChar32 end;
   6791     UChar str[500];
   6792     int32_t strLength;
   6793 
   6794     uint8_t sk1[500];
   6795     uint8_t sk2[500];
   6796 
   6797     UBool b;
   6798     USet* tailoredSet;
   6799     USet* importTailoredSet;
   6800 
   6801 
   6802     vicoll = ucol_open("vi", &status);
   6803     if(U_FAILURE(status)){
   6804         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   6805         return;
   6806     }
   6807 
   6808     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   6809     escoll = ucol_open("es", &status);
   6810     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   6811     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   6812     viesrules[0] = 0;
   6813     u_strcat(viesrules, virules);
   6814     u_strcat(viesrules, esrules);
   6815     viesruleslength = viruleslength + esruleslength;
   6816     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6817 
   6818     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6819     length = u_unescape(srules, rules, 500);
   6820     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6821     if(U_FAILURE(status)){
   6822         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6823         return;
   6824     }
   6825 
   6826     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   6827     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   6828 
   6829     if(!uset_equals(tailoredSet, importTailoredSet)){
   6830         log_err("Tailored sets not equal");
   6831     }
   6832 
   6833     uset_close(importTailoredSet);
   6834 
   6835     itemCount = uset_getItemCount(tailoredSet);
   6836 
   6837     for( i = 0; i < itemCount; i++){
   6838         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6839         if(strLength < 2){
   6840             for (; start <= end; start++){
   6841                 k = 0;
   6842                 U16_APPEND(str, k, 500, start, b);
   6843                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   6844                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   6845                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6846                     log_err("Sort key for %s not equal\n", str);
   6847                     break;
   6848                 }
   6849             }
   6850         }else{
   6851             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   6852             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   6853             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6854                 log_err("ZZSort key for %s not equal\n", str);
   6855                 break;
   6856             }
   6857 
   6858         }
   6859     }
   6860 
   6861     uset_close(tailoredSet);
   6862 
   6863     uprv_free(viesrules);
   6864 
   6865     ucol_close(vicoll);
   6866     ucol_close(escoll);
   6867     ucol_close(viescoll);
   6868     ucol_close(importviescoll);
   6869 }
   6870 
   6871 static void TestImportWithType(void)
   6872 {
   6873     UCollator* vicoll;
   6874     UCollator* decoll;
   6875     UCollator* videcoll;
   6876     UCollator* importvidecoll;
   6877     UParseError error;
   6878     UErrorCode status = U_ZERO_ERROR;
   6879     const UChar* virules;
   6880     int32_t viruleslength;
   6881     const UChar* derules;
   6882     int32_t deruleslength;
   6883     UChar* viderules;
   6884     int32_t videruleslength;
   6885     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   6886     UChar rules[500];
   6887     uint32_t length = 0;
   6888     int32_t itemCount;
   6889     int32_t i, k;
   6890     UChar32 start;
   6891     UChar32 end;
   6892     UChar str[500];
   6893     int32_t strLength;
   6894 
   6895     uint8_t sk1[500];
   6896     uint8_t sk2[500];
   6897 
   6898     USet* tailoredSet;
   6899     USet* importTailoredSet;
   6900 
   6901     vicoll = ucol_open("vi", &status);
   6902     if(U_FAILURE(status)){
   6903         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6904         return;
   6905     }
   6906     virules = ucol_getRules(vicoll, &viruleslength);
   6907     /* decoll = ucol_open("de@collation=phonebook", &status); */
   6908     decoll = ucol_open("de-u-co-phonebk", &status);
   6909     if(U_FAILURE(status)){
   6910         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6911         return;
   6912     }
   6913 
   6914 
   6915     derules = ucol_getRules(decoll, &deruleslength);
   6916     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   6917     viderules[0] = 0;
   6918     u_strcat(viderules, virules);
   6919     u_strcat(viderules, derules);
   6920     videruleslength = viruleslength + deruleslength;
   6921     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6922 
   6923     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6924     length = u_unescape(srules, rules, 500);
   6925     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6926     if(U_FAILURE(status)){
   6927         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6928         return;
   6929     }
   6930 
   6931     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   6932     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   6933 
   6934     if(!uset_equals(tailoredSet, importTailoredSet)){
   6935         log_err("Tailored sets not equal");
   6936     }
   6937 
   6938     uset_close(importTailoredSet);
   6939 
   6940     itemCount = uset_getItemCount(tailoredSet);
   6941 
   6942     for( i = 0; i < itemCount; i++){
   6943         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6944         if(strLength < 2){
   6945             for (; start <= end; start++){
   6946                 k = 0;
   6947                 U16_APPEND_UNSAFE(str, k, start);
   6948                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   6949                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   6950                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6951                     log_err("Sort key for %s not equal\n", str);
   6952                     break;
   6953                 }
   6954             }
   6955         }else{
   6956             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   6957             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   6958             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6959                 log_err("Sort key for %s not equal\n", str);
   6960                 break;
   6961             }
   6962 
   6963         }
   6964     }
   6965 
   6966     uset_close(tailoredSet);
   6967 
   6968     uprv_free(viderules);
   6969 
   6970     ucol_close(videcoll);
   6971     ucol_close(importvidecoll);
   6972     ucol_close(vicoll);
   6973     ucol_close(decoll);
   6974 }
   6975 
/*
 * Input strings for TestCaseLevelBufferOverflow(), reached through the
 * longUpperStrItems table. The arrays carry no terminating NUL; their lengths
 * are taken from the array sizes.
 */

/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};

/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};

/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
   7014 
/* Number of elements of a true array (must not be applied to a pointer). */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))

/* One test input: a UTF-16 string given as pointer plus explicit length. */
typedef struct {
    const UChar * longUpperStrPtr;   /* start of the string; NULL marks the end of a table */
    int32_t       longUpperStrLen;   /* number of UChars at longUpperStrPtr */
} LongUpperStrItem;

/*
 * String pointers must be in reverse collation order of the corresponding strings:
 * TestCaseLevelBufferOverflow() reports an error unless the sort key of each
 * entry compares less than the sort key of the entry before it.
 * The table ends with a { NULL, 0 } sentinel.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }
};

/* Capacity in bytes of the sort key buffers used by TestCaseLevelBufferOverflow(). */
enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
   7031 
   7032 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
   7033 static void TestCaseLevelBufferOverflow(void)
   7034 {
   7035     UErrorCode status = U_ZERO_ERROR;
   7036     UCollator * ucol = ucol_open("root", &status);
   7037     if ( U_SUCCESS(status) ) {
   7038         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
   7039         if ( U_SUCCESS(status) ) {
   7040             const LongUpperStrItem * itemPtr;
   7041             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
   7042             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
   7043                 int32_t sortKeyLen;
   7044                 if (itemPtr > longUpperStrItems) {
   7045                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
   7046                 }
   7047                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
   7048                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
   7049                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
   7050                     break;
   7051                 }
   7052                 if ( itemPtr > longUpperStrItems ) {
   7053                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
   7054                     if (compareResult >= 0) {
   7055                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
   7056                     }
   7057                 }
   7058             }
   7059         } else {
   7060             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
   7061         }
   7062         ucol_close(ucol);
   7063     } else {
   7064         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
   7065     }
   7066 }
   7067 
   7068 
/*
 * Registers test function x under the path "tscoll/cmsccoll/<name of x>".
 * Expects a variable named root (as in addMiscCollTest) to be in scope.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/**
 * Registers the miscellaneous collation tests of this file via addTest().
 *
 * Tests are added in the order listed. Entries that are commented out are
 * deliberately not registered; where a reason is known it is noted next to
 * the entry.
 *
 * @param root  handed through unchanged to addTest() by the TEST macro.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    /* BEGIN android-removed
       To save space, Android does not include the collation tailoring rules.
       We skip the tailing tests for collations. */
    /* TEST(TestVariableTopSetting); */
    /* END android-removed */
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    /* BEGIN android-removed: Due to Android does not include reverse UCA table.
    TEST(TestImport);
    TEST(TestImportWithType);
    END android-removed */

    /* Script reordering tests. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
}
   7185 
   7186 #endif /* #if !UCONFIG_NO_COLLATION */
   7187