Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2013, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "ucol_tok.h"
     33 #include "cmemory.h"
     34 #include "cstring.h"
     35 #include "uassert.h"
     36 #include "unicode/parseerr.h"
     37 #include "unicode/ucnv.h"
     38 #include "unicode/ures.h"
     39 #include "unicode/uscript.h"
     40 #include "unicode/utf16.h"
     41 #include "uparse.h"
     42 #include "putilimp.h"
     43 
     44 
     45 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     46 
     47 #define MAX_TOKEN_LEN 16
     48 
/**
 * Signature of a string-comparison callback used by the probing helpers in
 * this file (swampEarlier, swampLater, probeStrength, testSwitch).
 *
 * collator  opaque comparator handle; ucaTest casts it to a UCollator*
 * object    extra option value forwarded unchanged by the helpers
 * source    first string, sLen code units long
 * target    second string, tLen code units long
 *
 * Returns the ordering of source relative to target.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
     52 
     53 
     54 
     55 const static char cnt1[][10] = {
     56 
     57   "AA",
     58   "AC",
     59   "AZ",
     60   "AQ",
     61   "AB",
     62   "ABZ",
     63   "ABQ",
     64   "Z",
     65   "ABC",
     66   "Q",
     67   "B"
     68 };
     69 
     70 const static char cnt2[][10] = {
     71   "DA",
     72   "DAD",
     73   "DAZ",
     74   "MAR",
     75   "Z",
     76   "DAVIS",
     77   "MARK",
     78   "DAV",
     79   "DAVI"
     80 };
     81 
     82 static void IncompleteCntTest(void)
     83 {
     84   UErrorCode status = U_ZERO_ERROR;
     85   UChar temp[90];
     86   UChar t1[90];
     87   UChar t2[90];
     88 
     89   UCollator *coll =  NULL;
     90   uint32_t i = 0, j = 0;
     91   uint32_t size = 0;
     92 
     93   u_uastrcpy(temp, " & Z < ABC < Q < B");
     94 
     95   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     96 
     97   if(U_SUCCESS(status)) {
     98     size = sizeof(cnt1)/sizeof(cnt1[0]);
     99     for(i = 0; i < size-1; i++) {
    100       for(j = i+1; j < size; j++) {
    101         UCollationElements *iter;
    102         u_uastrcpy(t1, cnt1[i]);
    103         u_uastrcpy(t2, cnt1[j]);
    104         doTest(coll, t1, t2, UCOL_LESS);
    105         /* synwee : added collation element iterator test */
    106         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    107         if (U_FAILURE(status)) {
    108           log_err("Creation of iterator failed\n");
    109           break;
    110         }
    111         backAndForth(iter);
    112         ucol_closeElements(iter);
    113       }
    114     }
    115   }
    116 
    117   ucol_close(coll);
    118 
    119 
    120   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    121   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    122 
    123   if(U_SUCCESS(status)) {
    124     size = sizeof(cnt2)/sizeof(cnt2[0]);
    125     for(i = 0; i < size-1; i++) {
    126       for(j = i+1; j < size; j++) {
    127         UCollationElements *iter;
    128         u_uastrcpy(t1, cnt2[i]);
    129         u_uastrcpy(t2, cnt2[j]);
    130         doTest(coll, t1, t2, UCOL_LESS);
    131 
    132         /* synwee : added collation element iterator test */
    133         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    134         if (U_FAILURE(status)) {
    135           log_err("Creation of iterator failed\n");
    136           break;
    137         }
    138         backAndForth(iter);
    139         ucol_closeElements(iter);
    140       }
    141     }
    142   }
    143 
    144   ucol_close(coll);
    145 
    146 
    147 }
    148 
    149 const static char shifted[][20] = {
    150   "black bird",
    151   "black-bird",
    152   "blackbird",
    153   "black Bird",
    154   "black-Bird",
    155   "blackBird",
    156   "black birds",
    157   "black-birds",
    158   "blackbirds"
    159 };
    160 
    161 const static UCollationResult shiftedTert[] = {
    162   UCOL_EQUAL,
    163   UCOL_EQUAL,
    164   UCOL_EQUAL,
    165   UCOL_LESS,
    166   UCOL_EQUAL,
    167   UCOL_EQUAL,
    168   UCOL_LESS,
    169   UCOL_EQUAL,
    170   UCOL_EQUAL
    171 };
    172 
    173 const static char nonignorable[][20] = {
    174   "black bird",
    175   "black Bird",
    176   "black birds",
    177   "black-bird",
    178   "black-Bird",
    179   "black-birds",
    180   "blackbird",
    181   "blackBird",
    182   "blackbirds"
    183 };
    184 
    185 static void BlackBirdTest(void) {
    186   UErrorCode status = U_ZERO_ERROR;
    187   UChar t1[90];
    188   UChar t2[90];
    189 
    190   uint32_t i = 0, j = 0;
    191   uint32_t size = 0;
    192   UCollator *coll = ucol_open("en_US", &status);
    193 
    194   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    195   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    196 
    197   if(U_SUCCESS(status)) {
    198     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    199     for(i = 0; i < size-1; i++) {
    200       for(j = i+1; j < size; j++) {
    201         u_uastrcpy(t1, nonignorable[i]);
    202         u_uastrcpy(t2, nonignorable[j]);
    203         doTest(coll, t1, t2, UCOL_LESS);
    204       }
    205     }
    206   }
    207 
    208   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    209   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    210 
    211   if(U_SUCCESS(status)) {
    212     size = sizeof(shifted)/sizeof(shifted[0]);
    213     for(i = 0; i < size-1; i++) {
    214       for(j = i+1; j < size; j++) {
    215         u_uastrcpy(t1, shifted[i]);
    216         u_uastrcpy(t2, shifted[j]);
    217         doTest(coll, t1, t2, UCOL_LESS);
    218       }
    219     }
    220   }
    221 
    222   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    223   if(U_SUCCESS(status)) {
    224     size = sizeof(shifted)/sizeof(shifted[0]);
    225     for(i = 1; i < size; i++) {
    226       u_uastrcpy(t1, shifted[i-1]);
    227       u_uastrcpy(t2, shifted[i]);
    228       doTest(coll, t1, t2, shiftedTert[i]);
    229     }
    230   }
    231 
    232   ucol_close(coll);
    233 }
    234 
    235 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    236     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    237     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    238     {0x0041/*'A'*/, 0x0300, 0x0000},
    239     {0x00C0, 0x0301, 0x0000},
    240     /* this would work with forced normalization */
    241     {0x00C0, 0x0316, 0x0000}
    242 };
    243 
    244 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    245     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    246     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    247     {0x00C0, 0},
    248     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    249     /* this would work with forced normalization */
    250     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    251 };
    252 
    253 const static UCollationResult results[] = {
    254     UCOL_GREATER,
    255     UCOL_EQUAL,
    256     UCOL_EQUAL,
    257     UCOL_GREATER,
    258     UCOL_EQUAL
    259 };
    260 
    261 static void FunkyATest(void)
    262 {
    263 
    264     int32_t i;
    265     UErrorCode status = U_ZERO_ERROR;
    266     UCollator  *myCollation;
    267     myCollation = ucol_open("en_US", &status);
    268     if(U_FAILURE(status)){
    269         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    270         return;
    271     }
    272     log_verbose("Testing some A letters, for some reason\n");
    273     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    274     ucol_setStrength(myCollation, UCOL_TERTIARY);
    275     for (i = 0; i < 4 ; i++)
    276     {
    277         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    278     }
    279     ucol_close(myCollation);
    280 }
    281 
/*
 * Attribute value tables. Within the visible part of this file they are
 * only iterated by the disabled PrintMarkDavis routine, which sets each
 * value on a collator in turn. They have external linkage.
 */

/* Values for the UCOL_CASE_FIRST attribute. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values for the UCOL_ALTERNATE_HANDLING attribute. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values for the UCOL_CASE_LEVEL attribute. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values for the UCOL_STRENGTH attribute, weakest comparison level first. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
    306 
    307 #if 0
    308 static const char * strengthsC[] = {
    309     "UCOL_PRIMARY",
    310     "UCOL_SECONDARY",
    311     "UCOL_TERTIARY",
    312     "UCOL_QUATERNARY",
    313     "UCOL_IDENTICAL"
    314 };
    315 
    316 static const char * caseFirstC[] = {
    317     "UCOL_OFF",
    318     "UCOL_LOWER_FIRST",
    319     "UCOL_UPPER_FIRST"
    320 };
    321 
    322 
    323 static const char * alternateHandlingC[] = {
    324     "UCOL_NON_IGNORABLE",
    325     "UCOL_SHIFTED"
    326 };
    327 
    328 static const char * caseLevelC[] = {
    329     "UCOL_OFF",
    330     "UCOL_ON"
    331 };
    332 
    333 /* not used currently - does not test only prints */
    334 static void PrintMarkDavis(void)
    335 {
    336   UErrorCode status = U_ZERO_ERROR;
    337   UChar m[256];
    338   uint8_t sortkey[256];
    339   UCollator *coll = ucol_open("en_US", &status);
    340   uint32_t h,i,j,k, sortkeysize;
    341   uint32_t sizem = 0;
    342   char buffer[512];
    343   uint32_t len = 512;
    344 
    345   log_verbose("PrintMarkDavis");
    346 
    347   u_uastrcpy(m, "Mark Davis");
    348   sizem = u_strlen(m);
    349 
    350 
    351   m[1] = 0xe4;
    352 
    353   for(i = 0; i<sizem; i++) {
    354     fprintf(stderr, "\\u%04X ", m[i]);
    355   }
    356   fprintf(stderr, "\n");
    357 
    358   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    359     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    360     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    361 
    362     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    363       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    364       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    365 
    366       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    367         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    368         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    369 
    370         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    371           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    372           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    373           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    374           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    375         }
    376 
    377       }
    378 
    379     }
    380 
    381   }
    382 }
    383 #endif
    384 
    385 static void BillFairmanTest(void) {
    386 /*
    387 ** check for actual locale via ICU resource bundles
    388 **
    389 ** lp points to the original locale ("fr_FR_....")
    390 */
    391 
    392     UResourceBundle *lr,*cr;
    393     UErrorCode              lec = U_ZERO_ERROR;
    394     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    395 
    396     log_verbose("BillFairmanTest\n");
    397 
    398     lr = ures_open(NULL,lp,&lec);
    399     if (lr) {
    400         cr = ures_getByKey(lr,"collations",0,&lec);
    401         if (cr) {
    402             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    403             if (lp) {
    404                 if (U_SUCCESS(lec)) {
    405                     if(strcmp(lp, "fr") != 0) {
    406                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    407                     }
    408                 }
    409             }
    410             ures_close(cr);
    411         }
    412         ures_close(lr);
    413     }
    414 }
    415 
    416 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    417     UChar source[256] = { '\0'};
    418     UChar target[256] = { '\0'};
    419     UChar preP = 0x31a3;
    420     UChar preQ = 0x310d;
    421 /*
    422     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
    423     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
    424 */
    425     /*log_verbose("Testing primary\n");*/
    426 
    427     doTest(col, p, q, UCOL_LESS);
    428 /*
    429     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
    430 
    431     if(result!=UCOL_LESS){
    432        aescstrdup(p,utfSource,256);
    433        aescstrdup(q,utfTarget,256);
    434        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    435     }
    436 */
    437     source[0] = preP;
    438     u_strcpy(source+1,p);
    439     target[0] = preQ;
    440     u_strcpy(target+1,q);
    441     doTest(col, source, target, UCOL_LESS);
    442 /*
    443     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
    444 */
    445 }
    446 
    447 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    448     UChar source[256] = { '\0'};
    449     UChar target[256] = { '\0'};
    450 
    451     /*log_verbose("Testing secondary\n");*/
    452 
    453     doTest(col, p, q, UCOL_LESS);
    454 /*
    455     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
    456 */
    457     source[0] = 0x0053;
    458     u_strcpy(source+1,p);
    459     target[0]= 0x0073;
    460     u_strcpy(target+1,q);
    461 
    462     doTest(col, source, target, UCOL_LESS);
    463 /*
    464     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    465 */
    466 
    467 
    468     u_strcpy(source,p);
    469     source[u_strlen(p)] = 0x62;
    470     source[u_strlen(p)+1] = 0;
    471 
    472 
    473     u_strcpy(target,q);
    474     target[u_strlen(q)] = 0x61;
    475     target[u_strlen(q)+1] = 0;
    476 
    477     doTest(col, source, target, UCOL_GREATER);
    478 
    479 /*
    480     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
    481 */
    482 }
    483 
    484 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    485     UChar source[256] = { '\0'};
    486     UChar target[256] = { '\0'};
    487 
    488     /*log_verbose("Testing tertiary\n");*/
    489 
    490     doTest(col, p, q, UCOL_LESS);
    491 /*
    492     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
    493 */
    494     source[0] = 0x0020;
    495     u_strcpy(source+1,p);
    496     target[0]= 0x002D;
    497     u_strcpy(target+1,q);
    498 
    499     doTest(col, source, target, UCOL_LESS);
    500 /*
    501     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
    502 */
    503 
    504     u_strcpy(source,p);
    505     source[u_strlen(p)] = 0xE0;
    506     source[u_strlen(p)+1] = 0;
    507 
    508     u_strcpy(target,q);
    509     target[u_strlen(q)] = 0x61;
    510     target[u_strlen(q)+1] = 0;
    511 
    512     doTest(col, source, target, UCOL_GREATER);
    513 
    514 /*
    515     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    516 */
    517 }
    518 
    519 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    520 /*
    521     UChar source[256] = { '\0'};
    522     UChar target[256] = { '\0'};
    523 */
    524 
    525     doTest(col, p, q, UCOL_EQUAL);
    526 /*
    527     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    528 */
    529 }
    530 
    531 static void testCollator(UCollator *coll, UErrorCode *status) {
    532   const UChar *rules = NULL, *current = NULL;
    533   int32_t ruleLen = 0;
    534   uint32_t strength = 0;
    535   uint32_t chOffset = 0; uint32_t chLen = 0;
    536   uint32_t exOffset = 0; uint32_t exLen = 0;
    537   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    538   uint32_t firstEx = 0;
    539 /*  uint32_t rExpsLen = 0; */
    540   uint32_t firstLen = 0;
    541   UBool varT = FALSE; UBool top_ = TRUE;
    542   uint16_t specs = 0;
    543   UBool startOfRules = TRUE;
    544   UBool lastReset = FALSE;
    545   UBool before = FALSE;
    546   uint32_t beforeStrength = 0;
    547   UColTokenParser src;
    548   UColOptionSet opts;
    549 
    550   UChar first[256];
    551   UChar second[256];
    552   UChar tempB[256];
    553   uint32_t tempLen;
    554   UChar *rulesCopy = NULL;
    555   UParseError parseError;
    556 
    557   uprv_memset(&src, 0, sizeof(UColTokenParser));
    558 
    559   src.opts = &opts;
    560 
    561   rules = ucol_getRules(coll, &ruleLen);
    562   if(U_SUCCESS(*status) && ruleLen > 0) {
    563     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    564     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    565     src.current = src.source = rulesCopy;
    566     src.end = rulesCopy+ruleLen;
    567     src.extraCurrent = src.end;
    568     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    569     *first = *second = 0;
    570 
    571 	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    572 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    573     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
    574       strength = src.parsedToken.strength;
    575       chOffset = src.parsedToken.charsOffset;
    576       chLen = src.parsedToken.charsLen;
    577       exOffset = src.parsedToken.extensionOffset;
    578       exLen = src.parsedToken.extensionLen;
    579       prefixOffset = src.parsedToken.prefixOffset;
    580       prefixLen = src.parsedToken.prefixLen;
    581       specs = src.parsedToken.flags;
    582 
    583       startOfRules = FALSE;
    584       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    585       (void)varT;    /* Suppress set but not used warning. */
    586       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    587       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
    588         second[0] = 0;
    589       } else {
    590         u_strncpy(second,src.source+chOffset, chLen);
    591         second[chLen] = 0;
    592 
    593         if(exLen > 0 && firstEx == 0) {
    594           u_strncat(first, src.source+exOffset, exLen);
    595           first[firstLen+exLen] = 0;
    596         }
    597 
    598         if(lastReset == TRUE && prefixLen != 0) {
    599           u_strncpy(first+prefixLen, first, firstLen);
    600           u_strncpy(first, src.source+prefixOffset, prefixLen);
    601           first[firstLen+prefixLen] = 0;
    602           firstLen = firstLen+prefixLen;
    603         }
    604 
    605         if(before == TRUE) { /* swap first and second */
    606           u_strcpy(tempB, first);
    607           u_strcpy(first, second);
    608           u_strcpy(second, tempB);
    609 
    610           tempLen = firstLen;
    611           firstLen = chLen;
    612           chLen = tempLen;
    613 
    614           tempLen = firstEx;
    615           firstEx = exLen;
    616           exLen = tempLen;
    617           if(beforeStrength < strength) {
    618             strength = beforeStrength;
    619           }
    620         }
    621       }
    622       lastReset = FALSE;
    623 
    624       switch(strength){
    625       case UCOL_IDENTICAL:
    626           testEquality(coll,first,second);
    627           break;
    628       case UCOL_PRIMARY:
    629           testPrimary(coll,first,second);
    630           break;
    631       case UCOL_SECONDARY:
    632           testSecondary(coll,first,second);
    633           break;
    634       case UCOL_TERTIARY:
    635           testTertiary(coll,first,second);
    636           break;
    637       case UCOL_TOK_RESET:
    638         lastReset = TRUE;
    639         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
    640         if(before) {
    641           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
    642         }
    643         break;
    644       default:
    645           break;
    646       }
    647 
    648       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
    649         before = FALSE;
    650       } else {
    651         firstLen = chLen;
    652         firstEx = exLen;
    653         u_strcpy(first, second);
    654       }
    655     }
    656     uprv_free(src.source);
    657     uprv_free(src.reorderCodes);
    658   }
    659 }
    660 
    661 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    662   UCollator *UCA = (UCollator *)collator;
    663   return ucol_strcoll(UCA, source, sLen, target, tLen);
    664 }
    665 
    666 /*
    667 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    668 #if U_PLATFORM_HAS_WIN32_API
    669   LCID lcid = (LCID)collator;
    670   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
    671 #else
    672   return 0;
    673 #endif
    674 }
    675 */
    676 
    677 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
    678                                      UChar s1, UChar s2,
    679                                      const UChar *s, const uint32_t sLen,
    680                                      const UChar *t, const uint32_t tLen) {
    681   UChar source[256] = {0};
    682   UChar target[256] = {0};
    683 
    684   source[0] = s1;
    685   u_strcpy(source+1, s);
    686   target[0] = s2;
    687   u_strcpy(target+1, t);
    688 
    689   return func(collator, opts, source, sLen+1, target, tLen+1);
    690 }
    691 
    692 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
    693                                    UChar s1, UChar s2,
    694                                    const UChar *s, const uint32_t sLen,
    695                                    const UChar *t, const uint32_t tLen) {
    696   UChar source[256] = {0};
    697   UChar target[256] = {0};
    698 
    699   u_strcpy(source, s);
    700   source[sLen] = s1;
    701   u_strcpy(target, t);
    702   target[tLen] = s2;
    703 
    704   return func(collator, opts, source, sLen+1, target, tLen+1);
    705 }
    706 
    707 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
    708                               const UChar *s, const uint32_t sLen,
    709                               const UChar *t, const uint32_t tLen,
    710                               UCollationResult result) {
    711   /*UChar fPrimary = 0x6d;*/
    712   /*UChar sPrimary = 0x6e;*/
    713   UChar fSecondary = 0x310d;
    714   UChar sSecondary = 0x31a3;
    715   UChar fTertiary = 0x310f;
    716   UChar sTertiary = 0x31b7;
    717 
    718   UCollationResult oposite;
    719   if(result == UCOL_EQUAL) {
    720     return UCOL_IDENTICAL;
    721   } else if(result == UCOL_GREATER) {
    722     oposite = UCOL_LESS;
    723   } else {
    724     oposite = UCOL_GREATER;
    725   }
    726 
    727   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    728     return UCOL_PRIMARY;
    729   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
    730     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
    731     return UCOL_SECONDARY;
    732   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
    733     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    734     return UCOL_TERTIARY;
    735   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
    736     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
    737     return UCOL_QUATERNARY;
    738   } else {
    739     return UCOL_IDENTICAL;
    740   }
    741 }
    742 
    743 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    744   uint32_t i = 0;
    745 
    746   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    747     buffer[0] = '=';
    748     buffer[1] = '=';
    749     buffer[2] = '\0';
    750   } else if(res == UCOL_GREATER) {
    751     for(i = 0; i<strength+1; i++) {
    752       buffer[i] = '>';
    753     }
    754     buffer[strength+1] = '\0';
    755   } else {
    756     for(i = 0; i<strength+1; i++) {
    757       buffer[i] = '<';
    758     }
    759     buffer[strength+1] = '\0';
    760   }
    761 
    762   return buffer;
    763 }
    764 
    765 
    766 
    767 static void logFailure (const char *platform, const char *test,
    768                         const UChar *source, const uint32_t sLen,
    769                         const UChar *target, const uint32_t tLen,
    770                         UCollationResult realRes, uint32_t realStrength,
    771                         UCollationResult expRes, uint32_t expStrength, UBool error) {
    772 
    773   uint32_t i = 0;
    774 
    775   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
    776   static int32_t maxOutputLength = 0;
    777   int32_t outputLength;
    778 
    779   *sEsc = *tEsc = *s = *t = 0;
    780   if(error == TRUE) {
    781     log_err("Difference between expected and generated order. Run test with -v for more info\n");
    782   } else if(getTestOption(VERBOSITY_OPTION) == 0) {
    783     return;
    784   }
    785   for(i = 0; i<sLen; i++) {
    786     sprintf(b, "%04X", source[i]);
    787     strcat(sEsc, "\\u");
    788     strcat(sEsc, b);
    789     strcat(s, b);
    790     strcat(s, " ");
    791     if(source[i] < 0x80) {
    792       sprintf(b, "(%c)", source[i]);
    793       strcat(sEsc, b);
    794     }
    795   }
    796   for(i = 0; i<tLen; i++) {
    797     sprintf(b, "%04X", target[i]);
    798     strcat(tEsc, "\\u");
    799     strcat(tEsc, b);
    800     strcat(t, b);
    801     strcat(t, " ");
    802     if(target[i] < 0x80) {
    803       sprintf(b, "(%c)", target[i]);
    804       strcat(tEsc, b);
    805     }
    806   }
    807 /*
    808   strcpy(output, "[[ ");
    809   strcat(output, sEsc);
    810   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    811   strcat(output, tEsc);
    812 
    813   strcat(output, " : ");
    814 
    815   strcat(output, sEsc);
    816   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    817   strcat(output, tEsc);
    818   strcat(output, " ]] ");
    819 
    820   log_verbose("%s", output);
    821 */
    822 
    823 
    824   strcpy(output, "DIFF: ");
    825 
    826   strcat(output, s);
    827   strcat(output, " : ");
    828   strcat(output, t);
    829 
    830   strcat(output, test);
    831   strcat(output, ": ");
    832 
    833   strcat(output, sEsc);
    834   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    835   strcat(output, tEsc);
    836 
    837   strcat(output, " ");
    838 
    839   strcat(output, platform);
    840   strcat(output, ": ");
    841 
    842   strcat(output, sEsc);
    843   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    844   strcat(output, tEsc);
    845 
    846   outputLength = (int32_t)strlen(output);
    847   if(outputLength > maxOutputLength) {
    848     maxOutputLength = outputLength;
    849     U_ASSERT(outputLength < sizeof(output));
    850   }
    851 
    852   log_verbose("%s\n", output);
    853 
    854 }
    855 
    856 /*
    857 static void printOutRules(const UChar *rules) {
    858   uint32_t len = u_strlen(rules);
    859   uint32_t i = 0;
    860   char toPrint;
    861   uint32_t line = 0;
    862 
    863   fprintf(stdout, "Rules:");
    864 
    865   for(i = 0; i<len; i++) {
    866     if(rules[i]<0x7f && rules[i]>=0x20) {
    867       toPrint = (char)rules[i];
    868       if(toPrint == '&') {
    869         line = 1;
    870         fprintf(stdout, "\n&");
    871       } else if(toPrint == ';') {
    872         fprintf(stdout, "<<");
    873         line+=2;
    874       } else if(toPrint == ',') {
    875         fprintf(stdout, "<<<");
    876         line+=3;
    877       } else {
    878         fprintf(stdout, "%c", toPrint);
    879         line++;
    880       }
    881     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
    882       fprintf(stdout, "\\u%04X", rules[i]);
    883       line+=6;
    884     }
    885     if(line>72) {
    886       fprintf(stdout, "\n");
    887       line = 0;
    888     }
    889   }
    890 
    891   log_verbose("\n");
    892 
    893 }
    894 */
    895 
    896 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
    897   uint32_t diffs = 0;
    898   UCollationResult realResult;
    899   uint32_t realStrength;
    900 
    901   uint32_t sLen = u_strlen(first);
    902   uint32_t tLen = u_strlen(second);
    903 
    904   realResult = func(collator, opts, first, sLen, second, tLen);
    905   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
    906 
    907   if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
    908     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
    909     diffs++;
    910   } else if(realResult != UCOL_LESS || realStrength != strength) {
    911     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
    912     diffs++;
    913   }
    914   return diffs;
    915 }
    916 
    917 
    918 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
    919   const UChar *rules = NULL, *current = NULL;
    920   int32_t ruleLen = 0;
    921   uint32_t strength = 0;
    922   uint32_t chOffset = 0; uint32_t chLen = 0;
    923   uint32_t exOffset = 0; uint32_t exLen = 0;
    924   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    925 /*  uint32_t rExpsLen = 0; */
    926   uint32_t firstLen = 0, secondLen = 0;
    927   UBool varT = FALSE; UBool top_ = TRUE;
    928   uint16_t specs = 0;
    929   UBool startOfRules = TRUE;
    930   UColTokenParser src;
    931   UColOptionSet opts;
    932 
    933   UChar first[256];
    934   UChar second[256];
    935   UChar *rulesCopy = NULL;
    936 
    937   uint32_t UCAdiff = 0;
    938   uint32_t Windiff = 1;
    939   UParseError parseError;
    940 
    941   (void)top_;      /* Suppress set but not used warnings. */
    942   (void)varT;
    943   (void)secondLen;
    944   (void)prefixLen;
    945   (void)prefixOffset;
    946 
    947   uprv_memset(&src, 0, sizeof(UColTokenParser));
    948   src.opts = &opts;
    949 
    950   rules = ucol_getRules(coll, &ruleLen);
    951 
    952   /*printOutRules(rules);*/
    953 
    954   if(U_SUCCESS(*status) && ruleLen > 0) {
    955     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    956     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    957     src.current = src.source = rulesCopy;
    958     src.end = rulesCopy+ruleLen;
    959     src.extraCurrent = src.end;
    960     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    961     *first = *second = 0;
    962 
    963     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    964        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    965     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
    966       strength = src.parsedToken.strength;
    967       chOffset = src.parsedToken.charsOffset;
    968       chLen = src.parsedToken.charsLen;
    969       exOffset = src.parsedToken.extensionOffset;
    970       exLen = src.parsedToken.extensionLen;
    971       prefixOffset = src.parsedToken.prefixOffset;
    972       prefixLen = src.parsedToken.prefixLen;
    973       specs = src.parsedToken.flags;
    974 
    975       startOfRules = FALSE;
    976       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    977       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    978 
    979       u_strncpy(second,src.source+chOffset, chLen);
    980       second[chLen] = 0;
    981       secondLen = chLen;
    982 
    983       if(exLen > 0) {
    984         u_strncat(first, src.source+exOffset, exLen);
    985         first[firstLen+exLen] = 0;
    986         firstLen += exLen;
    987       }
    988 
    989       if(strength != UCOL_TOK_RESET) {
    990         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
    991           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
    992           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
    993         }
    994       }
    995 
    996 
    997       firstLen = chLen;
    998       u_strcpy(first, second);
    999 
   1000     }
   1001     if(UCAdiff != 0 && Windiff != 0) {
   1002       log_verbose("\n");
   1003     }
   1004     if(UCAdiff == 0) {
   1005       log_verbose("No immediate difference with %s!\n", refName);
   1006     }
   1007     if(Windiff == 0) {
   1008       log_verbose("No immediate difference with Win32!\n");
   1009     }
   1010     uprv_free(src.source);
   1011     uprv_free(src.reorderCodes);
   1012   }
   1013 }
   1014 
   1015 /*
   1016  * Takes two CEs (lead and continuation) and
   1017  * compares them as CEs should be compared:
   1018  * primary vs. primary, secondary vs. secondary
   1019  * tertiary vs. tertiary
   1020  */
   1021 static int32_t compareCEs(uint32_t s1, uint32_t s2,
   1022                    uint32_t t1, uint32_t t2) {
   1023   uint32_t s = 0, t = 0;
   1024   if(s1 == t1 && s2 == t2) {
   1025     return 0;
   1026   }
   1027   s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
   1028   t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
   1029   if(s < t) {
   1030     return -1;
   1031   } else if(s > t) {
   1032     return 1;
   1033   } else {
   1034     s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
   1035     t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
   1036     if(s < t) {
   1037       return -1;
   1038     } else if(s > t) {
   1039       return 1;
   1040     } else {
   1041       s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
   1042       t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
   1043       if(s < t) {
   1044         return -1;
   1045       } else {
   1046         return 1;
   1047       }
   1048     }
   1049   }
   1050 }
   1051 
/*
 * One entry of the indirect-position table filled in by
 * setIndirectBoundaries(): a start CE pair and a limit CE pair, each made of
 * a CE and the CE that follows it ("Cont" - presumably its continuation CE).
 * The limit pair is 0/0 when no end is supplied.
 */
typedef struct {
  uint32_t startCE;      /* start[0] of the boundary */
  uint32_t startContCE;  /* start[1] of the boundary */
  uint32_t limitCE;      /* end[0], or 0 if there is no end */
  uint32_t limitContCE;  /* end[1], or 0 if there is no end */
} indirectBoundaries;
   1058 
/* These values are used for finding CE values for indirect positioning.  */
/* Indirect positioning is a mechanism for allowing resets on symbolic    */
/* values. It only works for resets, and indirect names cannot be         */
/* tailored. An indirect name can define either an anchor point or a      */
/* range. An anchor point behaves in exactly the same way as a code point */
/* in a reset would, except that it cannot be tailored. A range (we       */
/* currently only know of the [top] range) explicitly sets the upper      */
/* bound for generated CEs, thus allowing for better control over how     */
/* many CEs can be squeezed into the range without a performance penalty. */
/* In that respect, we use [top] for tailoring of locales that use CJK    */
/* characters. Other indirect values are currently a pure convenience;    */
/* they can be used to ensure that the CEs will always be positioned in   */
/* the same place relative to a point with known properties (e.g. first   */
/* primary ignorable).                                                    */
   1073 static indirectBoundaries ucolIndirectBoundaries[15];
   1074 static UBool indirectBoundariesSet = FALSE;
   1075 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
   1076     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
   1077     /* to initalize here. */
   1078     ucolIndirectBoundaries[indexR].startCE = start[0];
   1079     ucolIndirectBoundaries[indexR].startContCE = start[1];
   1080     if(end) {
   1081         ucolIndirectBoundaries[indexR].limitCE = end[0];
   1082         ucolIndirectBoundaries[indexR].limitContCE = end[1];
   1083     } else {
   1084         ucolIndirectBoundaries[indexR].limitCE = 0;
   1085         ucolIndirectBoundaries[indexR].limitContCE = 0;
   1086     }
   1087 }
   1088 
   1089 static void testCEs(UCollator *coll, UErrorCode *status) {
   1090     const UChar *rules = NULL, *current = NULL;
   1091     int32_t ruleLen = 0;
   1092 
   1093     uint32_t strength = 0;
   1094     uint32_t maxStrength = UCOL_IDENTICAL;
   1095     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
   1096     uint32_t lastCE;
   1097     uint32_t lastContCE;
   1098 
   1099     int32_t result = 0;
   1100     uint32_t chOffset = 0; uint32_t chLen = 0;
   1101     uint32_t exOffset = 0; uint32_t exLen = 0;
   1102     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
   1103     uint32_t oldOffset = 0;
   1104 
   1105     /* uint32_t rExpsLen = 0; */
   1106     /* uint32_t firstLen = 0; */
   1107     uint16_t specs = 0;
   1108     UBool varT = FALSE; UBool top_ = TRUE;
   1109     UBool startOfRules = TRUE;
   1110     UBool before = FALSE;
   1111     UColTokenParser src;
   1112     UColOptionSet opts;
   1113     UParseError parseError;
   1114     UChar *rulesCopy = NULL;
   1115     collIterate *c = uprv_new_collIterate(status);
   1116     UCAConstants *consts = NULL;
   1117     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
   1118         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
   1119     const char *colLoc;
   1120     UCollator *UCA = ucol_open("root", status);
   1121 
   1122     (void)varT;             /* Suppress set but not used warnings. */
   1123     (void)prefixLen;
   1124     (void)prefixOffset;
   1125     (void)exLen;
   1126     (void)exOffset;
   1127 
   1128     if (U_FAILURE(*status)) {
   1129         log_err("Could not open root collator %s\n", u_errorName(*status));
   1130         uprv_delete_collIterate(c);
   1131         return;
   1132     }
   1133 
   1134     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
   1135     if (U_FAILURE(*status)) {
   1136         log_err("Could not get collator name: %s\n", u_errorName(*status));
   1137         ucol_close(UCA);
   1138         uprv_delete_collIterate(c);
   1139         return;
   1140     }
   1141 
   1142     uprv_memset(&src, 0, sizeof(UColTokenParser));
   1143 
   1144     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
   1145     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
   1146     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
   1147     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
   1148     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
   1149 
   1150     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
   1151 
   1152     src.opts = &opts;
   1153 
   1154     rules = ucol_getRules(coll, &ruleLen);
   1155 
   1156     src.invUCA = ucol_initInverseUCA(status);
   1157 
   1158     if(indirectBoundariesSet == FALSE) {
   1159         /* UCOL_RESET_TOP_VALUE */
   1160         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1161         /* UCOL_FIRST_PRIMARY_IGNORABLE */
   1162         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
   1163         /* UCOL_LAST_PRIMARY_IGNORABLE */
   1164         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
   1165         /* UCOL_FIRST_SECONDARY_IGNORABLE */
   1166         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
   1167         /* UCOL_LAST_SECONDARY_IGNORABLE */
   1168         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
   1169         /* UCOL_FIRST_TERTIARY_IGNORABLE */
   1170         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
   1171         /* UCOL_LAST_TERTIARY_IGNORABLE */
   1172         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
   1173         /* UCOL_FIRST_VARIABLE */
   1174         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
   1175         /* UCOL_LAST_VARIABLE */
   1176         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
   1177         /* UCOL_FIRST_NON_VARIABLE */
   1178         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
   1179         /* UCOL_LAST_NON_VARIABLE */
   1180         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1181         /* UCOL_FIRST_IMPLICIT */
   1182         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
   1183         /* UCOL_LAST_IMPLICIT */
   1184         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
   1185         /* UCOL_FIRST_TRAILING */
   1186         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
   1187         /* UCOL_LAST_TRAILING */
   1188         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
   1189         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
   1190         indirectBoundariesSet = TRUE;
   1191     }
   1192 
   1193 
   1194     if(U_SUCCESS(*status) && ruleLen > 0) {
   1195         rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   1196         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
   1197         src.current = src.source = rulesCopy;
   1198         src.end = rulesCopy+ruleLen;
   1199         src.extraCurrent = src.end;
   1200         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1201 
   1202 	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1203 	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1204         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
   1205             strength = src.parsedToken.strength;
   1206             chOffset = src.parsedToken.charsOffset;
   1207             chLen = src.parsedToken.charsLen;
   1208             exOffset = src.parsedToken.extensionOffset;
   1209             exLen = src.parsedToken.extensionLen;
   1210             prefixOffset = src.parsedToken.prefixOffset;
   1211             prefixLen = src.parsedToken.prefixLen;
   1212             specs = src.parsedToken.flags;
   1213 
   1214             startOfRules = FALSE;
   1215             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
   1216             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
   1217 
   1218             uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
   1219 
   1220             currCE = ucol_getNextCE(coll, c, status);
   1221             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
   1222                 log_verbose("Thai prevowel detected. Will pick next CE\n");
   1223                 currCE = ucol_getNextCE(coll, c, status);
   1224             }
   1225 
   1226             currContCE = ucol_getNextCE(coll, c, status);
   1227             if(!isContinuation(currContCE)) {
   1228                 currContCE = 0;
   1229             }
   1230 
   1231             /* we need to repack CEs here */
   1232 
   1233             if(strength == UCOL_TOK_RESET) {
   1234                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
   1235                 if(top_ == TRUE) {
   1236                     int32_t tokenIndex = src.parsedToken.indirectIndex;
   1237 
   1238                     nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
   1239                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
   1240                 } else {
   1241                     nextCE = baseCE = currCE;
   1242                     nextContCE = baseContCE = currContCE;
   1243                 }
   1244                 maxStrength = UCOL_IDENTICAL;
   1245             } else {
   1246                 if(strength < maxStrength) {
   1247                     maxStrength = strength;
   1248                     if(baseCE == UCOL_RESET_TOP_VALUE) {
   1249                         log_verbose("Resetting to [top]\n");
   1250                         nextCE = UCOL_NEXT_TOP_VALUE;
   1251                         nextContCE = UCOL_NEXT_TOP_CONT;
   1252                     } else {
   1253                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
   1254                     }
   1255                     if(result < 0) {
   1256                         if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
   1257                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
   1258                             return;
   1259                         } else {
   1260                             log_err("%s: couldn't find the CE\n", colLoc);
   1261                             return;
   1262                         }
   1263                     }
   1264                 }
   1265 
   1266                 currCE &= 0xFFFFFF3F;
   1267                 currContCE &= 0xFFFFFFBF;
   1268 
   1269                 if(maxStrength == UCOL_IDENTICAL) {
   1270                     if(baseCE != currCE || baseContCE != currContCE) {
   1271                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1272                     }
   1273                 } else {
   1274                     if(strength == UCOL_IDENTICAL) {
   1275                         if(lastCE != currCE || lastContCE != currContCE) {
   1276                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1277                         }
   1278                     } else {
   1279                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
   1280                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
   1281                             log_err("%s: current CE is not less than base CE\n", colLoc);
   1282                         }
   1283                         if(!before) {
   1284                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
   1285                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1286                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1287                             }
   1288                         } else {
   1289                             before = FALSE;
   1290                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
   1291                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1292                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1293                             }
   1294                         }
   1295                     }
   1296                 }
   1297 
   1298             }
   1299 
   1300             oldOffset = chOffset;
   1301             lastCE = currCE & 0xFFFFFF3F;
   1302             lastContCE = currContCE & 0xFFFFFFBF;
   1303         }
   1304         uprv_free(src.source);
   1305         uprv_free(src.reorderCodes);
   1306     }
   1307     ucol_close(UCA);
   1308     uprv_delete_collIterate(c);
   1309 }
   1310 
#if 0
/* Disabled: this fixed list is no longer used; these locales are now picked
   from the index RB (presumably the index resource bundle). */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
   1325 
   1326 static const char* rulesToTest[] = {
   1327   /* Funky fa rule */
   1328   "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
   1329   /*"& Z < p, P",*/
   1330     /* Cui Mins rules */
   1331     "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
   1332     "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
   1333     "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
   1334     "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
   1335     "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
   1336     "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
   1337     "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
   1338 };
   1339 
   1340 
   1341 static void TestCollations(void) {
   1342     int32_t noOfLoc = uloc_countAvailable();
   1343     int32_t i = 0, j = 0;
   1344 
   1345     UErrorCode status = U_ZERO_ERROR;
   1346     char cName[256];
   1347     UChar name[256];
   1348     int32_t nameSize;
   1349 
   1350 
   1351     const char *locName = NULL;
   1352     UCollator *coll = NULL;
   1353     UCollator *UCA = ucol_open("", &status);
   1354     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
   1355     if (U_FAILURE(status)) {
   1356         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
   1357         return;
   1358     }
   1359     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
   1360 
   1361     for(i = 0; i<noOfLoc; i++) {
   1362         status = U_ZERO_ERROR;
   1363         locName = uloc_getAvailable(i);
   1364         if(uprv_strcmp("ja", locName) == 0) {
   1365             log_verbose("Don't know how to test prefixes\n");
   1366             continue;
   1367         }
   1368         if(hasCollationElements(locName)) {
   1369             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
   1370             for(j = 0; j<nameSize; j++) {
   1371                 cName[j] = (char)name[j];
   1372             }
   1373             cName[nameSize] = 0;
   1374             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1375             coll = ucol_open(locName, &status);
   1376             if(U_SUCCESS(status)) {
   1377                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
   1378                 ucol_close(coll);
   1379             } else {
   1380                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
   1381                 status = U_ZERO_ERROR;
   1382             }
   1383         }
   1384     }
   1385     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
   1386     ucol_close(UCA);
   1387 }
   1388 
   1389 static void RamsRulesTest(void) {
   1390     UErrorCode status = U_ZERO_ERROR;
   1391     int32_t i = 0;
   1392     UCollator *coll = NULL;
   1393     UChar rule[2048];
   1394     uint32_t ruleLen;
   1395     int32_t noOfLoc = uloc_countAvailable();
   1396     const char *locName = NULL;
   1397 
   1398     log_verbose("RamsRulesTest\n");
   1399 
   1400     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
   1401         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
   1402         return;
   1403     }
   1404 
   1405     for(i = 0; i<noOfLoc; i++) {
   1406         locName = uloc_getAvailable(i);
   1407         if(hasCollationElements(locName)) {
   1408             if (uprv_strcmp("ja", locName)==0) {
   1409                 log_verbose("Don't know how to test Japanese because of prefixes\n");
   1410                 continue;
   1411             }
   1412             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
   1413                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
   1414                 continue;
   1415             }
   1416             if (uprv_strcmp("bn", locName)==0 ||
   1417                 uprv_strcmp("bs", locName)==0 ||            /* Add due to import per cldrbug 5647 */
   1418                 uprv_strcmp("bs_Cyrl", locName)==0 ||       /* Add due to import per cldrbug 5647 */
   1419                 uprv_strcmp("en_US_POSIX", locName)==0 ||
   1420                 uprv_strcmp("fa", locName)==0 ||            /* Add in #10222 with CLDR 24 integration */
   1421                 uprv_strcmp("fa_AF", locName)==0 ||         /* Add due to import per cldrbug 5647 */
   1422                 uprv_strcmp("gl", locName)==0 ||            /* Add due to import per cldrbug 5647 */
   1423                 uprv_strcmp("gl_ES", locName)==0 ||         /* Add due to import per cldrbug 5647 */
   1424                 uprv_strcmp("he", locName)==0 ||            /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
   1425                 uprv_strcmp("he_IL", locName)==0 ||         /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
   1426                 uprv_strcmp("km", locName)==0 ||
   1427                 uprv_strcmp("km_KH", locName)==0 ||
   1428                 uprv_strcmp("my", locName)==0 ||
   1429                 uprv_strcmp("ps", locName)==0 ||            /* Add in #10222 with CLDR 24 integration */
   1430                 uprv_strcmp("si", locName)==0 ||
   1431                 uprv_strcmp("si_LK", locName)==0 ||
   1432                 uprv_strcmp("sr_Latn", locName)==0 ||       /* Add due to import per cldrbug 5647 */
   1433                 uprv_strcmp("th", locName)==0 ||
   1434                 uprv_strcmp("th_TH", locName)==0 ||
   1435                 uprv_strcmp("zh", locName)==0 ||
   1436                 uprv_strcmp("zh_Hant", locName)==0
   1437             ) {
   1438               if(log_knownIssue("6040", NULL)) {
   1439                 log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
   1440                 continue;
   1441               }
   1442             }
   1443             log_verbose("Testing locale %s\n", locName);
   1444             status = U_ZERO_ERROR;
   1445             coll = ucol_open(locName, &status);
   1446             if(U_SUCCESS(status)) {
   1447               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
   1448                 if(coll->image->jamoSpecial == TRUE) {
   1449                   log_err("%s has special JAMOs\n", locName);
   1450                 }
   1451                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
   1452                 testCollator(coll, &status);
   1453                 testCEs(coll, &status);
   1454               } else {
   1455                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
   1456               }
   1457               ucol_close(coll);
   1458             } else {
   1459               log_err("Could not open %s: %s\n", locName, u_errorName(status));
   1460             }
   1461         }
   1462     }
   1463 
   1464     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
   1465         log_verbose("Testing rule: %s\n", rulesToTest[i]);
   1466         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
   1467         status = U_ZERO_ERROR;
   1468         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1469         if(U_SUCCESS(status)) {
   1470             testCollator(coll, &status);
   1471             testCEs(coll, &status);
   1472             ucol_close(coll);
   1473         } else {
   1474           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
   1475         }
   1476     }
   1477 
   1478 }
   1479 
   1480 static void IsTailoredTest(void) {
   1481     UErrorCode status = U_ZERO_ERROR;
   1482     uint32_t i = 0;
   1483     UCollator *coll = NULL;
   1484     UChar rule[2048];
   1485     UChar tailored[2048];
   1486     UChar notTailored[2048];
   1487     uint32_t ruleLen, tailoredLen, notTailoredLen;
   1488 
   1489     log_verbose("IsTailoredTest\n");
   1490 
   1491     u_uastrcpy(rule, "&Z < A, B, C;c < d");
   1492     ruleLen = u_strlen(rule);
   1493 
   1494     u_uastrcpy(tailored, "ABCcd");
   1495     tailoredLen = u_strlen(tailored);
   1496 
   1497     u_uastrcpy(notTailored, "ZabD");
   1498     notTailoredLen = u_strlen(notTailored);
   1499 
   1500     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1501     if(U_SUCCESS(status)) {
   1502         for(i = 0; i<tailoredLen; i++) {
   1503             if(!ucol_isTailored(coll, tailored[i], &status)) {
   1504                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
   1505             }
   1506         }
   1507         for(i = 0; i<notTailoredLen; i++) {
   1508             if(ucol_isTailored(coll, notTailored[i], &status)) {
   1509                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
   1510             }
   1511         }
   1512         ucol_close(coll);
   1513     }
   1514     else {
   1515         log_err_status(status, "Can't tailor rules\n");
   1516     }
   1517     /* Code coverage */
   1518     status = U_ZERO_ERROR;
   1519     coll = ucol_open("ja", &status);
   1520     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
   1521         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
   1522     }
   1523     ucol_close(coll);
   1524 }
   1525 
   1526 
   1527 const static char chTest[][20] = {
   1528   "c",
   1529   "C",
   1530   "ca", "cb", "cx", "cy", "CZ",
   1531   "c\\u030C", "C\\u030C",
   1532   "h",
   1533   "H",
   1534   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
   1535   "ch", "cH", "Ch", "CH",
   1536   "cha", "charly", "che", "chh", "chch", "chr",
   1537   "i", "I", "iarly",
   1538   "r", "R",
   1539   "r\\u030C", "R\\u030C",
   1540   "s",
   1541   "S",
   1542   "s\\u030C", "S\\u030C",
   1543   "z", "Z",
   1544   "z\\u030C", "Z\\u030C"
   1545 };
   1546 
   1547 static void TestChMove(void) {
   1548     UChar t1[256] = {0};
   1549     UChar t2[256] = {0};
   1550 
   1551     uint32_t i = 0, j = 0;
   1552     uint32_t size = 0;
   1553     UErrorCode status = U_ZERO_ERROR;
   1554 
   1555     UCollator *coll = ucol_open("cs", &status);
   1556 
   1557     if(U_SUCCESS(status)) {
   1558         size = sizeof(chTest)/sizeof(chTest[0]);
   1559         for(i = 0; i < size-1; i++) {
   1560             for(j = i+1; j < size; j++) {
   1561                 u_unescape(chTest[i], t1, 256);
   1562                 u_unescape(chTest[j], t2, 256);
   1563                 doTest(coll, t1, t2, UCOL_LESS);
   1564             }
   1565         }
   1566     }
   1567     else {
   1568         log_data_err("Can't open collator");
   1569     }
   1570     ucol_close(coll);
   1571 }
   1572 
   1573 
   1574 
   1575 
   1576 const static char impTest[][20] = {
   1577   "\\u4e00",
   1578     "a",
   1579     "A",
   1580     "b",
   1581     "B",
   1582     "\\u4e01"
   1583 };
   1584 
   1585 
   1586 static void TestImplicitTailoring(void) {
   1587   static const struct {
   1588     const char *rules;
   1589     const char *data[10];
   1590     const uint32_t len;
   1591   } tests[] = {
   1592       { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
   1593       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
   1594       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
   1595       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
   1596   };
   1597 
   1598   int32_t i = 0;
   1599 
   1600   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   1601       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   1602   }
   1603 
   1604 /*
   1605   UChar t1[256] = {0};
   1606   UChar t2[256] = {0};
   1607 
   1608   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
   1609 
   1610   uint32_t i = 0, j = 0;
   1611   uint32_t size = 0;
   1612   uint32_t ruleLen = 0;
   1613   UErrorCode status = U_ZERO_ERROR;
   1614   UCollator *coll = NULL;
   1615   ruleLen = u_unescape(rule, t1, 256);
   1616 
   1617   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1618 
   1619   if(U_SUCCESS(status)) {
   1620     size = sizeof(impTest)/sizeof(impTest[0]);
   1621     for(i = 0; i < size-1; i++) {
   1622       for(j = i+1; j < size; j++) {
   1623         u_unescape(impTest[i], t1, 256);
   1624         u_unescape(impTest[j], t2, 256);
   1625         doTest(coll, t1, t2, UCOL_LESS);
   1626       }
   1627     }
   1628   }
   1629   else {
   1630     log_err("Can't open collator");
   1631   }
   1632   ucol_close(coll);
   1633   */
   1634 }
   1635 
   1636 static void TestFCDProblem(void) {
   1637   UChar t1[256] = {0};
   1638   UChar t2[256] = {0};
   1639 
   1640   const char *s1 = "\\u0430\\u0306\\u0325";
   1641   const char *s2 = "\\u04D1\\u0325";
   1642 
   1643   UErrorCode status = U_ZERO_ERROR;
   1644   UCollator *coll = ucol_open("", &status);
   1645   u_unescape(s1, t1, 256);
   1646   u_unescape(s2, t2, 256);
   1647 
   1648   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
   1649   doTest(coll, t1, t2, UCOL_EQUAL);
   1650 
   1651   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   1652   doTest(coll, t1, t2, UCOL_EQUAL);
   1653 
   1654   ucol_close(coll);
   1655 }
   1656 
   1657 /*
   1658 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
   1659 We're only using NFC/NFD in this test.
   1660 */
   1661 #define NORM_BUFFER_TEST_LEN 18
   1662 typedef struct {
   1663   UChar32 u;
   1664   UChar NFC[NORM_BUFFER_TEST_LEN];
   1665   UChar NFD[NORM_BUFFER_TEST_LEN];
   1666 } tester;
   1667 
   1668 static void TestComposeDecompose(void) {
   1669     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
   1670     static const UChar UNICODESET_STR[] = {
   1671         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
   1672         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
   1673         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
   1674     };
   1675     int32_t noOfLoc;
   1676     int32_t i = 0, j = 0;
   1677 
   1678     UErrorCode status = U_ZERO_ERROR;
   1679     const char *locName = NULL;
   1680     uint32_t nfcSize;
   1681     uint32_t nfdSize;
   1682     tester **t;
   1683     uint32_t noCases = 0;
   1684     UCollator *coll = NULL;
   1685     UChar32 u = 0;
   1686     UChar comp[NORM_BUFFER_TEST_LEN];
   1687     uint32_t len = 0;
   1688     UCollationElements *iter;
   1689     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
   1690     int32_t charsToTestSize;
   1691 
   1692     noOfLoc = uloc_countAvailable();
   1693 
   1694     coll = ucol_open("", &status);
   1695     if (U_FAILURE(status)) {
   1696         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
   1697         return;
   1698     }
   1699     charsToTestSize = uset_size(charsToTest);
   1700     if (charsToTestSize <= 0) {
   1701         log_err("Set was zero. Missing data?\n");
   1702         return;
   1703     }
   1704     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
   1705     t[0] = (tester *)malloc(sizeof(tester));
   1706     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
   1707 
   1708     for(u = 0; u < charsToTestSize; u++) {
   1709         UChar32 ch = uset_charAt(charsToTest, u);
   1710         len = 0;
   1711         U16_APPEND_UNSAFE(comp, len, ch);
   1712         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1713         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1714 
   1715         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
   1716           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
   1717             t[noCases]->u = ch;
   1718             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
   1719                 u_strncpy(t[noCases]->NFC, comp, len);
   1720                 t[noCases]->NFC[len] = 0;
   1721             }
   1722             noCases++;
   1723             t[noCases] = (tester *)malloc(sizeof(tester));
   1724             uprv_memset(t[noCases], 0, sizeof(tester));
   1725         }
   1726     }
   1727     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
   1728     uset_close(charsToTest);
   1729     charsToTest = NULL;
   1730 
   1731     for(u=0; u<(UChar32)noCases; u++) {
   1732         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1733             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
   1734             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1735         }
   1736     }
   1737     /*
   1738     for(u = 0; u < charsToTestSize; u++) {
   1739       if(!(u&0xFFFF)) {
   1740         log_verbose("%08X ", u);
   1741       }
   1742       uprv_memset(t[noCases], 0, sizeof(tester));
   1743       t[noCases]->u = u;
   1744       len = 0;
   1745       U16_APPEND_UNSAFE(comp, len, u);
   1746       comp[len] = 0;
   1747       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1748       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1749       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
   1750       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
   1751     }
   1752     */
   1753 
   1754     ucol_close(coll);
   1755 
   1756     log_verbose("Testing locales, number of cases = %i\n", noCases);
   1757     for(i = 0; i<noOfLoc; i++) {
   1758         status = U_ZERO_ERROR;
   1759         locName = uloc_getAvailable(i);
   1760         if(hasCollationElements(locName)) {
   1761             char cName[256];
   1762             UChar name[256];
   1763             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
   1764 
   1765             for(j = 0; j<nameSize; j++) {
   1766                 cName[j] = (char)name[j];
   1767             }
   1768             cName[nameSize] = 0;
   1769             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1770 
   1771             coll = ucol_open(locName, &status);
   1772             ucol_setStrength(coll, UCOL_IDENTICAL);
   1773             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1774 
   1775             for(u=0; u<(UChar32)noCases; u++) {
   1776                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1777                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
   1778                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1779                     log_verbose("Testing NFC\n");
   1780                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
   1781                     backAndForth(iter);
   1782                     log_verbose("Testing NFD\n");
   1783                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1784                     backAndForth(iter);
   1785                 }
   1786             }
   1787             ucol_closeElements(iter);
   1788             ucol_close(coll);
   1789         }
   1790     }
   1791     for(u = 0; u <= (UChar32)noCases; u++) {
   1792         free(t[u]);
   1793     }
   1794     free(t);
   1795 }
   1796 
   1797 static void TestEmptyRule(void) {
   1798   UErrorCode status = U_ZERO_ERROR;
   1799   UChar rulez[] = { 0 };
   1800   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1801 
   1802   ucol_close(coll);
   1803 }
   1804 
   1805 static void TestUCARules(void) {
   1806   UErrorCode status = U_ZERO_ERROR;
   1807   UChar b[256];
   1808   UChar *rules = b;
   1809   uint32_t ruleLen = 0;
   1810   UCollator *UCAfromRules = NULL;
   1811   UCollator *coll = ucol_open("", &status);
   1812   if(status == U_FILE_ACCESS_ERROR) {
   1813     log_data_err("Is your data around?\n");
   1814     return;
   1815   } else if(U_FAILURE(status)) {
   1816     log_err("Error opening collator\n");
   1817     return;
   1818   }
   1819   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
   1820 
   1821   log_verbose("TestUCARules\n");
   1822   if(ruleLen > 256) {
   1823     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
   1824     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
   1825   }
   1826   log_verbose("Rules length is %d\n", ruleLen);
   1827   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1828   if(U_SUCCESS(status)) {
   1829     ucol_close(UCAfromRules);
   1830   } else {
   1831     log_verbose("Unable to create a collator from UCARules!\n");
   1832   }
   1833 /*
   1834   u_unescape(blah, b, 256);
   1835   ucol_getSortKey(coll, b, 1, res, 256);
   1836 */
   1837   ucol_close(coll);
   1838   if(rules != b) {
   1839     free(rules);
   1840   }
   1841 }
   1842 
   1843 
   1844 /* Pinyin tonal order */
   1845 /*
   1846     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
   1847           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
   1848     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
   1849     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
   1850     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
   1851     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
   1852       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
   1853 .. (\u00fc)
   1854 
   1855 However, in testing we got the following order:
   1856     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
   1857           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
   1858     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
   1859 .. (\u0113)
   1860     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
   1861     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
   1862     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
   1863 .. (\u01d8)
   1864       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
   1865 */
   1866 
   /*
    * Tailors the tone-marked vowels with [before 1] resets on a, e, i, o and u,
    * followed by the u-diaeresis forms after u, and passes the rules together
    * with the test strings below to genericRulesStarter() (defined elsewhere;
    * presumably it checks that the strings sort in the listed order).
    */
   static void TestBefore(void) {
     static const char *data[] = {
         "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
         "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
         "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
         "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
         "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
         "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
     };
     genericRulesStarter(
       "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
       "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
       "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
       "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
       "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
       "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
       data, LEN(data));
   }
   1885 
   #if 0
   /* Disabled: superseded by TestBeforePinyin. */
   /* Passes the tone-marked vowels below to genericLocaleStarter() for locale "zh". */
   static void TestJ784(void) {
     static const char *data[] = {
         "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
         "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
         "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
         "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
         "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
         "\\u00fc",
              "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
     };
     genericLocaleStarter("zh", data, LEN(data));
   }
   #endif
   1901 
   #if 0
   /* Disabled: superseded by the changes to the lv locale. */
   /* Passes "I", "i", "Y", "y" to genericLocaleStarter() for locale "lv". */
   static void TestJ831(void) {
     static const char *data[] = {
       "I",
       "i",
       "Y",
       "y"
     };
     genericLocaleStarter("lv", data, LEN(data));
   }
   #endif
   1914 
   /*
    * Runs the same list of strings (plain a+letter pairs and the ae ligature in
    * both cases) twice: once through genericLocaleStarter() for locale "fr" and
    * once through genericRulesStarter() with a "[backwards 2]" rule string that
    * tailors the ligature as an expansion after A.
    */
   static void TestJ815(void) {
     static const char *data[] = {
       "aa",
       "Aa",
       "ab",
       "Ab",
       "ad",
       "Ad",
       "ae",
       "Ae",
       "\\u00e6",
       "\\u00c6",
       "af",
       "Af",
       "b",
       "B"
     };
     genericLocaleStarter("fr", data, LEN(data));
     genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, LEN(data));
   }
   1935 
   1936 
   1937 /*
   1938 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
   1939 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
   1940 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
   1941 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
   1942 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
   1943 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
   1944 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
   1945 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
   1946 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
   1947 */
   1948 static void TestRedundantRules(void) {
   1949   int32_t i;
   1950 
   1951   static const struct {
   1952       const char *rules;
   1953       const char *expectedRules;
   1954       const char *testdata[8];
   1955       uint32_t testdatalen;
   1956   } tests[] = {
   1957     /* this test conflicts with positioning of CODAN placeholder */
   1958        /*{
   1959         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
   1960         "&\\u2089<<<x",
   1961         {"\\u2089", "x"}, 2
   1962        }, */
   1963     /* this test conflicts with the [before x] syntax tightening */
   1964       /*{
   1965         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
   1966         "&\\u0252<<<x",
   1967         {"\\u0252", "x"}, 2
   1968       }, */
   1969     /* this test conflicts with the [before x] syntax tightening */
   1970       /*{
   1971          "& a < b <<< c << d <<< e& [before 1] e <<< x",
   1972          "& a <<< x < b <<< c << d <<< e",
   1973         {"a", "x", "b", "c", "d", "e"}, 6
   1974       }, */
   1975       {
   1976         "& a < b < c < d& [before 1] c < m",
   1977         "& a < b < m < c < d",
   1978         {"a", "b", "m", "c", "d"}, 5
   1979       },
   1980       {
   1981         "& a < b <<< c << d <<< e& [before 3] e <<< x",
   1982         "& a < b <<< c << d <<< x <<< e",
   1983         {"a", "b", "c", "d", "x", "e"}, 6
   1984       },
   1985     /* this test conflicts with the [before x] syntax tightening */
   1986       /* {
   1987         "& a < b <<< c << d <<< e& [before 2] e <<< x",
   1988         "& a < b <<< c <<< x << d <<< e",
   1989         {"a", "b", "c", "x", "d", "e"},, 6
   1990       }, */
   1991       {
   1992         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
   1993         "& a < b <<< c << d <<< e <<< f < x < g",
   1994         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
   1995       },
   1996       {
   1997         "& a <<< b << c < d& a < m",
   1998         "& a <<< b << c < m < d",
   1999         {"a", "b", "c", "m", "d"}, 5
   2000       },
   2001       {
   2002         "&a<b<<b\\u0301 &z<b",
   2003         "&a<b\\u0301 &z<b",
   2004         {"a", "b\\u0301", "z", "b"}, 4
   2005       },
   2006       {
   2007         "&z<m<<<q<<<m",
   2008         "&z<q<<<m",
   2009         {"z", "q", "m"},3
   2010       },
   2011       {
   2012         "&z<<<m<q<<<m",
   2013         "&z<q<<<m",
   2014         {"z", "q", "m"}, 3
   2015       },
   2016       {
   2017         "& a < b < c < d& r < c",
   2018         "& a < b < d& r < c",
   2019         {"a", "b", "d"}, 3
   2020       },
   2021       {
   2022         "& a < b < c < d& r < c",
   2023         "& a < b < d& r < c",
   2024         {"r", "c"}, 2
   2025       },
   2026       {
   2027         "& a < b < c < d& c < m",
   2028         "& a < b < c < m < d",
   2029         {"a", "b", "c", "m", "d"}, 5
   2030       },
   2031       {
   2032         "& a < b < c < d& a < m",
   2033         "& a < m < b < c < d",
   2034         {"a", "m", "b", "c", "d"}, 5
   2035       }
   2036   };
   2037 
   2038 
   2039   UCollator *credundant = NULL;
   2040   UCollator *cresulting = NULL;
   2041   UErrorCode status = U_ZERO_ERROR;
   2042   UChar rlz[2048] = { 0 };
   2043   uint32_t rlen = 0;
   2044 
   2045   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
   2046     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
   2047     rlen = u_unescape(tests[i].rules, rlz, 2048);
   2048 
   2049     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2050     if(status == U_FILE_ACCESS_ERROR) {
   2051       log_data_err("Is your data around?\n");
   2052       return;
   2053     } else if(U_FAILURE(status)) {
   2054       log_err("Error opening collator\n");
   2055       return;
   2056     }
   2057 
   2058     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
   2059     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2060 
   2061     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
   2062 
   2063     ucol_close(credundant);
   2064     ucol_close(cresulting);
   2065 
   2066     log_verbose("testing using data\n");
   2067 
   2068     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
   2069   }
   2070 
   2071 }
   2072 
   2073 static void TestExpansionSyntax(void) {
   2074   int32_t i;
   2075 
   2076   const static char *rules[] = {
   2077     "&AE <<< a << b <<< c &d <<< f",
   2078     "&AE <<< a <<< b << c << d < e < f <<< g",
   2079     "&AE <<< B <<< C / D <<< F"
   2080   };
   2081 
   2082   const static char *expectedRules[] = {
   2083     "&A <<< a / E << b / E <<< c /E  &d <<< f",
   2084     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
   2085     "&A <<< B / E <<< C / ED <<< F / E"
   2086   };
   2087 
   2088   const static char *testdata[][8] = {
   2089     {"AE", "a", "b", "c"},
   2090     {"AE", "a", "b", "c", "d", "e", "f", "g"},
   2091     {"AE", "B", "C"} /* / ED <<< F / E"},*/
   2092   };
   2093 
   2094   const static uint32_t testdatalen[] = {
   2095       4,
   2096       8,
   2097       3
   2098   };
   2099 
   2100 
   2101 
   2102   UCollator *credundant = NULL;
   2103   UCollator *cresulting = NULL;
   2104   UErrorCode status = U_ZERO_ERROR;
   2105   UChar rlz[2048] = { 0 };
   2106   uint32_t rlen = 0;
   2107 
   2108   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
   2109     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
   2110     rlen = u_unescape(rules[i], rlz, 2048);
   2111 
   2112     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2113     if(status == U_FILE_ACCESS_ERROR) {
   2114       log_data_err("Is your data around?\n");
   2115       return;
   2116     } else if(U_FAILURE(status)) {
   2117       log_err("Error opening collator\n");
   2118       return;
   2119     }
   2120     rlen = u_unescape(expectedRules[i], rlz, 2048);
   2121     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2122 
   2123     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
   2124     /* as a hard error test, but only in information mode */
   2125     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
   2126 
   2127     ucol_close(credundant);
   2128     ucol_close(cresulting);
   2129 
   2130     log_verbose("testing using data\n");
   2131 
   2132     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
   2133   }
   2134 }
   2135 
   2136 static void TestCase(void)
   2137 {
   2138     const static UChar gRules[MAX_TOKEN_LEN] =
   2139     /*" & 0 < 1,\u2461<a,A"*/
   2140     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
   2141 
   2142     const static UChar testCase[][MAX_TOKEN_LEN] =
   2143     {
   2144         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
   2145         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
   2146         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
   2147         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
   2148     };
   2149 
   2150     const static UCollationResult caseTestResults[][9] =
   2151     {
   2152         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2153         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
   2154         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2155         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
   2156     };
   2157 
   2158     const static UColAttributeValue caseTestAttributes[][2] =
   2159     {
   2160         { UCOL_LOWER_FIRST, UCOL_OFF},
   2161         { UCOL_UPPER_FIRST, UCOL_OFF},
   2162         { UCOL_LOWER_FIRST, UCOL_ON},
   2163         { UCOL_UPPER_FIRST, UCOL_ON}
   2164     };
   2165     int32_t i,j,k;
   2166     UErrorCode status = U_ZERO_ERROR;
   2167     UCollationElements *iter;
   2168     UCollator  *myCollation;
   2169     myCollation = ucol_open("en_US", &status);
   2170 
   2171     if(U_FAILURE(status)){
   2172         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2173         return;
   2174     }
   2175     log_verbose("Testing different case settings\n");
   2176     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2177 
   2178     for(k = 0; k<4; k++) {
   2179       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2180       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2181       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
   2182       for (i = 0; i < 3 ; i++) {
   2183         for(j = i+1; j<4; j++) {
   2184           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2185         }
   2186       }
   2187     }
   2188     ucol_close(myCollation);
   2189 
   2190     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   2191     if(U_FAILURE(status)){
   2192         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2193         return;
   2194     }
   2195     log_verbose("Testing different case settings with custom rules\n");
   2196     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2197 
   2198     for(k = 0; k<4; k++) {
   2199       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2200       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2201       for (i = 0; i < 3 ; i++) {
   2202         for(j = i+1; j<4; j++) {
   2203           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
   2204           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2205           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
   2206           backAndForth(iter);
   2207           ucol_closeElements(iter);
   2208           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
   2209           backAndForth(iter);
   2210           ucol_closeElements(iter);
   2211         }
   2212       }
   2213     }
   2214     ucol_close(myCollation);
   2215     {
   2216       const static char *lowerFirst[] = {
   2217         "h",
   2218         "H",
   2219         "ch",
   2220         "Ch",
   2221         "CH",
   2222         "cha",
   2223         "chA",
   2224         "Cha",
   2225         "ChA",
   2226         "CHa",
   2227         "CHA",
   2228         "i",
   2229         "I"
   2230       };
   2231 
   2232       const static char *upperFirst[] = {
   2233         "H",
   2234         "h",
   2235         "CH",
   2236         "Ch",
   2237         "ch",
   2238         "CHA",
   2239         "CHa",
   2240         "ChA",
   2241         "Cha",
   2242         "chA",
   2243         "cha",
   2244         "I",
   2245         "i"
   2246       };
   2247       log_verbose("mixed case test\n");
   2248       log_verbose("lower first, case level off\n");
   2249       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2250       log_verbose("upper first, case level off\n");
   2251       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2252       log_verbose("lower first, case level on\n");
   2253       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2254       log_verbose("upper first, case level on\n");
   2255       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2256     }
   2257 
   2258 }
   2259 
   2260 static void TestIncrementalNormalize(void) {
   2261 
   2262     /*UChar baseA     =0x61;*/
   2263     UChar baseA     =0x41;
   2264 /*    UChar baseB     = 0x42;*/
   2265     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
   2266     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
   2267     /*
   2268         0x316 is combining grave accent below, cc=220
   2269         0x321 is combining palatalized hook below, cc=202
   2270         0x300 is combining grave accent, cc=230
   2271     */
   2272 
   2273 #define MAXSLEN 2000
   2274     /*int          maxSLen   = 64000;*/
   2275     int          sLen;
   2276     int          i;
   2277 
   2278     UCollator        *coll;
   2279     UErrorCode       status = U_ZERO_ERROR;
   2280     UCollationResult result;
   2281 
   2282     int32_t myQ = getTestOption(QUICK_OPTION);
   2283 
   2284     if(getTestOption(QUICK_OPTION) < 0) {
   2285         setTestOption(QUICK_OPTION, 1);
   2286     }
   2287 
   2288     {
   2289         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
   2290         /*          most buffers along the way.*/
   2291         UChar            strA[MAXSLEN+1];
   2292         UChar            strB[MAXSLEN+1];
   2293 
   2294         coll = ucol_open("en_US", &status);
   2295         if(status == U_FILE_ACCESS_ERROR) {
   2296           log_data_err("Is your data around?\n");
   2297           return;
   2298         } else if(U_FAILURE(status)) {
   2299           log_err("Error opening collator\n");
   2300           return;
   2301         }
   2302         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2303 
   2304         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   2305         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   2306         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   2307         for (sLen = 500; sLen<501; sLen++) {
   2308         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   2309             strA[0] = baseA;
   2310             strB[0] = baseA;
   2311             for (i=1; i<=sLen-1; i++) {
   2312                 strA[i] = ccMix[i % 3];
   2313                 strB[sLen-i] = ccMix[i % 3];
   2314             }
   2315             strA[sLen]   = 0;
   2316             strB[sLen]   = 0;
   2317 
   2318             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   2319             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   2320             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   2321             doTest(coll, strA, strB, UCOL_EQUAL);
   2322         }
   2323     }
   2324 
   2325     setTestOption(QUICK_OPTION, myQ);
   2326 
   2327 
   2328     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   2329     /*         of the string.  Checks a couple of edge cases.*/
   2330 
   2331     {
   2332         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   2333         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   2334         ucol_setStrength(coll, UCOL_TERTIARY);
   2335         doTest(coll, strA, strB, UCOL_EQUAL);
   2336     }
   2337 
   2338     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   2339 
   2340     {
   2341       /* New UCA  3.1.1.
   2342        * test below used a code point from Desseret, which sorts differently
   2343        * than d800 dc00
   2344        */
   2345         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   2346         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   2347         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   2348         ucol_setStrength(coll, UCOL_TERTIARY);
   2349         doTest(coll, strA, strB, UCOL_GREATER);
   2350     }
   2351 
   2352     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   2353 
   2354     {
   2355         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   2356         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   2357         char  sortKeyA[50];
   2358         char  sortKeyAz[50];
   2359         char  sortKeyB[50];
   2360         char  sortKeyBz[50];
   2361         int   r;
   2362 
   2363         /* there used to be -3 here. Hmmmm.... */
   2364         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   2365         result = ucol_strcoll(coll, strA, 3, strB, 3);
   2366         if (result != UCOL_GREATER) {
   2367             log_err("ERROR 1 in test 4\n");
   2368         }
   2369         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2370         if (result != UCOL_EQUAL) {
   2371             log_err("ERROR 2 in test 4\n");
   2372         }
   2373 
   2374         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2375         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2376         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2377         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2378 
   2379         r = strcmp(sortKeyA, sortKeyAz);
   2380         if (r <= 0) {
   2381             log_err("Error 3 in test 4\n");
   2382         }
   2383         r = strcmp(sortKeyA, sortKeyB);
   2384         if (r <= 0) {
   2385             log_err("Error 4 in test 4\n");
   2386         }
   2387         r = strcmp(sortKeyAz, sortKeyBz);
   2388         if (r != 0) {
   2389             log_err("Error 5 in test 4\n");
   2390         }
   2391 
   2392         ucol_setStrength(coll, UCOL_IDENTICAL);
   2393         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2394         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2395         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2396         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2397 
   2398         r = strcmp(sortKeyA, sortKeyAz);
   2399         if (r <= 0) {
   2400             log_err("Error 6 in test 4\n");
   2401         }
   2402         r = strcmp(sortKeyA, sortKeyB);
   2403         if (r <= 0) {
   2404             log_err("Error 7 in test 4\n");
   2405         }
   2406         r = strcmp(sortKeyAz, sortKeyBz);
   2407         if (r != 0) {
   2408             log_err("Error 8 in test 4\n");
   2409         }
   2410         ucol_setStrength(coll, UCOL_TERTIARY);
   2411     }
   2412 
   2413 
   2414     /*  Test 5:  Null characters in non-normal source strings.*/
   2415 
   2416     {
   2417         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   2418         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   2419         char  sortKeyA[50];
   2420         char  sortKeyAz[50];
   2421         char  sortKeyB[50];
   2422         char  sortKeyBz[50];
   2423         int   r;
   2424 
   2425         result = ucol_strcoll(coll, strA, 6, strB, 6);
   2426         if (result != UCOL_GREATER) {
   2427             log_err("ERROR 1 in test 5\n");
   2428         }
   2429         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2430         if (result != UCOL_EQUAL) {
   2431             log_err("ERROR 2 in test 5\n");
   2432         }
   2433 
   2434         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2435         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2436         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2437         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2438 
   2439         r = strcmp(sortKeyA, sortKeyAz);
   2440         if (r <= 0) {
   2441             log_err("Error 3 in test 5\n");
   2442         }
   2443         r = strcmp(sortKeyA, sortKeyB);
   2444         if (r <= 0) {
   2445             log_err("Error 4 in test 5\n");
   2446         }
   2447         r = strcmp(sortKeyAz, sortKeyBz);
   2448         if (r != 0) {
   2449             log_err("Error 5 in test 5\n");
   2450         }
   2451 
   2452         ucol_setStrength(coll, UCOL_IDENTICAL);
   2453         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2454         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2455         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2456         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2457 
   2458         r = strcmp(sortKeyA, sortKeyAz);
   2459         if (r <= 0) {
   2460             log_err("Error 6 in test 5\n");
   2461         }
   2462         r = strcmp(sortKeyA, sortKeyB);
   2463         if (r <= 0) {
   2464             log_err("Error 7 in test 5\n");
   2465         }
   2466         r = strcmp(sortKeyAz, sortKeyBz);
   2467         if (r != 0) {
   2468             log_err("Error 8 in test 5\n");
   2469         }
   2470         ucol_setStrength(coll, UCOL_TERTIARY);
   2471     }
   2472 
   2473 
   2474     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   2475 
   2476     {
   2477         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   2478         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   2479 
   2480         result = ucol_strcoll(coll, strA, 5, strB, 5);
   2481         if (result != UCOL_LESS) {
   2482             log_err("Error 1 in test 6\n");
   2483         }
   2484         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2485         if (result != UCOL_EQUAL) {
   2486             log_err("Error 2 in test 6\n");
   2487         }
   2488     }
   2489 
   2490     ucol_close(coll);
   2491 }
   2492 
   2493 
   2494 
   2495 #if 0
   2496 static void TestGetCaseBit(void) {
   2497   static const char *caseBitData[] = {
   2498     "a", "A", "ch", "Ch", "CH",
   2499       "\\uFF9E", "\\u0009"
   2500   };
   2501 
   2502   static const uint8_t results[] = {
   2503     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   2504       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   2505   };
   2506 
   2507   uint32_t i, blen = 0;
   2508   UChar b[256] = {0};
   2509   UErrorCode status = U_ZERO_ERROR;
   2510   UCollator *UCA = ucol_open("", &status);
   2511   uint8_t res = 0;
   2512 
   2513   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   2514     blen = u_unescape(caseBitData[i], b, 256);
   2515     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   2516     if(results[i] != res) {
   2517       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   2518     }
   2519   }
   2520 }
   2521 #endif
   2522 
   2523 static void TestHangulTailoring(void) {
   2524     static const char *koreanData[] = {
   2525         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   2526             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   2527             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   2528             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   2529             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   2530             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   2531     };
   2532 
   2533     const char *rules =
   2534         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   2535         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   2536         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   2537         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   2538         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   2539         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   2540 
   2541 
   2542   UErrorCode status = U_ZERO_ERROR;
   2543   UChar rlz[2048] = { 0 };
   2544   uint32_t rlen = u_unescape(rules, rlz, 2048);
   2545 
   2546   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   2547   if(status == U_FILE_ACCESS_ERROR) {
   2548     log_data_err("Is your data around?\n");
   2549     return;
   2550   } else if(U_FAILURE(status)) {
   2551     log_err("Error opening collator\n");
   2552     return;
   2553   }
   2554 
   2555   log_verbose("Using start of korean rules\n");
   2556 
   2557   if(U_SUCCESS(status)) {
   2558     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2559   } else {
   2560     log_err("Unable to open collator with rules %s\n", rules);
   2561   }
   2562 
   2563   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
   2564   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
   2565   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2566 
   2567   ucol_close(coll);
   2568 
   2569   log_verbose("Using ko__LOTUS locale\n");
   2570   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2571 }
   2572 
   2573 static void TestCompressOverlap(void) {
   2574     UChar       secstr[150];
   2575     UChar       tertstr[150];
   2576     UErrorCode  status = U_ZERO_ERROR;
   2577     UCollator  *coll;
   2578     char        result[200];
   2579     uint32_t    resultlen;
   2580     int         count = 0;
   2581     char       *tempptr;
   2582 
   2583     coll = ucol_open("", &status);
   2584 
   2585     if (U_FAILURE(status)) {
   2586         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   2587         return;
   2588     }
   2589     while (count < 149) {
   2590         secstr[count] = 0x0020; /* [06, 05, 05] */
   2591         tertstr[count] = 0x0020;
   2592         count ++;
   2593     }
   2594 
   2595     /* top down compression ----------------------------------- */
   2596     secstr[count] = 0x0332; /* [, 87, 05] */
   2597     tertstr[count] = 0x3000; /* [06, 05, 07] */
   2598 
   2599     /* no compression secstr should have 150 secondary bytes, tertstr should
   2600     have 150 tertiary bytes.
   2601     with correct overlapping compression, secstr should have 4 secondary
   2602     bytes, tertstr should have > 2 tertiary bytes */
   2603     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2604     (void)resultlen;    /* Suppress set but not used warning. */
   2605     tempptr = uprv_strchr(result, 1) + 1;
   2606     while (*(tempptr + 1) != 1) {
   2607         /* the last secondary collation element is not checked since it is not
   2608         part of the compression */
   2609         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
   2610             log_err("Secondary compression overlapped\n");
   2611         }
   2612         tempptr ++;
   2613     }
   2614 
   2615     /* tertiary top/bottom/common for en_US is similar to the secondary
   2616     top/bottom/common */
   2617     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2618     tempptr = uprv_strrchr(result, 1) + 1;
   2619     while (*(tempptr + 1) != 0) {
   2620         /* the last secondary collation element is not checked since it is not
   2621         part of the compression */
   2622         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
   2623             log_err("Tertiary compression overlapped\n");
   2624         }
   2625         tempptr ++;
   2626     }
   2627 
   2628     /* bottom up compression ------------------------------------- */
   2629     secstr[count] = 0;
   2630     tertstr[count] = 0;
   2631     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2632     tempptr = uprv_strchr(result, 1) + 1;
   2633     while (*(tempptr + 1) != 1) {
   2634         /* the last secondary collation element is not checked since it is not
   2635         part of the compression */
   2636         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
   2637             log_err("Secondary compression overlapped\n");
   2638         }
   2639         tempptr ++;
   2640     }
   2641 
   2642     /* tertiary top/bottom/common for en_US is similar to the secondary
   2643     top/bottom/common */
   2644     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2645     tempptr = uprv_strrchr(result, 1) + 1;
   2646     while (*(tempptr + 1) != 0) {
   2647         /* the last secondary collation element is not checked since it is not
   2648         part of the compression */
   2649         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
   2650             log_err("Tertiary compression overlapped\n");
   2651         }
   2652         tempptr ++;
   2653     }
   2654 
   2655     ucol_close(coll);
   2656 }
   2657 
   2658 static void TestCyrillicTailoring(void) {
   2659   static const char *test[] = {
   2660     "\\u0410b",
   2661       "\\u0410\\u0306a",
   2662       "\\u04d0A"
   2663   };
   2664 
   2665     /* Russian overrides contractions, so this test is not valid anymore */
   2666     /*genericLocaleStarter("ru", test, 3);*/
   2667 
   2668     genericLocaleStarter("root", test, 3);
   2669     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   2670     genericRulesStarter("&Z < \\u0410", test, 3);
   2671     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   2672     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   2673     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   2674     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   2675 }
   2676 
   2677 static void TestSuppressContractions(void) {
   2678 
   2679   static const char *testNoCont2[] = {
   2680       "\\u0410\\u0302a",
   2681       "\\u0410\\u0306b",
   2682       "\\u0410c"
   2683   };
   2684   static const char *testNoCont[] = {
   2685       "a\\u0410",
   2686       "A\\u0410\\u0306",
   2687       "\\uFF21\\u0410\\u0302"
   2688   };
   2689 
   2690   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   2691   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   2692 }
   2693 
   2694 static void TestContraction(void) {
   2695     const static char *testrules[] = {
   2696         "&A = AB / B",
   2697         "&A = A\\u0306/\\u0306",
   2698         "&c = ch / h"
   2699     };
   2700     const static UChar testdata[][2] = {
   2701         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   2702         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   2703         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   2704     };
   2705     const static UChar testdata2[][2] = {
   2706         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   2707         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   2708         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   2709     };
   2710     const static char *testrules3[] = {
   2711         "&z < xyz &xyzw << B",
   2712         "&z < xyz &xyz << B / w",
   2713         "&z < ch &achm << B",
   2714         "&z < ch &a << B / chm",
   2715         "&\\ud800\\udc00w << B",
   2716         "&\\ud800\\udc00 << B / w",
   2717         "&a\\ud800\\udc00m << B",
   2718         "&a << B / \\ud800\\udc00m",
   2719     };
   2720 
   2721     UErrorCode  status   = U_ZERO_ERROR;
   2722     UCollator  *coll;
   2723     UChar       rule[256] = {0};
   2724     uint32_t    rlen     = 0;
   2725     int         i;
   2726 
   2727     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2728         UCollationElements *iter1;
   2729         int j = 0;
   2730         log_verbose("Rule %s for testing\n", testrules[i]);
   2731         rlen = u_unescape(testrules[i], rule, 32);
   2732         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2733         if (U_FAILURE(status)) {
   2734             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2735             return;
   2736         }
   2737         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   2738         if (U_FAILURE(status)) {
   2739             log_err("Collation iterator creation failed\n");
   2740             return;
   2741         }
   2742         while (j < 2) {
   2743             UCollationElements *iter2 = ucol_openElements(coll,
   2744                                                          &(testdata[i][j]),
   2745                                                          1, &status);
   2746             uint32_t ce;
   2747             if (U_FAILURE(status)) {
   2748                 log_err("Collation iterator creation failed\n");
   2749                 return;
   2750             }
   2751             ce = ucol_next(iter2, &status);
   2752             while (ce != UCOL_NULLORDER) {
   2753                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   2754                     log_err("Collation elements in contraction split does not match\n");
   2755                     return;
   2756                 }
   2757                 ce = ucol_next(iter2, &status);
   2758             }
   2759             j ++;
   2760             ucol_closeElements(iter2);
   2761         }
   2762         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   2763             log_err("Collation elements not exhausted\n");
   2764             return;
   2765         }
   2766         ucol_closeElements(iter1);
   2767         ucol_close(coll);
   2768     }
   2769 
   2770     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   2771     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2772     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   2773         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2774                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   2775                 testdata2[1][1]);
   2776         return;
   2777     }
   2778     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   2779         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2780                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   2781                 testdata2[2][1]);
   2782         return;
   2783     }
   2784     ucol_close(coll);
   2785 
   2786     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   2787         UCollator          *coll1,
   2788                            *coll2;
   2789         UCollationElements *iter1,
   2790                            *iter2;
   2791         UChar               ch = 0x0042 /* 'B' */;
   2792         uint32_t            ce;
   2793         rlen = u_unescape(testrules3[i], rule, 32);
   2794         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2795         rlen = u_unescape(testrules3[i + 1], rule, 32);
   2796         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2797         if (U_FAILURE(status)) {
   2798             log_err("Collator creation failed %s\n", testrules[i]);
   2799             return;
   2800         }
   2801         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   2802         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   2803         if (U_FAILURE(status)) {
   2804             log_err("Collation iterator creation failed\n");
   2805             return;
   2806         }
   2807         ce = ucol_next(iter1, &status);
   2808         if (U_FAILURE(status)) {
   2809             log_err("Retrieving ces failed\n");
   2810             return;
   2811         }
   2812         while (ce != UCOL_NULLORDER) {
   2813             if (ce != (uint32_t)ucol_next(iter2, &status)) {
   2814                 log_err("CEs does not match\n");
   2815                 return;
   2816             }
   2817             ce = ucol_next(iter1, &status);
   2818             if (U_FAILURE(status)) {
   2819                 log_err("Retrieving ces failed\n");
   2820                 return;
   2821             }
   2822         }
   2823         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   2824             log_err("CEs not exhausted\n");
   2825             return;
   2826         }
   2827         ucol_closeElements(iter1);
   2828         ucol_closeElements(iter2);
   2829         ucol_close(coll1);
   2830         ucol_close(coll2);
   2831     }
   2832 }
   2833 
   2834 static void TestExpansion(void) {
   2835     const static char *testrules[] = {
   2836         "&J << K / B & K << M",
   2837         "&J << K / B << M"
   2838     };
   2839     const static UChar testdata[][3] = {
   2840         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   2841         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   2842         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   2843         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   2844         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   2845         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   2846     };
   2847 
   2848     UErrorCode  status   = U_ZERO_ERROR;
   2849     UCollator  *coll;
   2850     UChar       rule[256] = {0};
   2851     uint32_t    rlen     = 0;
   2852     int         i;
   2853 
   2854     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2855         int j = 0;
   2856         log_verbose("Rule %s for testing\n", testrules[i]);
   2857         rlen = u_unescape(testrules[i], rule, 32);
   2858         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2859         if (U_FAILURE(status)) {
   2860             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2861             return;
   2862         }
   2863 
   2864         for (j = 0; j < 5; j ++) {
   2865             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   2866         }
   2867         ucol_close(coll);
   2868     }
   2869 }
   2870 
   2871 #if 0
/* This test exercises the current limitations of the engine. */
/* It always fails, so it is disabled by default. */
   2874 static void TestLimitations(void) {
   2875   /* recursive expansions */
   2876   {
   2877     static const char *rule = "&a=b/c&d=c/e";
   2878     static const char *tlimit01[] = {"add","b","adf"};
   2879     static const char *tlimit02[] = {"aa","b","af"};
   2880     log_verbose("recursive expansions\n");
   2881     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2882     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2883   }
   2884   /* contractions spanning expansions */
   2885   {
   2886     static const char *rule = "&a<<<c/e&g<<<eh";
   2887     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   2888     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   2889     log_verbose("contractions spanning expansions\n");
   2890     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2891     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2892   }
   2893   /* normalization: nulls in contractions */
   2894   {
   2895     static const char *rule = "&a<<<\\u0000\\u0302";
   2896     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2897     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2898     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2899     static const UColAttributeValue valOn[] = { UCOL_ON };
   2900     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2901 
   2902     log_verbose("NULL in contractions\n");
   2903     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2904     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2905     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2906     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2907 
   2908   }
   2909   /* normalization: contractions spanning normalization */
   2910   {
   2911     static const char *rule = "&a<<<\\u0000\\u0302";
   2912     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2913     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2914     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2915     static const UColAttributeValue valOn[] = { UCOL_ON };
   2916     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2917 
   2918     log_verbose("contractions spanning normalization\n");
   2919     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2920     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2921     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2922     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2923 
   2924   }
   2925   /* variable top:  */
   2926   {
   2927     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   2928     static const char *rule = "&\\u2010<x<[variable top]=z";
   2929     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   2930     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   2931     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   2932     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   2933     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   2934     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   2935     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   2936 
   2937     log_verbose("variable top\n");
   2938     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2939     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2940     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2941     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2942     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2943 
   2944   }
   2945   /* case level */
   2946   {
   2947     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   2948     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   2949     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   2950     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   2951     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   2952     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   2953     log_verbose("case level\n");
   2954     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2955     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2956     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2957     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2958   }
   2959 
   2960 }
   2961 #endif
   2962 
   2963 static void TestBocsuCoverage(void) {
   2964   UErrorCode status = U_ZERO_ERROR;
   2965   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   2966   UChar       test[256] = {0};
   2967   uint32_t    tlen     = u_unescape(testString, test, 32);
   2968   uint8_t key[256]     = {0};
   2969   uint32_t klen         = 0;
   2970 
   2971   UCollator *coll = ucol_open("", &status);
   2972   if(U_SUCCESS(status)) {
   2973   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   2974 
   2975   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   2976   (void)klen;    /* Suppress set but not used warning. */
   2977 
   2978   ucol_close(coll);
   2979   } else {
   2980     log_data_err("Couldn't open UCA\n");
   2981   }
   2982 }
   2983 
   2984 static void TestVariableTopSetting(void) {
   2985   UErrorCode status = U_ZERO_ERROR;
   2986   const UChar *current = NULL;
   2987   uint32_t varTopOriginal = 0, varTop1, varTop2;
   2988   UCollator *coll = ucol_open("", &status);
   2989   if(U_SUCCESS(status)) {
   2990 
   2991   uint32_t strength = 0;
   2992   uint16_t specs = 0;
   2993   uint32_t chOffset = 0;
   2994   uint32_t chLen = 0;
   2995   uint32_t exOffset = 0;
   2996   uint32_t exLen = 0;
   2997   uint32_t oldChOffset = 0;
   2998   uint32_t oldChLen = 0;
   2999   uint32_t oldExOffset = 0;
   3000   uint32_t oldExLen = 0;
   3001   uint32_t prefixOffset = 0;
   3002   uint32_t prefixLen = 0;
   3003 
   3004   UBool startOfRules = TRUE;
   3005   UColTokenParser src;
   3006   UColOptionSet opts;
   3007 
   3008   UChar *rulesCopy = NULL;
   3009   uint32_t rulesLen;
   3010 
   3011   UCollationResult result;
   3012 
   3013   UChar first[256] = { 0 };
   3014   UChar second[256] = { 0 };
   3015   UParseError parseError;
   3016   int32_t myQ = getTestOption(QUICK_OPTION);
   3017 
   3018   (void)prefixLen;        /* Suppress set but not used warnings. */
   3019   (void)prefixOffset;
   3020   (void)specs;
   3021 
   3022   uprv_memset(&src, 0, sizeof(UColTokenParser));
   3023 
   3024   src.opts = &opts;
   3025 
   3026   if(getTestOption(QUICK_OPTION) <= 0) {
   3027     setTestOption(QUICK_OPTION, 1);
   3028   }
   3029 
   3030   /* this test will fail when normalization is turned on */
   3031   /* therefore we always turn off exhaustive mode for it */
   3032   { /* QUICK > 0*/
   3033     log_verbose("Slide variable top over UCARules\n");
   3034     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
   3035     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   3036     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
   3037 
   3038     if(U_SUCCESS(status) && rulesLen > 0) {
   3039       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   3040       src.current = src.source = rulesCopy;
   3041       src.end = rulesCopy+rulesLen;
   3042       src.extraCurrent = src.end;
   3043       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   3044 
   3045 	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   3046 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   3047       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   3048         strength = src.parsedToken.strength;
   3049         chOffset = src.parsedToken.charsOffset;
   3050         chLen = src.parsedToken.charsLen;
   3051         exOffset = src.parsedToken.extensionOffset;
   3052         exLen = src.parsedToken.extensionLen;
   3053         prefixOffset = src.parsedToken.prefixOffset;
   3054         prefixLen = src.parsedToken.prefixLen;
   3055         specs = src.parsedToken.flags;
   3056 
   3057         startOfRules = FALSE;
   3058         {
   3059           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
   3060         }
   3061         if(strength == UCOL_PRIMARY) {
   3062           status = U_ZERO_ERROR;
   3063           varTopOriginal = ucol_getVariableTop(coll, &status);
   3064           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
   3065           if(U_FAILURE(status)) {
   3066             char buffer[256];
   3067             char *buf = buffer;
   3068             uint32_t i = 0, j;
   3069             uint32_t CE = UCOL_NO_MORE_CES;
   3070 
   3071             /* before we start screaming, let's see if there is a problem with the rules */
   3072             UErrorCode collIterateStatus = U_ZERO_ERROR;
   3073             collIterate *s = uprv_new_collIterate(&collIterateStatus);
   3074             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
   3075 
   3076             CE = ucol_getNextCE(coll, s, &status);
   3077             (void)CE;    /* Suppress set but not used warning. */
   3078 
   3079             for(i = 0; i < oldChLen; i++) {
   3080               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
   3081               buf += j;
   3082             }
   3083             if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3084               log_verbose("= Expected failure for %s =", buffer);
   3085             } else {
   3086               if(uprv_collIterateAtEnd(s)) {
   3087                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
   3088                   oldChOffset, u_errorName(status), buffer);
   3089               } else {
   3090                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
   3091                   buffer);
   3092               }
   3093             }
   3094             uprv_delete_collIterate(s);
   3095           }
   3096           varTop2 = ucol_getVariableTop(coll, &status);
   3097           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
   3098             log_err("cannot retrieve set varTop value!\n");
   3099             continue;
   3100           }
   3101 
   3102           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
   3103 
   3104             u_strncpy(first, src.source+oldChOffset, oldChLen);
   3105             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
   3106             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
   3107             first[2*oldChLen+chLen] = 0;
   3108 
   3109             if(oldExLen == 0) {
   3110               u_strncpy(second, src.source+chOffset, chLen);
   3111               second[chLen] = 0;
   3112             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
   3113               u_strncpy(second, src.source+oldExOffset, oldExLen);
   3114               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
   3115               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
   3116               second[2*oldExLen+chLen] = 0;
   3117             }
   3118             result = ucol_strcoll(coll, first, -1, second, -1);
   3119             if(result == UCOL_EQUAL) {
   3120               doTest(coll, first, second, UCOL_EQUAL);
   3121             } else {
   3122               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
   3123             }
   3124           }
   3125         }
   3126         if(strength != UCOL_TOK_RESET) {
   3127           oldChOffset = chOffset;
   3128           oldChLen = chLen;
   3129           oldExOffset = exOffset;
   3130           oldExLen = exLen;
   3131         }
   3132       }
   3133       status = U_ZERO_ERROR;
   3134     }
   3135     else {
   3136       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
   3137       return;
   3138     }
   3139     if (U_FAILURE(status)) {
   3140         log_err("Error parsing rules %s\n", u_errorName(status));
   3141         return;
   3142     }
   3143     status = U_ZERO_ERROR;
   3144   }
   3145 
   3146   setTestOption(QUICK_OPTION, myQ);
   3147 
   3148   log_verbose("Testing setting variable top to contractions\n");
   3149   {
   3150     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
   3151     int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
   3152     while(*conts != 0) {
   3153       /*
   3154        * A continuation is NUL-terminated and NUL-padded
   3155        * except if it has the maximum length.
   3156        */
   3157       int32_t contractionLength = maxUCAContractionLength;
   3158       while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
   3159         --contractionLength;
   3160       }
   3161       if(*(conts+1)==0) { /* pre-context */
   3162         varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
   3163       } else {
   3164         varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
   3165       }
   3166       if(U_FAILURE(status)) {
   3167         if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3168           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
   3169            * therefore it is not an error when it complains about them. */
   3170           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
   3171                       *conts, *(conts+1), *(conts+2));
   3172         } else {
   3173           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
   3174                   *conts, *(conts+1), *(conts+2), u_errorName(status));
   3175         }
   3176         status = U_ZERO_ERROR;
   3177       }
   3178       conts+=maxUCAContractionLength;
   3179     }
   3180 
   3181     status = U_ZERO_ERROR;
   3182 
   3183     first[0] = 0x0040;
   3184     first[1] = 0x0050;
   3185     first[2] = 0x0000;
   3186 
   3187     ucol_setVariableTop(coll, first, -1, &status);
   3188 
   3189     if(U_SUCCESS(status)) {
   3190       log_err("Invalid contraction succeded in setting variable top!\n");
   3191     }
   3192 
   3193   }
   3194 
   3195   log_verbose("Test restoring variable top\n");
   3196 
   3197   status = U_ZERO_ERROR;
   3198   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   3199   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   3200     log_err("Couldn't restore old variable top\n");
   3201   }
   3202 
   3203   log_verbose("Testing calling with error set\n");
   3204 
   3205   status = U_INTERNAL_PROGRAM_ERROR;
   3206   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
   3207   varTop2 = ucol_getVariableTop(coll, &status);
   3208   ucol_restoreVariableTop(coll, varTop2, &status);
   3209   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
   3210   varTop2 = ucol_getVariableTop(NULL, &status);
   3211   ucol_restoreVariableTop(NULL, varTop2, &status);
   3212   if(status != U_INTERNAL_PROGRAM_ERROR) {
   3213     log_err("Bad reaction to passed error!\n");
   3214   }
   3215   uprv_free(src.source);
   3216   ucol_close(coll);
   3217   } else {
   3218     log_data_err("Couldn't open UCA collator\n");
   3219   }
   3220 
   3221 }
   3222 
   3223 static void TestNonChars(void) {
   3224   static const char *test[] = {
   3225       "\\u0000",  /* ignorable */
   3226       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   3227       "\\uFDD0", "\\uFDEF",
   3228       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   3229       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   3230       "\\U0003FFFE", "\\U0003FFFF",
   3231       "\\U0004FFFE", "\\U0004FFFF",
   3232       "\\U0005FFFE", "\\U0005FFFF",
   3233       "\\U0006FFFE", "\\U0006FFFF",
   3234       "\\U0007FFFE", "\\U0007FFFF",
   3235       "\\U0008FFFE", "\\U0008FFFF",
   3236       "\\U0009FFFE", "\\U0009FFFF",
   3237       "\\U000AFFFE", "\\U000AFFFF",
   3238       "\\U000BFFFE", "\\U000BFFFF",
   3239       "\\U000CFFFE", "\\U000CFFFF",
   3240       "\\U000DFFFE", "\\U000DFFFF",
   3241       "\\U000EFFFE", "\\U000EFFFF",
   3242       "\\U000FFFFE", "\\U000FFFFF",
   3243       "\\U0010FFFE", "\\U0010FFFF",
   3244       "\\uFFFF"  /* special character with maximum primary weight */
   3245   };
   3246   UErrorCode status = U_ZERO_ERROR;
   3247   UCollator *coll = ucol_open("en_US", &status);
   3248 
   3249   log_verbose("Test non characters\n");
   3250 
   3251   if(U_SUCCESS(status)) {
   3252     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   3253   } else {
   3254     log_err_status(status, "Unable to open collator\n");
   3255   }
   3256 
   3257   ucol_close(coll);
   3258 }
   3259 
   3260 static void TestExtremeCompression(void) {
   3261   static char *test[4];
   3262   int32_t j = 0, i = 0;
   3263 
   3264   for(i = 0; i<4; i++) {
   3265     test[i] = (char *)malloc(2048*sizeof(char));
   3266   }
   3267 
   3268   for(j = 20; j < 500; j++) {
   3269     for(i = 0; i<4; i++) {
   3270       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3271       test[i][j-1] = (char)('a'+i);
   3272       test[i][j] = 0;
   3273     }
   3274     genericLocaleStarter("en_US", (const char **)test, 4);
   3275   }
   3276 
   3277 
   3278   for(i = 0; i<4; i++) {
   3279     free(test[i]);
   3280   }
   3281 }
   3282 
   3283 #if 0
   3284 static void TestExtremeCompression(void) {
   3285   static char *test[4];
   3286   int32_t j = 0, i = 0;
   3287   UErrorCode status = U_ZERO_ERROR;
   3288   UCollator *coll = ucol_open("en_US", status);
   3289   for(i = 0; i<4; i++) {
   3290     test[i] = (char *)malloc(2048*sizeof(char));
   3291   }
   3292   for(j = 10; j < 2048; j++) {
   3293     for(i = 0; i<4; i++) {
   3294       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
   3295       test[i][j-1] = (char)('a'+i);
   3296       test[i][j] = 0;
   3297     }
   3298   }
   3299   genericLocaleStarter("en_US", (const char **)test, 4);
   3300 
   3301   for(j = 10; j < 2048; j++) {
   3302     for(i = 0; i<1; i++) {
   3303       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3304       test[i][j] = 0;
   3305     }
   3306   }
   3307   for(i = 0; i<4; i++) {
   3308     free(test[i]);
   3309   }
   3310 }
   3311 #endif
   3312 
   3313 static void TestSurrogates(void) {
   3314   static const char *test[] = {
   3315     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   3316        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   3317        "\\ud800\\udc00f",  "\\ud800\\udc00",
   3318        "\\ud800\\udc00c", "\\ud800\\udc00b",
   3319        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   3320        "\\ud800\\udc00a",
   3321        "c", "b"
   3322   };
   3323 
   3324   static const char *rule =
   3325     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   3326        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   3327        "< \\ud800\\udc00f  << \\ud800\\udc00"
   3328        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   3329        "< \\ud800\\udc00a  < c < b" ;
   3330 
   3331   genericRulesStarter(rule, test, 14);
   3332 }
   3333 
   3334 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   3335 static void TestPrefix(void) {
   3336   uint32_t i;
   3337 
   3338   static const struct {
   3339     const char *rules;
   3340     const char *data[50];
   3341     const uint32_t len;
   3342   } tests[] = {
   3343     { "&z <<< z|a",
   3344       {"zz", "za"}, 2 },
   3345 
   3346     { "&z <<< z|   a",
   3347       {"zz", "za"}, 2 },
   3348     { "[strength I]"
   3349       "&a=\\ud900\\udc25"
   3350       "&z<<<\\ud900\\udc25|a",
   3351       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   3352   };
   3353 
   3354 
   3355   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3356     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3357   }
   3358 }
   3359 
   3360 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   3361 /* JIS X 4061 collation order implementation                                   */
   3362 static void TestNewJapanese(void) {
   3363 
   3364   static const char * const test1[] = {
   3365       "\\u30b7\\u30e3\\u30fc\\u30ec",
   3366       "\\u30b7\\u30e3\\u30a4",
   3367       "\\u30b7\\u30e4\\u30a3",
   3368       "\\u30b7\\u30e3\\u30ec",
   3369       "\\u3061\\u3087\\u3053",
   3370       "\\u3061\\u3088\\u3053",
   3371       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   3372       "\\u3066\\u30fc\\u305f",
   3373       "\\u30c6\\u30fc\\u30bf",
   3374       "\\u30c6\\u30a7\\u30bf",
   3375       "\\u3066\\u3048\\u305f",
   3376       "\\u3067\\u30fc\\u305f",
   3377       "\\u30c7\\u30fc\\u30bf",
   3378       "\\u30c7\\u30a7\\u30bf",
   3379       "\\u3067\\u3048\\u305f",
   3380       "\\u3066\\u30fc\\u305f\\u30fc",
   3381       "\\u30c6\\u30fc\\u30bf\\u30a1",
   3382       "\\u30c6\\u30a7\\u30bf\\u30fc",
   3383       "\\u3066\\u3047\\u305f\\u3041",
   3384       "\\u3066\\u3048\\u305f\\u30fc",
   3385       "\\u3067\\u30fc\\u305f\\u30fc",
   3386       "\\u30c7\\u30fc\\u30bf\\u30a1",
   3387       "\\u3067\\u30a7\\u305f\\u30a1",
   3388       "\\u30c7\\u3047\\u30bf\\u3041",
   3389       "\\u30c7\\u30a8\\u30bf\\u30a2",
   3390       "\\u3072\\u3086",
   3391       "\\u3073\\u3085\\u3042",
   3392       "\\u3074\\u3085\\u3042",
   3393       "\\u3073\\u3085\\u3042\\u30fc",
   3394       "\\u30d3\\u30e5\\u30a2\\u30fc",
   3395       "\\u3074\\u3085\\u3042\\u30fc",
   3396       "\\u30d4\\u30e5\\u30a2\\u30fc",
   3397       "\\u30d2\\u30e5\\u30a6",
   3398       "\\u30d2\\u30e6\\u30a6",
   3399       "\\u30d4\\u30e5\\u30a6\\u30a2",
   3400       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   3401       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   3402       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   3403       "\\u3072\\u3085\\u3093",
   3404       "\\u3074\\u3085\\u3093",
   3405       "\\u3075\\u30fc\\u308a",
   3406       "\\u30d5\\u30fc\\u30ea",
   3407       "\\u3075\\u3045\\u308a",
   3408       "\\u3075\\u30a5\\u308a",
   3409       "\\u3075\\u30a5\\u30ea",
   3410       "\\u30d5\\u30a6\\u30ea",
   3411       "\\u3076\\u30fc\\u308a",
   3412       "\\u30d6\\u30fc\\u30ea",
   3413       "\\u3076\\u3045\\u308a",
   3414       "\\u30d6\\u30a5\\u308a",
   3415       "\\u3077\\u3046\\u308a",
   3416       "\\u30d7\\u30a6\\u30ea",
   3417       "\\u3075\\u30fc\\u308a\\u30fc",
   3418       "\\u30d5\\u30a5\\u30ea\\u30fc",
   3419       "\\u3075\\u30a5\\u308a\\u30a3",
   3420       "\\u30d5\\u3045\\u308a\\u3043",
   3421       "\\u30d5\\u30a6\\u30ea\\u30fc",
   3422       "\\u3075\\u3046\\u308a\\u3043",
   3423       "\\u30d6\\u30a6\\u30ea\\u30a4",
   3424       "\\u3077\\u30fc\\u308a\\u30fc",
   3425       "\\u3077\\u30a5\\u308a\\u30a4",
   3426       "\\u3077\\u3046\\u308a\\u30fc",
   3427       "\\u30d7\\u30a6\\u30ea\\u30a4",
   3428       "\\u30d5\\u30fd",
   3429       "\\u3075\\u309e",
   3430       "\\u3076\\u309d",
   3431       "\\u3076\\u3075",
   3432       "\\u3076\\u30d5",
   3433       "\\u30d6\\u3075",
   3434       "\\u30d6\\u30d5",
   3435       "\\u3076\\u309e",
   3436       "\\u3076\\u3077",
   3437       "\\u30d6\\u3077",
   3438       "\\u3077\\u309d",
   3439       "\\u30d7\\u30fd",
   3440       "\\u3077\\u3075",
   3441 };
   3442 
   3443   static const char *test2[] = {
   3444     "\\u306f\\u309d", /* H\\u309d */
   3445     "\\u30cf\\u30fd", /* K\\u30fd */
   3446     "\\u306f\\u306f", /* HH */
   3447     "\\u306f\\u30cf", /* HK */
   3448     "\\u30cf\\u30cf", /* KK */
   3449     "\\u306f\\u309e", /* H\\u309e */
   3450     "\\u30cf\\u30fe", /* K\\u30fe */
   3451     "\\u306f\\u3070", /* HH\\u309b */
   3452     "\\u30cf\\u30d0", /* KK\\u309b */
   3453     "\\u306f\\u3071", /* HH\\u309c */
   3454     "\\u30cf\\u3071", /* KH\\u309c */
   3455     "\\u30cf\\u30d1", /* KK\\u309c */
   3456     "\\u3070\\u309d", /* H\\u309b\\u309d */
   3457     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   3458     "\\u3070\\u306f", /* H\\u309bH */
   3459     "\\u30d0\\u30cf", /* K\\u309bK */
   3460     "\\u3070\\u309e", /* H\\u309b\\u309e */
   3461     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   3462     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   3463     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   3464     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   3465     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   3466     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   3467     "\\u3071\\u309d", /* H\\u309c\\u309d */
   3468     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   3469     "\\u3071\\u306f", /* H\\u309cH */
   3470     "\\u30d1\\u30cf", /* K\\u309cK */
   3471     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   3472     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   3473     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   3474     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   3475     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   3476   };
   3477   /*
   3478   static const char *test3[] = {
   3479     "\\u221er\\u221e",
   3480     "\\u221eR#",
   3481     "\\u221et\\u221e",
   3482     "#r\\u221e",
   3483     "#R#",
   3484     "#t%",
   3485     "#T%",
   3486     "8t\\u221e",
   3487     "8T\\u221e",
   3488     "8t#",
   3489     "8T#",
   3490     "8t%",
   3491     "8T%",
   3492     "8t8",
   3493     "8T8",
   3494     "\\u03c9r\\u221e",
   3495     "\\u03a9R%",
   3496     "rr\\u221e",
   3497     "rR\\u221e",
   3498     "Rr\\u221e",
   3499     "RR\\u221e",
   3500     "RT%",
   3501     "rt8",
   3502     "tr\\u221e",
   3503     "tr8",
   3504     "TR8",
   3505     "tt8",
   3506     "\\u30b7\\u30e3\\u30fc\\u30ec",
   3507   };
   3508   */
   3509   static const UColAttribute att[] = { UCOL_STRENGTH };
   3510   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   3511 
   3512   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   3513   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   3514 
   3515   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   3516   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   3517   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   3518   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   3519   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   3520 }
   3521 
   3522 static void TestStrCollIdenticalPrefix(void) {
   3523   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   3524   const char* test[] = {
   3525     "ab\\ud9b0\\udc70",
   3526     "ab\\ud9b0\\udc71"
   3527   };
   3528   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   3529 }
   3530 /* Contractions should have all their canonically equivalent */
   3531 /* strings included */
   3532 static void TestContractionClosure(void) {
   3533   static const struct {
   3534     const char *rules;
   3535     const char *data[10];
   3536     const uint32_t len;
   3537   } tests[] = {
   3538     {   "&b=\\u00e4\\u00e4",
   3539       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   3540     {   "&b=\\u00C5",
   3541       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   3542   };
   3543   uint32_t i;
   3544 
   3545 
   3546   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3547     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   3548   }
   3549 }
   3550 
   3551 /* This tests also fails*/
   3552 static void TestBeforePrefixFailure(void) {
   3553   static const struct {
   3554     const char *rules;
   3555     const char *data[10];
   3556     const uint32_t len;
   3557   } tests[] = {
   3558     { "&g <<< a"
   3559       "&[before 3]\\uff41 <<< x",
   3560       {"x", "\\uff41"}, 2 },
   3561     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3562         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3563         "&[before 3]\\u30a7<<<\\u30a9",
   3564       {"\\u30a9", "\\u30a7"}, 2 },
   3565     {   "&[before 3]\\u30a7<<<\\u30a9"
   3566         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3567         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   3568       {"\\u30a9", "\\u30a7"}, 2 },
   3569   };
   3570   uint32_t i;
   3571 
   3572 
   3573   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3574     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3575   }
   3576 
   3577 #if 0
   3578   const char* rule1 =
   3579         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3580         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3581         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   3582   const char* rule2 =
   3583         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   3584         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3585         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   3586   const char* test[] = {
   3587       "\\u30c6\\u30fc\\u30bf",
   3588       "\\u30c6\\u30a7\\u30bf",
   3589   };
   3590   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   3591   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   3592 /* this piece of code should be in some sort of verbose mode     */
   3593 /* it gets the collation elements for elements and prints them   */
   3594 /* This is useful when trying to see whether the problem is      */
   3595   {
   3596     UErrorCode status = U_ZERO_ERROR;
   3597     uint32_t i = 0;
   3598     UCollationElements *it = NULL;
   3599     uint32_t CE;
   3600     UChar string[256];
   3601     uint32_t uStringLen;
   3602     UCollator *coll = NULL;
   3603 
   3604     uStringLen = u_unescape(rule1, string, 256);
   3605 
   3606     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3607 
   3608     /*coll = ucol_open("ja_JP_JIS", &status);*/
   3609     it = ucol_openElements(coll, string, 0, &status);
   3610 
   3611     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   3612       log_verbose("%s\n", test[i]);
   3613       uStringLen = u_unescape(test[i], string, 256);
   3614       ucol_setText(it, string, uStringLen, &status);
   3615 
   3616       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   3617         log_verbose("%08X\n", CE);
   3618       }
   3619       log_verbose("\n");
   3620 
   3621     }
   3622 
   3623     ucol_closeElements(it);
   3624     ucol_close(coll);
   3625   }
   3626 #endif
   3627 }
   3628 
   3629 static void TestPrefixCompose(void) {
   3630   const char* rule1 =
   3631         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   3632   /*
   3633   const char* test[] = {
   3634       "\\u30c6\\u30fc\\u30bf",
   3635       "\\u30c6\\u30a7\\u30bf",
   3636   };
   3637   */
   3638   {
   3639     UErrorCode status = U_ZERO_ERROR;
   3640     /*uint32_t i = 0;*/
   3641     /*UCollationElements *it = NULL;*/
   3642 /*    uint32_t CE;*/
   3643     UChar string[256];
   3644     uint32_t uStringLen;
   3645     UCollator *coll = NULL;
   3646 
   3647     uStringLen = u_unescape(rule1, string, 256);
   3648 
   3649     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3650     ucol_close(coll);
   3651   }
   3652 
   3653 
   3654 }
   3655 
   3656 /*
   3657 [last variable] last variable value
   3658 [last primary ignorable] largest CE for primary ignorable
   3659 [last secondary ignorable] largest CE for secondary ignorable
   3660 [last tertiary ignorable] largest CE for tertiary ignorable
   3661 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   3662 */
   3663 
   3664 static void TestRuleOptions(void) {
   3665   /* values here are hardcoded and are correct for the current UCA
   3666    * when the UCA changes, one might be forced to change these
   3667    * values.
   3668    */
   3669 
   3670   /*
   3671    * These strings contain the last character before [variable top]
   3672    * and the first and second characters (by primary weights) after it.
   3673    * See FractionalUCA.txt. For example:
   3674       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   3675       [variable top = 0C FE]
   3676       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   3677      and
   3678       00B4; [0D 0C, 05, 05]
   3679    *
   3680    * Note: Starting with UCA 6.0, the [variable top] collation element
   3681    * is not the weight of any character or string,
   3682    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   3683    */
   3684 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   3685 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   3686 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   3687 
   3688   /*
   3689    * This string has to match the character that has the [last regular] weight
   3690    * which changes with each UCA version.
   3691    * See the bottom of FractionalUCA.txt which says something like
   3692       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   3693    *
   3694    * Note: Starting with UCA 6.0, the [last regular] collation element
   3695    * is not the weight of any character or string,
   3696    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   3697    */
   3698 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   3699 
   3700   static const struct {
   3701     const char *rules;
   3702     const char *data[10];
   3703     const uint32_t len;
   3704   } tests[] = {
   3705     /* - all befores here amount to zero */
   3706     { "&[before 3][first tertiary ignorable]<<<a",
   3707         { "\\u0000", "a"}, 2
   3708     }, /* you cannot go before first tertiary ignorable */
   3709 
   3710     { "&[before 3][last tertiary ignorable]<<<a",
   3711         { "\\u0000", "a"}, 2
   3712     }, /* you cannot go before last tertiary ignorable */
   3713 
   3714     { "&[before 3][first secondary ignorable]<<<a",
   3715         { "\\u0000", "a"}, 2
   3716     }, /* you cannot go before first secondary ignorable */
   3717 
   3718     { "&[before 3][last secondary ignorable]<<<a",
   3719         { "\\u0000", "a"}, 2
   3720     }, /* you cannot go before first secondary ignorable */
   3721 
   3722     /* 'normal' befores */
   3723 
   3724     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
   3725         {  "c", "b", "\\u0332", "a" }, 4
   3726     },
   3727 
   3728     /* we don't have a code point that corresponds to
   3729      * the last primary ignorable
   3730      */
   3731     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
   3732         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   3733     },
   3734 
   3735     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   3736         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   3737     },
   3738 
   3739     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   3740         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   3741     },
   3742 
   3743     { "&[first regular]<a"
   3744       "&[before 1][first regular]<b",
   3745       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   3746     },
   3747 
   3748     { "&[before 1][last regular]<b"
   3749       "&[last regular]<a",
   3750         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   3751     },
   3752 
   3753     { "&[before 1][first implicit]<b"
   3754       "&[first implicit]<a",
   3755         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   3756     },
   3757 
   3758     { "&[before 1][last implicit]<b"
   3759       "&[last implicit]<a",
   3760         { "b", "\\U0010FFFD", "a" }, 3
   3761     },
   3762 
   3763     { "&[last variable]<z"
   3764       "&[last primary ignorable]<x"
   3765       "&[last secondary ignorable]<<y"
   3766       "&[last tertiary ignorable]<<<w"
   3767       "&[top]<u",
   3768       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   3769     }
   3770 
   3771   };
   3772   uint32_t i;
   3773 
   3774   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3775     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3776   }
   3777 }
   3778 
   3779 
   3780 static void TestOptimize(void) {
   3781   /* this is not really a test - just trying out
   3782    * whether copying of UCA contents will fail
   3783    * Cannot really test, since the functionality
   3784    * remains the same.
   3785    */
   3786   static const struct {
   3787     const char *rules;
   3788     const char *data[10];
   3789     const uint32_t len;
   3790   } tests[] = {
   3791     /* - all befores here amount to zero */
   3792     { "[optimize [\\uAC00-\\uD7FF]]",
   3793     { "a", "b"}, 2}
   3794   };
   3795   uint32_t i;
   3796 
   3797   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3798     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3799   }
   3800 }
   3801 
   3802 /*
   3803 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   3804 weiv    ucol_strcollIter?
   3805 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   3806 weiv    these are the input strings?
   3807 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   3808 weiv    will check - could be a problem with utf-8 iterator
   3809 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   3810 weiv    hmmm
   3811 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   3812 weiv    that doesn't sound right
   3813 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   3814 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   3815 cycheng (at) ca.ibm.c... yes
   3816 weiv    and then do the comparison
   3817 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   3818 weiv    utf-16 strings look like a little endian ones in the example you sent me
   3819 weiv    It could be a bug - let me try to test it out
   3820 cycheng (at) ca.ibm.c... ok
   3821 cycheng (at) ca.ibm.c... we can wait till the conf. call
   3822 cycheng (at) ca.ibm.c... next weke
   3823 weiv    that would be great
   3824 weiv    hmmm
   3825 weiv    I might be wrong
   3826 weiv    let me play with it some more
   3827 cycheng (at) ca.ibm.c... ok
   3828 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   3829 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   3830 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   3831 weiv    ok
   3832 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   3833 weiv    thanks
   3834 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   3835 */
   3836 #if 0
   3837 static void Alexis(void) {
   3838   UErrorCode status = U_ZERO_ERROR;
   3839   UCollator *coll = ucol_open("", &status);
   3840 
   3841 
   3842   const char utf16be[2][4] = {
   3843     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   3844     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   3845   };
   3846 
   3847   const char utf8[2][4] = {
   3848     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   3849     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   3850   };
   3851 
   3852   UCharIterator iterU161, iterU162;
   3853   UCharIterator iterU81, iterU82;
   3854 
   3855   UCollationResult resU16, resU8;
   3856 
   3857   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   3858   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   3859 
   3860   uiter_setUTF8(&iterU81, utf8[0], 4);
   3861   uiter_setUTF8(&iterU82, utf8[1], 4);
   3862 
   3863   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3864 
   3865   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   3866   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   3867 
   3868 
   3869   if(resU16 != resU8) {
   3870     log_err("different results\n");
   3871   }
   3872 
   3873   ucol_close(coll);
   3874 }
   3875 #endif
   3876 
   3877 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   3878 static void Alexis2(void) {
   3879   UErrorCode status = U_ZERO_ERROR;
   3880   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3881   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3882   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3883   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   3884 
   3885   UConverter *conv = NULL;
   3886 
   3887   UCharIterator U16BEItS, U16BEItT;
   3888   UCharIterator U8ItS, U8ItT;
   3889 
   3890   UCollationResult resU16, resU16BE, resU8;
   3891 
   3892   static const char* const pairs[][2] = {
   3893     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   3894     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   3895     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   3896     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   3897     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   3898     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   3899     { "\\u0020", "\\u0020\\u0000"}
   3900 /*
   3901 5F20 (my result here)
   3902 5F204E008E3F
   3903 5F20 (your result here)
   3904 */
   3905   };
   3906 
   3907   int32_t i = 0;
   3908 
   3909   UCollator *coll = ucol_open("", &status);
   3910   if(status == U_FILE_ACCESS_ERROR) {
   3911     log_data_err("Is your data around?\n");
   3912     return;
   3913   } else if(U_FAILURE(status)) {
   3914     log_err("Error opening collator\n");
   3915     return;
   3916   }
   3917   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3918   conv = ucnv_open("UTF16BE", &status);
   3919   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   3920     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3921     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3922 
   3923     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   3924 
   3925     log_verbose("Result of strcoll is %i\n", resU16);
   3926 
   3927     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   3928     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   3929     (void)U16BELenS;    /* Suppress set but not used warnings. */
   3930     (void)U16BELenT;
   3931 
   3932     /* use the original sizes, as the result from converter is in bytes */
   3933     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   3934     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   3935 
   3936     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   3937 
   3938     log_verbose("Result of U16BE is %i\n", resU16BE);
   3939 
   3940     if(resU16 != resU16BE) {
   3941       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   3942     }
   3943 
   3944     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   3945     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   3946 
   3947     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   3948     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   3949 
   3950     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   3951 
   3952     if(resU16 != resU8) {
   3953       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   3954     }
   3955 
   3956   }
   3957 
   3958   ucol_close(coll);
   3959   ucnv_close(conv);
   3960 }
   3961 
   3962 static void TestHebrewUCA(void) {
   3963   UErrorCode status = U_ZERO_ERROR;
   3964   static const char *first[] = {
   3965     "d790d6b8d79cd795d6bcd7a9",
   3966     "d790d79cd79ed7a7d799d799d7a1",
   3967     "d790d6b4d79ed795d6bcd7a9",
   3968   };
   3969 
   3970   char utf8String[3][256];
   3971   UChar utf16String[3][256];
   3972 
   3973   int32_t i = 0, j = 0;
   3974   int32_t sizeUTF8[3];
   3975   int32_t sizeUTF16[3];
   3976 
   3977   UCollator *coll = ucol_open("", &status);
   3978   if (U_FAILURE(status)) {
   3979       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   3980       return;
   3981   }
   3982   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   3983 
   3984   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   3985     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   3986     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   3987     log_verbose("%i: ");
   3988     for(j = 0; j < sizeUTF16[i]; j++) {
   3989       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   3990       log_verbose("%04X", utf16String[i][j]);
   3991     }
   3992     log_verbose("\n");
   3993   }
   3994   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   3995     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   3996       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   3997     }
   3998   }
   3999 
   4000   ucol_close(coll);
   4001 
   4002 }
   4003 
   4004 static void TestPartialSortKeyTermination(void) {
   4005   static const char* cases[] = {
   4006     "\\u1234\\u1234\\udc00",
   4007     "\\udc00\\ud800\\ud800"
   4008   };
   4009 
   4010   int32_t i = sizeof(UCollator);
   4011 
   4012   UErrorCode status = U_ZERO_ERROR;
   4013 
   4014   UCollator *coll = ucol_open("", &status);
   4015 
   4016   UCharIterator iter;
   4017 
   4018   UChar currCase[256];
   4019   int32_t length = 0;
   4020   int32_t pKeyLen = 0;
   4021 
   4022   uint8_t key[256];
   4023 
   4024   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   4025     uint32_t state[2] = {0, 0};
   4026     length = u_unescape(cases[i], currCase, 256);
   4027     uiter_setString(&iter, currCase, length);
   4028     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   4029     (void)pKeyLen;   /* Suppress set but not used warning. */
   4030 
   4031     log_verbose("Done\n");
   4032 
   4033   }
   4034   ucol_close(coll);
   4035 }
   4036 
   4037 static void TestSettings(void) {
   4038   static const char* cases[] = {
   4039     "apple",
   4040       "Apple"
   4041   };
   4042 
   4043   static const char* locales[] = {
   4044     "",
   4045       "en"
   4046   };
   4047 
   4048   UErrorCode status = U_ZERO_ERROR;
   4049 
   4050   int32_t i = 0, j = 0;
   4051 
   4052   UChar source[256], target[256];
   4053   int32_t sLen = 0, tLen = 0;
   4054 
   4055   UCollator *collateObject = NULL;
   4056   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   4057     collateObject = ucol_open(locales[i], &status);
   4058     ucol_setStrength(collateObject, UCOL_PRIMARY);
   4059     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   4060     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   4061       sLen = u_unescape(cases[j-1], source, 256);
   4062       source[sLen] = 0;
   4063       tLen = u_unescape(cases[j], target, 256);
   4064       source[tLen] = 0;
   4065       doTest(collateObject, source, target, UCOL_EQUAL);
   4066     }
   4067     ucol_close(collateObject);
   4068   }
   4069 }
   4070 
   4071 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   4072     UErrorCode status = U_ZERO_ERROR;
   4073     int32_t errorNo = 0;
   4074     const UChar *sourceRules = NULL;
   4075     int32_t sourceRulesLen = 0;
   4076     UParseError parseError;
   4077     UColAttributeValue french = UCOL_OFF;
   4078 
   4079     if(!ucol_equals(source, target)) {
   4080         log_err("Same collators, different address not equal\n");
   4081         errorNo++;
   4082     }
   4083     ucol_close(target);
   4084     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   4085         target = ucol_safeClone(source, NULL, NULL, &status);
   4086         if(U_FAILURE(status)) {
   4087             log_err("Error creating clone\n");
   4088             errorNo++;
   4089             return errorNo;
   4090         }
   4091         if(!ucol_equals(source, target)) {
   4092             log_err("Collator different from it's clone\n");
   4093             errorNo++;
   4094         }
   4095         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   4096         if(french == UCOL_ON) {
   4097             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   4098         } else {
   4099             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   4100         }
   4101         if(U_FAILURE(status)) {
   4102             log_err("Error setting attributes\n");
   4103             errorNo++;
   4104             return errorNo;
   4105         }
   4106         if(ucol_equals(source, target)) {
   4107             log_err("Collators same even when options changed\n");
   4108             errorNo++;
   4109         }
   4110         ucol_close(target);
   4111 
   4112         sourceRules = ucol_getRules(source, &sourceRulesLen);
   4113         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4114         if(U_FAILURE(status)) {
   4115             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
   4116             errorNo++;
   4117             return errorNo;
   4118         }
   4119         if(!ucol_equals(source, target)) {
   4120             log_err("Collator different from collator that was created from the same rules\n");
   4121             errorNo++;
   4122         }
   4123         ucol_close(target);
   4124     }
   4125     return errorNo;
   4126 }
   4127 
   4128 
   4129 static void TestEquals(void) {
   4130     /* ucol_equals is not currently a public API. There is a chance that it will become
   4131     * something like this, but currently it is only used by RuleBasedCollator::operator==
   4132     */
   4133     /* test whether the two collators instantiated from the same locale are equal */
   4134     UErrorCode status = U_ZERO_ERROR;
   4135     UParseError parseError;
   4136     int32_t noOfLoc = uloc_countAvailable();
   4137     const char *locName = NULL;
   4138     UCollator *source = NULL, *target = NULL;
   4139     int32_t i = 0;
   4140 
   4141     const char* rules[] = {
   4142         "&l < lj <<< Lj <<< LJ",
   4143         "&n < nj <<< Nj <<< NJ",
   4144         "&ae <<< \\u00e4",
   4145         "&AE <<< \\u00c4"
   4146     };
   4147     /*
   4148     const char* badRules[] = {
   4149     "&l <<< Lj",
   4150     "&n < nj <<< nJ <<< NJ",
   4151     "&a <<< \\u00e4",
   4152     "&AE <<< \\u00c4 <<< x"
   4153     };
   4154     */
   4155 
   4156     UChar sourceRules[1024], targetRules[1024];
   4157     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   4158     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   4159 
   4160     for(i = 0; i < rulesSize; i++) {
   4161         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   4162         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   4163     }
   4164 
   4165     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4166     if(status == U_FILE_ACCESS_ERROR) {
   4167         log_data_err("Is your data around?\n");
   4168         return;
   4169     } else if(U_FAILURE(status)) {
   4170         log_err("Error opening collator\n");
   4171         return;
   4172     }
   4173     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4174     if(!ucol_equals(source, target)) {
   4175         log_err("Equivalent collators not equal!\n");
   4176     }
   4177     ucol_close(source);
   4178     ucol_close(target);
   4179 
   4180     source = ucol_open("root", &status);
   4181     target = ucol_open("root", &status);
   4182     log_verbose("Testing root\n");
   4183     if(!ucol_equals(source, source)) {
   4184         log_err("Same collator not equal\n");
   4185     }
   4186     if(TestEqualsForCollator(locName, source, target)) {
   4187         log_err("Errors for root\n", locName);
   4188     }
   4189     ucol_close(source);
   4190 
   4191     for(i = 0; i<noOfLoc; i++) {
   4192         status = U_ZERO_ERROR;
   4193         locName = uloc_getAvailable(i);
   4194         /*if(hasCollationElements(locName)) {*/
   4195         log_verbose("Testing equality for locale %s\n", locName);
   4196         source = ucol_open(locName, &status);
   4197         target = ucol_open(locName, &status);
   4198         if (U_FAILURE(status)) {
   4199             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   4200             continue;
   4201         }
   4202         if(TestEqualsForCollator(locName, source, target)) {
   4203             log_err("Errors for locale %s\n", locName);
   4204         }
   4205         ucol_close(source);
   4206         /*}*/
   4207     }
   4208 }
   4209 
   4210 static void TestJ2726(void) {
   4211     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   4212     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   4213     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   4214     UErrorCode status = U_ZERO_ERROR;
   4215     UCollator *coll = ucol_open("en", &status);
   4216     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   4217     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4218     doTest(coll, a, aSpace, UCOL_EQUAL);
   4219     doTest(coll, aSpace, a, UCOL_EQUAL);
   4220     doTest(coll, a, spaceA, UCOL_EQUAL);
   4221     doTest(coll, spaceA, a, UCOL_EQUAL);
   4222     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   4223     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   4224     ucol_close(coll);
   4225 }
   4226 
   4227 static void NullRule(void) {
   4228     UChar r[3] = {0};
   4229     UErrorCode status = U_ZERO_ERROR;
   4230     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4231     if(U_SUCCESS(status)) {
   4232         log_err("This should have been an error!\n");
   4233         ucol_close(coll);
   4234     } else {
   4235         status = U_ZERO_ERROR;
   4236     }
   4237     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4238     if(U_FAILURE(status)) {
   4239         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   4240     } else {
   4241         ucol_close(coll);
   4242     }
   4243 }
   4244 
   4245 /**
   4246  * Test for CollationElementIterator previous and next for the whole set of
   4247  * unicode characters with normalization on.
   4248  */
   4249 static void TestNumericCollation(void)
   4250 {
   4251     UErrorCode status = U_ZERO_ERROR;
   4252 
   4253     const static char *basicTestStrings[]={
   4254     "hello1",
   4255     "hello2",
   4256     "hello2002",
   4257     "hello2003",
   4258     "hello123456",
   4259     "hello1234567",
   4260     "hello10000000",
   4261     "hello100000000",
   4262     "hello1000000000",
   4263     "hello10000000000",
   4264     };
   4265 
   4266     const static char *preZeroTestStrings[]={
   4267     "avery10000",
   4268     "avery010000",
   4269     "avery0010000",
   4270     "avery00010000",
   4271     "avery000010000",
   4272     "avery0000010000",
   4273     "avery00000010000",
   4274     "avery000000010000",
   4275     };
   4276 
   4277     const static char *thirtyTwoBitNumericStrings[]={
   4278     "avery42949672960",
   4279     "avery42949672961",
   4280     "avery42949672962",
   4281     "avery429496729610"
   4282     };
   4283 
   4284      const static char *longNumericStrings[]={
   4285      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   4286         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   4287         are treated as multiple collation elements. */
   4288     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   4289     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   4290     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   4291     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   4292     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   4293     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   4294     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   4295     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   4296     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   4297     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   4298     };
   4299 
   4300     const static char *supplementaryDigits[] = {
   4301       "\\uD835\\uDFCE", /* 0 */
   4302       "\\uD835\\uDFCF", /* 1 */
   4303       "\\uD835\\uDFD0", /* 2 */
   4304       "\\uD835\\uDFD1", /* 3 */
   4305       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   4306       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   4307       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   4308       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   4309       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   4310       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   4311     };
   4312 
   4313     const static char *foreignDigits[] = {
   4314       "\\u0661",
   4315         "\\u0662",
   4316         "\\u0663",
   4317       "\\u0661\\u0660",
   4318       "\\u0661\\u0662",
   4319       "\\u0661\\u0663",
   4320       "\\u0662\\u0660",
   4321       "\\u0662\\u0662",
   4322       "\\u0662\\u0663",
   4323       "\\u0663\\u0660",
   4324       "\\u0663\\u0662",
   4325       "\\u0663\\u0663"
   4326     };
   4327 
   4328     const static char *evenZeroes[] = {
   4329       "2000",
   4330       "2001",
   4331         "2002",
   4332         "2003"
   4333     };
   4334 
   4335     UColAttribute att = UCOL_NUMERIC_COLLATION;
   4336     UColAttributeValue val = UCOL_ON;
   4337 
   4338     /* Open our collator. */
   4339     UCollator* coll = ucol_open("root", &status);
   4340     if (U_FAILURE(status)){
   4341         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   4342               myErrorName(status));
   4343         return;
   4344     }
   4345     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   4346     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   4347     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   4348     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   4349     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   4350     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   4351 
   4352     /* Setting up our collator to do digits. */
   4353     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   4354     if (U_FAILURE(status)){
   4355         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   4356               myErrorName(status));
   4357         return;
   4358     }
   4359 
   4360     /*
   4361        Testing that prepended zeroes still yield the correct collation behavior.
   4362        We expect that every element in our strings array will be equal.
   4363     */
   4364     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   4365 
   4366     ucol_close(coll);
   4367 }
   4368 
   4369 static void TestTibetanConformance(void)
   4370 {
   4371     const char* test[] = {
   4372         "\\u0FB2\\u0591\\u0F71\\u0061",
   4373         "\\u0FB2\\u0F71\\u0061"
   4374     };
   4375 
   4376     UErrorCode status = U_ZERO_ERROR;
   4377     UCollator *coll = ucol_open("", &status);
   4378     UChar source[100];
   4379     UChar target[100];
   4380     int result;
   4381     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4382     if (U_SUCCESS(status)) {
   4383         u_unescape(test[0], source, 100);
   4384         u_unescape(test[1], target, 100);
   4385         doTest(coll, source, target, UCOL_EQUAL);
   4386         result = ucol_strcoll(coll, source, -1,   target, -1);
   4387         log_verbose("result %d\n", result);
   4388         if (UCOL_EQUAL != result) {
   4389             log_err("Tibetan comparison error\n");
   4390         }
   4391     }
   4392     ucol_close(coll);
   4393 
   4394     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   4395 }
   4396 
   4397 static void TestPinyinProblem(void) {
   4398     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   4399     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   4400 }
   4401 
   4402 #define TST_UCOL_MAX_INPUT 0x220001
   4403 #define topByte 0xFF000000;
   4404 #define bottomByte 0xFF;
   4405 #define fourBytes 0xFFFFFFFF;
   4406 
   4407 
   4408 static void showImplicit(UChar32 i) {
   4409     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
   4410         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
   4411     }
   4412 }
   4413 
   4414 static void TestImplicitGeneration(void) {
   4415     UErrorCode status = U_ZERO_ERROR;
   4416     UChar32 last = 0;
   4417     UChar32 current;
   4418     UChar32 i = 0, j = 0;
   4419     UChar32 roundtrip = 0;
   4420     UChar32 lastBottom = 0;
   4421     UChar32 currentBottom = 0;
   4422     UChar32 lastTop = 0;
   4423     UChar32 currentTop = 0;
   4424 
   4425     UCollator *coll = ucol_open("root", &status);
   4426     if(U_FAILURE(status)) {
   4427         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4428         return;
   4429     }
   4430 
   4431     uprv_uca_getRawFromImplicit(0xE20303E7);
   4432 
   4433     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
   4434         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
   4435 
   4436         /* check that it round-trips AND that all intervening ones are illegal*/
   4437         roundtrip = uprv_uca_getRawFromImplicit(current);
   4438         if (roundtrip != i) {
   4439             log_err("No roundtrip %08X\n", i);
   4440         }
   4441         if (last != 0) {
   4442             for (j = last + 1; j < current; ++j) {
   4443                 roundtrip = uprv_uca_getRawFromImplicit(j);
   4444                 /* raise an error if it *doesn't* find an error*/
   4445                 if (roundtrip != -1) {
   4446                     log_err("Fails to recognize illegal %08X\n", j);
   4447                 }
   4448             }
   4449         }
   4450         /* now do other consistency checks*/
   4451         lastBottom = last & bottomByte;
   4452         currentBottom = current & bottomByte;
   4453         lastTop = last & topByte;
   4454         currentTop = current & topByte;
   4455         (void)lastBottom;     /* Suppress set but not used warnings. */
   4456         (void)currentBottom;
   4457 
   4458         /* print out some values for spot-checking*/
   4459         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
   4460             showImplicit(i-3);
   4461             showImplicit(i-2);
   4462             showImplicit(i-1);
   4463             showImplicit(i);
   4464             showImplicit(i+1);
   4465             showImplicit(i+2);
   4466         }
   4467         last = current;
   4468 
   4469         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
   4470             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
   4471         }
   4472     }
   4473     showImplicit(TST_UCOL_MAX_INPUT-2);
   4474     showImplicit(TST_UCOL_MAX_INPUT-1);
   4475     showImplicit(TST_UCOL_MAX_INPUT);
   4476     ucol_close(coll);
   4477 }
   4478 
   4479 /**
   4480  * Iterate through the given iterator, checking to see that all the strings
   4481  * in the expected array are present.
   4482  * @param expected array of strings we expect to see, or NULL
   4483  * @param expectedCount number of elements of expected, or 0
   4484  */
   4485 static int32_t checkUEnumeration(const char* msg,
   4486                                  UEnumeration* iter,
   4487                                  const char** expected,
   4488                                  int32_t expectedCount) {
   4489     UErrorCode ec = U_ZERO_ERROR;
   4490     int32_t i = 0, n, j, bit;
   4491     int32_t seenMask = 0;
   4492 
   4493     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   4494     n = uenum_count(iter, &ec);
   4495     if (!assertSuccess("count", &ec)) return -1;
   4496     log_verbose("%s = [", msg);
   4497     for (;; ++i) {
   4498         const char* s = uenum_next(iter, NULL, &ec);
   4499         if (!assertSuccess("snext", &ec) || s == NULL) break;
   4500         if (i != 0) log_verbose(",");
   4501         log_verbose("%s", s);
   4502         /* check expected list */
   4503         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4504             if ((seenMask&bit) == 0 &&
   4505                 uprv_strcmp(s, expected[j]) == 0) {
   4506                 seenMask |= bit;
   4507                 break;
   4508             }
   4509         }
   4510     }
   4511     log_verbose("] (%d)\n", i);
   4512     assertTrue("count verified", i==n);
   4513     /* did we see all expected strings? */
   4514     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4515         if ((seenMask&bit)!=0) {
   4516             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   4517         } else {
   4518             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   4519         }
   4520     }
   4521     return n;
   4522 }
   4523 
   4524 /**
   4525  * Test new API added for separate collation tree.
   4526  */
   4527 static void TestSeparateTrees(void) {
   4528     UErrorCode ec = U_ZERO_ERROR;
   4529     UEnumeration *e = NULL;
   4530     int32_t n = -1;
   4531     UBool isAvailable;
   4532     char loc[256];
   4533 
   4534     static const char* AVAIL[] = { "en", "de" };
   4535 
   4536     static const char* KW[] = { "collation" };
   4537 
   4538     static const char* KWVAL[] = { "phonebook", "stroke" };
   4539 
   4540 #if !UCONFIG_NO_SERVICE
   4541     e = ucol_openAvailableLocales(&ec);
   4542     if (e != NULL) {
   4543         assertSuccess("ucol_openAvailableLocales", &ec);
   4544         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   4545         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   4546         (void)n;    /* Suppress set but not used warnings. */
   4547         /* Don't need to check n because we check list */
   4548         uenum_close(e);
   4549     } else {
   4550         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   4551     }
   4552 #endif
   4553 
   4554     e = ucol_getKeywords(&ec);
   4555     if (e != NULL) {
   4556         assertSuccess("ucol_getKeywords", &ec);
   4557         assertTrue("ucol_getKeywords!=0", e!=0);
   4558         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   4559         /* Don't need to check n because we check list */
   4560         uenum_close(e);
   4561     } else {
   4562         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   4563     }
   4564 
   4565     e = ucol_getKeywordValues(KW[0], &ec);
   4566     if (e != NULL) {
   4567         assertSuccess("ucol_getKeywordValues", &ec);
   4568         assertTrue("ucol_getKeywordValues!=0", e!=0);
   4569         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   4570         /* Don't need to check n because we check list */
   4571         uenum_close(e);
   4572     } else {
   4573         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   4574     }
   4575 
   4576     /* Try setting a warning before calling ucol_getKeywordValues */
   4577     ec = U_USING_FALLBACK_WARNING;
   4578     e = ucol_getKeywordValues(KW[0], &ec);
   4579     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   4580         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   4581         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   4582         /* Don't need to check n because we check list */
   4583         uenum_close(e);
   4584     }
   4585 
   4586     /*
   4587 U_DRAFT int32_t U_EXPORT2
   4588 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   4589                              const char* locale, UBool* isAvailable,
   4590                              UErrorCode* status);
   4591 }
   4592 */
   4593     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   4594                                      &isAvailable, &ec);
   4595     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4596         assertEquals("getFunctionalEquivalent(de)", "root", loc);
   4597         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   4598                    isAvailable == TRUE);
   4599     }
   4600 
   4601     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   4602                                      &isAvailable, &ec);
   4603     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4604         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
   4605         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
   4606                    isAvailable == TRUE);
   4607     }
   4608 }
   4609 
   4610 /* supercedes TestJ784 */
   4611 static void TestBeforePinyin(void) {
   4612     const static char rules[] = {
   4613         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   4614         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   4615         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   4616         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   4617         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   4618         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   4619     };
   4620 
   4621     const static char *test[] = {
   4622         "l\\u0101",
   4623         "la",
   4624         "l\\u0101n",
   4625         "lan ",
   4626         "l\\u0113",
   4627         "le",
   4628         "l\\u0113n",
   4629         "len"
   4630     };
   4631 
   4632     const static char *test2[] = {
   4633         "x\\u0101",
   4634         "x\\u0100",
   4635         "X\\u0101",
   4636         "X\\u0100",
   4637         "x\\u00E1",
   4638         "x\\u00C1",
   4639         "X\\u00E1",
   4640         "X\\u00C1",
   4641         "x\\u01CE",
   4642         "x\\u01CD",
   4643         "X\\u01CE",
   4644         "X\\u01CD",
   4645         "x\\u00E0",
   4646         "x\\u00C0",
   4647         "X\\u00E0",
   4648         "X\\u00C0",
   4649         "xa",
   4650         "xA",
   4651         "Xa",
   4652         "XA",
   4653         "x\\u0101x",
   4654         "x\\u0100x",
   4655         "x\\u00E1x",
   4656         "x\\u00C1x",
   4657         "x\\u01CEx",
   4658         "x\\u01CDx",
   4659         "x\\u00E0x",
   4660         "x\\u00C0x",
   4661         "xax",
   4662         "xAx"
   4663     };
   4664 
   4665     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4666     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   4667     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   4668     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   4669 }
   4670 
   4671 static void TestBeforeTightening(void) {
   4672     static const struct {
   4673         const char *rules;
   4674         UErrorCode expectedStatus;
   4675     } tests[] = {
   4676         { "&[before 1]a<x", U_ZERO_ERROR },
   4677         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   4678         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   4679         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   4680         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   4681         { "&[before 2]a<<x",U_ZERO_ERROR },
   4682         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   4683         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   4684         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   4685         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   4686         { "&[before 3]a<<<x",U_ZERO_ERROR },
   4687         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   4688         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   4689     };
   4690 
   4691     int32_t i = 0;
   4692 
   4693     UErrorCode status = U_ZERO_ERROR;
   4694     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4695     uint32_t rlen = 0;
   4696 
   4697     UCollator *coll = NULL;
   4698 
   4699 
   4700     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4701         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   4702         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4703         if(status != tests[i].expectedStatus) {
   4704             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   4705                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   4706         }
   4707         ucol_close(coll);
   4708         status = U_ZERO_ERROR;
   4709     }
   4710 
   4711 }
   4712 
   4713 /*
   4714 &m < a
   4715 &[before 1] a < x <<< X << q <<< Q < z
   4716 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   4717 
   4718 &m < a
   4719 &[before 2] a << x <<< X << q <<< Q < z
   4720 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   4721 
   4722 &m < a
   4723 &[before 3] a <<< x <<< X << q <<< Q < z
   4724 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   4725 
   4726 
   4727 &m << a
   4728 &[before 1] a < x <<< X << q <<< Q < z
   4729 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   4730 
   4731 &m << a
   4732 &[before 2] a << x <<< X << q <<< Q < z
   4733 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   4734 
   4735 &m << a
   4736 &[before 3] a <<< x <<< X << q <<< Q < z
   4737 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   4738 
   4739 
   4740 &m <<< a
   4741 &[before 1] a < x <<< X << q <<< Q < z
   4742 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   4743 
   4744 &m <<< a
   4745 &[before 2] a << x <<< X << q <<< Q < z
   4746 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   4747 
   4748 &m <<< a
   4749 &[before 3] a <<< x <<< X << q <<< Q < z
   4750 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   4751 
   4752 
   4753 &[before 1] s < x <<< X << q <<< Q < z
   4754 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   4755 
   4756 &[before 2] s << x <<< X << q <<< Q < z
   4757 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   4758 
   4759 &[before 3] s <<< x <<< X << q <<< Q < z
   4760 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   4761 
   4762 
   4763 &[before 1] \u24DC < x <<< X << q <<< Q < z
   4764 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   4765 
   4766 &[before 2] \u24DC << x <<< X << q <<< Q < z
   4767 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   4768 
   4769 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   4770 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   4771 */
   4772 
   4773 
   4774 #if 0
   4775 /* requires features not yet supported */
   4776 static void TestMoreBefore(void) {
   4777     static const struct {
   4778         const char* rules;
   4779         const char* order[16];
   4780         int32_t size;
   4781     } tests[] = {
   4782         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   4783         { "m","M","x","X","q","Q","z","a","n" }, 9},
   4784         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   4785         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4786         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   4787         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4788         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   4789         { "x","X","q","Q","z","m","M","a","n" }, 9},
   4790         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   4791         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4792         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   4793         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4794         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   4795         { "x","X","q","Q","z","n","m","a","M" }, 9},
   4796         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   4797         { "x","X","q","Q","m","a","M","z","n" }, 9},
   4798         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   4799         { "m","x","X","a","M","q","Q","z","n" }, 9},
   4800         { "&[before 1] s < x <<< X << q <<< Q < z",
   4801         { "r","R","x","X","q","Q","z","s","n" }, 9},
   4802         { "&[before 2] s << x <<< X << q <<< Q < z",
   4803         { "r","R","x","X","q","Q","s","z","n" }, 9},
   4804         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   4805         { "r","R","x","X","s","q","Q","z","n" }, 9},
   4806         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   4807         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   4808         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   4809         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   4810         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   4811         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   4812     };
   4813 
   4814     int32_t i = 0;
   4815 
   4816     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4817         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   4818     }
   4819 }
   4820 #endif
   4821 
   4822 static void TestTailorNULL( void ) {
   4823     const static char* rule = "&a <<< '\\u0000'";
   4824     UErrorCode status = U_ZERO_ERROR;
   4825     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4826     uint32_t rlen = 0;
   4827     UChar a = 1, null = 0;
   4828     UCollationResult res = UCOL_EQUAL;
   4829 
   4830     UCollator *coll = NULL;
   4831 
   4832 
   4833     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   4834     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4835 
   4836     if(U_FAILURE(status)) {
   4837         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   4838     } else {
   4839         res = ucol_strcoll(coll, &a, 1, &null, 1);
   4840 
   4841         if(res != UCOL_LESS) {
   4842             log_err("NULL was not tailored properly!\n");
   4843         }
   4844     }
   4845 
   4846     ucol_close(coll);
   4847 }
   4848 
   4849 static void
   4850 TestUpperFirstQuaternary(void)
   4851 {
   4852   const char* tests[] = { "B", "b", "Bb", "bB" };
   4853   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   4854   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   4855   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4856 }
   4857 
   4858 static void
   4859 TestJ4960(void)
   4860 {
   4861   const char* tests[] = { "\\u00e2T", "aT" };
   4862   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   4863   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   4864   const char* tests2[] = { "a", "A" };
   4865   const char* rule = "&[first tertiary ignorable]=A=a";
   4866   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   4867   UColAttributeValue attVals2[] = { UCOL_ON };
   4868   /* Test whether we correctly ignore primary ignorables on case level when */
   4869   /* we have only primary & case level */
   4870   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   4871   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   4872   /* and case level */
   4873   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4874   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   4875   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   4876 }
   4877 
   4878 static void
   4879 TestJ5223(void)
   4880 {
   4881   static const char *test = "this is a test string";
   4882   UChar ustr[256];
   4883   int32_t ustr_length = u_unescape(test, ustr, 256);
   4884   unsigned char sortkey[256];
   4885   int32_t sortkey_length;
   4886   UErrorCode status = U_ZERO_ERROR;
   4887   static UCollator *coll = NULL;
   4888   coll = ucol_open("root", &status);
   4889   if(U_FAILURE(status)) {
   4890     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4891     return;
   4892   }
   4893   ucol_setStrength(coll, UCOL_PRIMARY);
   4894   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4895   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4896   if (U_FAILURE(status)) {
   4897     log_err("Failed setting atributes\n");
   4898     return;
   4899   }
   4900   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   4901   if (sortkey_length > 256) return;
   4902 
   4903   /* we mark the position where the null byte should be written in advance */
   4904   sortkey[sortkey_length-1] = 0xAA;
   4905 
   4906   /* we set the buffer size one byte higher than needed */
   4907   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4908     sortkey_length+1);
   4909 
   4910   /* no error occurs (for me) */
   4911   if (sortkey[sortkey_length-1] == 0xAA) {
   4912     log_err("Hit bug at first try\n");
   4913   }
   4914 
   4915   /* we mark the position where the null byte should be written again */
   4916   sortkey[sortkey_length-1] = 0xAA;
   4917 
   4918   /* this time we set the buffer size to the exact amount needed */
   4919   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4920     sortkey_length);
   4921 
   4922   /* now the trailing null byte is not written */
   4923   if (sortkey[sortkey_length-1] == 0xAA) {
   4924     log_err("Hit bug at second try\n");
   4925   }
   4926 
   4927   ucol_close(coll);
   4928 }
   4929 
   4930 /* Regression test for Thai partial sort key problem */
   4931 static void
   4932 TestJ5232(void)
   4933 {
   4934     const static char *test[] = {
   4935         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   4936         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   4937     };
   4938 
   4939     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   4940 }
   4941 
   4942 static void
   4943 TestJ5367(void)
   4944 {
   4945     const static char *test[] = { "a", "y" };
   4946     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   4947     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4948 }
   4949 
   4950 static void
   4951 TestVI5913(void)
   4952 {
   4953     UErrorCode status = U_ZERO_ERROR;
   4954     int32_t i, j;
   4955     UCollator *coll =NULL;
   4956     uint8_t  resColl[100], expColl[100];
   4957     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   4958     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
   4959     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   4960     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
   4961     static const UChar tData[][20]={
   4962         {0x1EAC, 0},
   4963         {0x0041, 0x0323, 0x0302, 0},
   4964         {0x1EA0, 0x0302, 0},
   4965         {0x00C2, 0x0323, 0},
   4966         {0x1ED8, 0},  /* O with dot and circumflex */
   4967         {0x1ECC, 0x0302, 0},
   4968         {0x1EB7, 0},
   4969         {0x1EA1, 0x0306, 0},
   4970     };
   4971     static const UChar tailorData[][20]={
   4972         {0x1FA2, 0},  /* Omega with 3 combining marks */
   4973         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   4974         {0x1FF3, 0x0313, 0x0300, 0},
   4975         {0x1F60, 0x0300, 0x0345, 0},
   4976         {0x1F62, 0x0345, 0},
   4977         {0x1FA0, 0x0300, 0},
   4978     };
   4979     static const UChar tailorData2[][20]={
   4980         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   4981         {0x0073, 0x0323, 0x030C, 0},
   4982         {0x0073, 0x030C, 0x0323, 0},
   4983     };
   4984     static const UChar tailorData3[][20]={
   4985         {0x007a, 0},  /*  z */
   4986         {0x0061, 0x0065, 0},  /*  a + e */
   4987         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   4988         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   4989         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   4990         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   4991         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   4992         {0x00EA, 0},  /* e with circumflex  */
   4993     };
   4994 
   4995     /* Test Vietnamese sort. */
   4996     coll = ucol_open("vi", &status);
   4997     if(U_FAILURE(status)) {
   4998         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   4999         return;
   5000     }
   5001     log_verbose("\n\nVI collation:");
   5002     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   5003         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   5004     }
   5005     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   5006         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   5007     }
   5008     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   5009         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   5010     }
   5011     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   5012         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   5013     }
   5014 
   5015     for (j=0; j<8; j++) {
   5016         tLen = u_strlen(tData[j]);
   5017         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   5018         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   5019         for(i = 0; i<rLen; i++) {
   5020             log_verbose(" %02X", resColl[i]);
   5021         }
   5022     }
   5023 
   5024     ucol_close(coll);
   5025 
   5026     /* Test Romanian sort. */
   5027     coll = ucol_open("ro", &status);
   5028     log_verbose("\n\nRO collation:");
   5029     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   5030         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   5031     }
   5032     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   5033         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   5034     }
   5035     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   5036         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   5037     }
   5038 
   5039     for (j=4; j<8; j++) {
   5040         tLen = u_strlen(tData[j]);
   5041         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   5042         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   5043         for(i = 0; i<rLen; i++) {
   5044             log_verbose(" %02X", resColl[i]);
   5045         }
   5046     }
   5047     ucol_close(coll);
   5048 
   5049     /* Test the precomposed Greek character with 3 combining marks. */
   5050     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   5051     ruleLen = u_strlen(rule);
   5052     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5053     if (U_FAILURE(status)) {
   5054         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   5055         return;
   5056     }
   5057     sLen = u_strlen(tailorData[0]);
   5058     for (j=1; j<6; j++) {
   5059         tLen = u_strlen(tailorData[j]);
   5060         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   5061             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   5062         }
   5063     }
   5064     /* Test getSortKey. */
   5065     tLen = u_strlen(tailorData[0]);
   5066     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   5067     for (j=0; j<6; j++) {
   5068         tLen = u_strlen(tailorData[j]);
   5069         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   5070         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5071             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5072             for(i = 0; i<rLen; i++) {
   5073                 log_err(" %02X", resColl[i]);
   5074             }
   5075         }
   5076     }
   5077     ucol_close(coll);
   5078 
   5079     log_verbose("\n\nTailoring test for s with caron:");
   5080     ruleLen = u_strlen(rule2);
   5081     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5082     tLen = u_strlen(tailorData2[0]);
   5083     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   5084     for (j=1; j<3; j++) {
   5085         tLen = u_strlen(tailorData2[j]);
   5086         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   5087         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5088             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5089             for(i = 0; i<rLen; i++) {
   5090                 log_err(" %02X", resColl[i]);
   5091             }
   5092         }
   5093     }
   5094     ucol_close(coll);
   5095 
   5096     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   5097     ruleLen = u_strlen(rule3);
   5098     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5099     tLen = u_strlen(tailorData3[3]);
   5100     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   5101     for (j=4; j<6; j++) {
   5102         tLen = u_strlen(tailorData3[j]);
   5103         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   5104 
   5105         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5106             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5107             for(i = 0; i<rLen; i++) {
   5108                 log_err(" %02X", resColl[i]);
   5109             }
   5110         }
   5111 
   5112         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5113          for(i = 0; i<rLen; i++) {
   5114              log_verbose(" %02X", resColl[i]);
   5115          }
   5116     }
   5117     ucol_close(coll);
   5118 }
   5119 
   5120 static void
   5121 TestTailor6179(void)
   5122 {
   5123     UErrorCode status = U_ZERO_ERROR;
   5124     int32_t i;
   5125     UCollator *coll =NULL;
   5126     uint8_t  resColl[100];
   5127     int32_t  rLen, tLen, ruleLen;
   5128     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   5129     static const UChar rule1[]={
   5130             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   5131             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   5132             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   5133             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   5134     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   5135     static const UChar rule2[]={
   5136             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   5137             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   5138             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   5139             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   5140             0x3C,0x3C,0x20,0x62,0};
   5141 
   5142     static const UChar tData1[][4]={
   5143         {0x61, 0},
   5144         {0x62, 0},
   5145         { 0xFDD0,0x009E, 0}
   5146     };
   5147     static const UChar tData2[][4]={
   5148         {0x61, 0},
   5149         {0x62, 0},
   5150         { 0xFDD0,0x009E, 0}
   5151      };
   5152 
   5153     /*
   5154      * These values from FractionalUCA.txt will change,
   5155      * and need to be updated here.
   5156      */
   5157     static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
   5158     static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
   5159     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
   5160     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
   5161 
   5162     /* Test [Last Primary ignorable] */
   5163 
   5164     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
   5165     ruleLen = u_strlen(rule1);
   5166     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5167     if (U_FAILURE(status)) {
   5168         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   5169         return;
   5170     }
   5171     tLen = u_strlen(tData1[0]);
   5172     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   5173     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
   5174         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   5175         for(i = 0; i<rLen; i++) {
   5176             log_err(" %02X", resColl[i]);
   5177         }
   5178         log_err("\n");
   5179     }
   5180     tLen = u_strlen(tData1[1]);
   5181     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   5182     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
   5183         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   5184         for(i = 0; i<rLen; i++) {
   5185             log_err(" %02X", resColl[i]);
   5186         }
   5187         log_err("\n");
   5188     }
   5189     ucol_close(coll);
   5190 
   5191 
   5192     /* Test [Last Secondary ignorable] */
   5193     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
   5194     ruleLen = u_strlen(rule1);
   5195     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5196     if (U_FAILURE(status)) {
   5197         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
   5198         return;
   5199     }
   5200     tLen = u_strlen(tData2[0]);
   5201     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   5202     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
   5203         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5204         for(i = 0; i<rLen; i++) {
   5205             log_err(" %02X", resColl[i]);
   5206         }
   5207         log_err("\n");
   5208     }
   5209     if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */
   5210       tLen = u_strlen(tData2[1]);
   5211       rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   5212       if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
   5213         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5214         for(i = 0; i<rLen; i++) {
   5215           log_err(" %02X", resColl[i]);
   5216         }
   5217         log_err("\n");
   5218       }
   5219     }
   5220     ucol_close(coll);
   5221 }
   5222 
   5223 static void
   5224 TestUCAPrecontext(void)
   5225 {
   5226     UErrorCode status = U_ZERO_ERROR;
   5227     int32_t i, j;
   5228     UCollator *coll =NULL;
   5229     uint8_t  resColl[100], prevColl[100];
   5230     int32_t  rLen, tLen, ruleLen;
   5231     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   5232     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   5233     /* & l middle-dot << a  a is an expansion. */
   5234 
   5235     UChar tData1[][20]={
   5236             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   5237             { 0x387, 0}, /* standalone middle dot(0x387) */
   5238             { 0x61, 0},  /* a */
   5239             { 0x6C, 0},  /* l */
   5240             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   5241             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   5242             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   5243             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   5244             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   5245             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   5246             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   5247      };
   5248 
   5249     log_verbose("\n\nEN collation:");
   5250     coll = ucol_open("en", &status);
   5251     if (U_FAILURE(status)) {
   5252         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   5253         return;
   5254     }
   5255     for (j=0; j<11; j++) {
   5256         tLen = u_strlen(tData1[j]);
   5257         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5258         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5259             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5260                     j, tData1[j]);
   5261         }
   5262         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5263         for(i = 0; i<rLen; i++) {
   5264             log_verbose(" %02X", resColl[i]);
   5265         }
   5266         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5267      }
   5268      ucol_close(coll);
   5269 
   5270 
   5271      log_verbose("\n\nJA collation:");
   5272      coll = ucol_open("ja", &status);
   5273      if (U_FAILURE(status)) {
   5274          log_err("Tailoring test: &z <<a|- failed!");
   5275          return;
   5276      }
   5277      for (j=0; j<11; j++) {
   5278          tLen = u_strlen(tData1[j]);
   5279          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5280          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5281              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5282                      j, tData1[j]);
   5283          }
   5284          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5285          for(i = 0; i<rLen; i++) {
   5286              log_verbose(" %02X", resColl[i]);
   5287          }
   5288          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5289       }
   5290       ucol_close(coll);
   5291 
   5292 
   5293       log_verbose("\n\nTailoring test: & middle dot < a ");
   5294       ruleLen = u_strlen(rule1);
   5295       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5296       if (U_FAILURE(status)) {
   5297           log_err("Tailoring test: & middle dot < a failed!");
   5298           return;
   5299       }
   5300       for (j=0; j<11; j++) {
   5301           tLen = u_strlen(tData1[j]);
   5302           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5303           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5304               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5305                       j, tData1[j]);
   5306           }
   5307           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5308           for(i = 0; i<rLen; i++) {
   5309               log_verbose(" %02X", resColl[i]);
   5310           }
   5311           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5312        }
   5313        ucol_close(coll);
   5314 
   5315 
   5316        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   5317        ruleLen = u_strlen(rule2);
   5318        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5319        if (U_FAILURE(status)) {
   5320            log_err("Tailoring test: & l middle-dot << a failed!");
   5321            return;
   5322        }
   5323        for (j=0; j<11; j++) {
   5324            tLen = u_strlen(tData1[j]);
   5325            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5326            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5327                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5328                        j, tData1[j]);
   5329            }
   5330            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   5331                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   5332                        j, tData1[j]);
   5333            }
   5334            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5335            for(i = 0; i<rLen; i++) {
   5336                log_verbose(" %02X", resColl[i]);
   5337            }
   5338            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5339         }
   5340         ucol_close(coll);
   5341 }
   5342 
   5343 static void
   5344 TestOutOfBuffer5468(void)
   5345 {
   5346     static const char *test = "\\u4e00";
   5347     UChar ustr[256];
   5348     int32_t ustr_length = u_unescape(test, ustr, 256);
   5349     unsigned char shortKeyBuf[1];
   5350     int32_t sortkey_length;
   5351     UErrorCode status = U_ZERO_ERROR;
   5352     static UCollator *coll = NULL;
   5353 
   5354     coll = ucol_open("root", &status);
   5355     if(U_FAILURE(status)) {
   5356       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   5357       return;
   5358     }
   5359     ucol_setStrength(coll, UCOL_PRIMARY);
   5360     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   5361     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5362     if (U_FAILURE(status)) {
   5363       log_err("Failed setting atributes\n");
   5364       return;
   5365     }
   5366 
   5367     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   5368     if (sortkey_length != 4) {
   5369         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   5370     }
   5371     log_verbose("length of sortKey is %d", sortkey_length);
   5372     ucol_close(coll);
   5373 }
   5374 
   5375 #define TSKC_DATA_SIZE 5
   5376 #define TSKC_BUF_SIZE  50
   5377 static void
   5378 TestSortKeyConsistency(void)
   5379 {
   5380     UErrorCode icuRC = U_ZERO_ERROR;
   5381     UCollator* ucol;
   5382     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   5383 
   5384     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5385     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5386     int32_t i, j, i2;
   5387 
   5388     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   5389     if (U_FAILURE(icuRC))
   5390     {
   5391         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   5392         return;
   5393     }
   5394 
   5395     for (i = 0; i < TSKC_DATA_SIZE; i++)
   5396     {
   5397         UCharIterator uiter;
   5398         uint32_t state[2] = { 0, 0 };
   5399         int32_t dataLen = i+1;
   5400         for (j=0; j<TSKC_BUF_SIZE; j++)
   5401             bufFull[i][j] = bufPart[i][j] = 0;
   5402 
   5403         /* Full sort key */
   5404         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   5405 
   5406         /* Partial sort key */
   5407         uiter_setString(&uiter, data, dataLen);
   5408         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   5409         if (U_FAILURE(icuRC))
   5410         {
   5411             log_err("ucol_nextSortKeyPart failed\n");
   5412             ucol_close(ucol);
   5413             return;
   5414         }
   5415 
   5416         for (i2=0; i2<i; i2++)
   5417         {
   5418             UBool fullMatch = TRUE;
   5419             UBool partMatch = TRUE;
   5420             for (j=0; j<TSKC_BUF_SIZE; j++)
   5421             {
   5422                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   5423                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   5424             }
   5425             if (fullMatch != partMatch) {
   5426                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   5427                                   : "partial key was consistent, but full key changed\n");
   5428                 ucol_close(ucol);
   5429                 return;
   5430             }
   5431         }
   5432     }
   5433 
   5434     /*=============================================*/
   5435    ucol_close(ucol);
   5436 }
   5437 
   5438 /* ticket: 6101 */
   5439 static void TestCroatianSortKey(void) {
   5440     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   5441     UErrorCode status = U_ZERO_ERROR;
   5442     UCollator *ucol;
   5443     UCharIterator iter;
   5444 
   5445     static const UChar text[] = { 0x0044, 0xD81A };
   5446 
   5447     size_t length = sizeof(text)/sizeof(*text);
   5448 
   5449     uint8_t textSortKey[32];
   5450     size_t lenSortKey = 32;
   5451     size_t actualSortKeyLen;
   5452     uint32_t uStateInfo[2] = { 0, 0 };
   5453 
   5454     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   5455     if (U_FAILURE(status)) {
   5456         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   5457         return;
   5458     }
   5459 
   5460     uiter_setString(&iter, text, length);
   5461 
   5462     actualSortKeyLen = ucol_nextSortKeyPart(
   5463         ucol, &iter, (uint32_t*)uStateInfo,
   5464         textSortKey, lenSortKey, &status
   5465         );
   5466 
   5467     if (actualSortKeyLen == lenSortKey) {
   5468         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   5469     }
   5470 
   5471     ucol_close(ucol);
   5472 }
   5473 
   5474 /* ticket: 6140 */
   5475 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   5476  * they are both Hiragana and Katakana
   5477  */
   5478 #define SORTKEYLEN 50
   5479 static void TestHiragana(void) {
   5480     UErrorCode status = U_ZERO_ERROR;
   5481     UCollator* ucol;
   5482     UCollationResult strcollresult;
   5483     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   5484     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   5485     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   5486     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   5487     int32_t i, j;
   5488     uint8_t sortKey1[SORTKEYLEN];
   5489     uint8_t sortKey2[SORTKEYLEN];
   5490 
   5491     UCharIterator uiter1;
   5492     UCharIterator uiter2;
   5493     uint32_t state1[2] = { 0, 0 };
   5494     uint32_t state2[2] = { 0, 0 };
   5495     int32_t keySize1;
   5496     int32_t keySize2;
   5497 
   5498     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   5499             &status);
   5500     if (U_FAILURE(status)) {
   5501         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   5502         return;
   5503     }
   5504 
   5505     /* Start of full sort keys */
   5506     /* Full sort key1 */
   5507     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   5508     /* Full sort key2 */
   5509     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   5510     if (keySize1 == keySize2) {
   5511         for (i = 0; i < keySize1; i++) {
   5512             if (sortKey1[i] != sortKey2[i]) {
   5513                 log_err("Full sort keys are different. Should be equal.");
   5514             }
   5515         }
   5516     } else {
   5517         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   5518     }
   5519     /* End of full sort keys */
   5520 
   5521     /* Start of partial sort keys */
   5522     /* Partial sort key1 */
   5523     uiter_setString(&uiter1, data1, data1Len);
   5524     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   5525     /* Partial sort key2 */
   5526     uiter_setString(&uiter2, data2, data2Len);
   5527     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   5528     if (U_SUCCESS(status) && keySize1 == keySize2) {
   5529         for (j = 0; j < keySize1; j++) {
   5530             if (sortKey1[j] != sortKey2[j]) {
   5531                 log_err("Partial sort keys are different. Should be equal");
   5532             }
   5533         }
   5534     } else {
   5535         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   5536     }
   5537     /* End of partial sort keys */
   5538 
   5539     /* Start of strcoll */
   5540     /* Use ucol_strcoll() to determine ordering */
   5541     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   5542     if (strcollresult != UCOL_EQUAL) {
   5543         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   5544     }
   5545 
   5546     ucol_close(ucol);
   5547 }
   5548 
   5549 /* Convenient struct for running collation tests */
   5550 typedef struct {
   5551   const UChar source[MAX_TOKEN_LEN];  /* String on left */
   5552   const UChar target[MAX_TOKEN_LEN];  /* String on right */
   5553   UCollationResult result;            /* -1, 0 or +1, depending on collation */
   5554 } OneTestCase;
   5555 
   5556 /*
   5557  * Utility function to test one collation test case.
   5558  * @param testcases Array of test cases.
   5559  * @param n_testcases Size of the array testcases.
   5560  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   5561  * @param n_rules Size of the array str_rules.
   5562  */
   5563 static void doTestOneTestCase(const OneTestCase testcases[],
   5564                               int n_testcases,
   5565                               const char* str_rules[],
   5566                               int n_rules)
   5567 {
   5568   int rule_no, testcase_no;
   5569   UChar rule[500];
   5570   int32_t length = 0;
   5571   UErrorCode status = U_ZERO_ERROR;
   5572   UParseError parse_error;
   5573   UCollator  *myCollation;
   5574 
   5575   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5576 
   5577     length = u_unescape(str_rules[rule_no], rule, 500);
   5578     if (length == 0) {
   5579         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5580         return;
   5581     }
   5582     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5583     if(U_FAILURE(status)){
   5584         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5585         return;
   5586     }
   5587     log_verbose("Testing the <<* syntax\n");
   5588     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5589     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5590     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   5591       doTest(myCollation,
   5592              testcases[testcase_no].source,
   5593              testcases[testcase_no].target,
   5594              testcases[testcase_no].result
   5595              );
   5596     }
   5597     ucol_close(myCollation);
   5598   }
   5599 }
   5600 
   5601 const static OneTestCase rangeTestcases[] = {
   5602   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
   5603   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
   5604   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
   5605 
   5606   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
   5607   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
   5608   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
   5609   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
   5610   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
   5611 
   5612   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
   5613   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
   5614   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
   5615   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
   5616 
   5617   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
   5618   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
   5619   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
   5620   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
   5621   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
   5622   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
   5623   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
   5624   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
   5625 };
   5626 
   5627 static int nRangeTestcases = LEN(rangeTestcases);
   5628 
   5629 const static OneTestCase rangeTestcasesSupplemental[] = {
   5630   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
   5631   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
   5632   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5633   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5634   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5635   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5636   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5637 };
   5638 
   5639 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
   5640 
   5641 const static OneTestCase rangeTestcasesQwerty[] = {
   5642   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
   5643   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
   5644 
   5645   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
   5646   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
   5647 
   5648   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
   5649   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
   5650 
   5651   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
   5652   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
   5653 
   5654   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
   5655     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
   5656   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
   5657     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
   5658 };
   5659 
   5660 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
   5661 
   5662 static void TestSameStrengthList(void)
   5663 {
   5664   const char* strRules[] = {
   5665     /* Normal */
   5666     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   5667 
   5668     /* Lists */
   5669     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   5670   };
   5671   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5672 }
   5673 
   5674 static void TestSameStrengthListQuoted(void)
   5675 {
   5676   const char* strRules[] = {
   5677     /* Lists with quoted characters */
   5678     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   5679     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   5680 
   5681     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   5682     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   5683 
   5684     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   5685     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   5686   };
   5687   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5688 }
   5689 
   5690 static void TestSameStrengthListSupplemental(void)
   5691 {
   5692   const char* strRules[] = {
   5693     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
   5694     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   5695     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
   5696     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   5697   };
   5698   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5699 }
   5700 
   5701 static void TestSameStrengthListQwerty(void)
   5702 {
   5703   const char* strRules[] = {
   5704     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5705     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5706     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   5707     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   5708     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   5709 
   5710     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   5711     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   5712 
   5713     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   5714     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   5715 
   5716  };
   5717   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5718 }
   5719 
   5720 static void TestSameStrengthListQuotedQwerty(void)
   5721 {
   5722   const char* strRules[] = {
   5723     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5724     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5725     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   5726 
   5727     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   5728     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   5729    };
   5730   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5731 }
   5732 
   5733 static void TestSameStrengthListRanges(void)
   5734 {
   5735   const char* strRules[] = {
   5736     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   5737   };
   5738   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5739 }
   5740 
   5741 static void TestSameStrengthListSupplementalRanges(void)
   5742 {
   5743   const char* strRules[] = {
   5744     "&\\ufffe<*\\uffff-\\U00010002",
   5745   };
   5746   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5747 }
   5748 
   5749 static void TestSpecialCharacters(void)
   5750 {
   5751   const char* strRules[] = {
   5752     /* Normal */
   5753     "&';'<'+'<','<'-'<'&'<'*'",
   5754 
   5755     /* List */
   5756     "&';'<*'+,-&*'",
   5757 
   5758     /* Range */
   5759     "&';'<*'+'-'-&*'",
   5760   };
   5761 
   5762   const static OneTestCase specialCharacterStrings[] = {
   5763     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   5764     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   5765     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   5766     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   5767   };
   5768   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
   5769 }
   5770 
   5771 static void TestPrivateUseCharacters(void)
   5772 {
   5773   const char* strRules[] = {
   5774     /* Normal */
   5775     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   5776     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   5777   };
   5778 
   5779   const static OneTestCase privateUseCharacterStrings[] = {
   5780     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5781     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5782     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5783     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5784     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5785     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5786   };
   5787   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5788 }
   5789 
   5790 static void TestPrivateUseCharactersInList(void)
   5791 {
   5792   const char* strRules[] = {
   5793     /* List */
   5794     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   5795     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   5796     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   5797   };
   5798 
   5799   const static OneTestCase privateUseCharacterStrings[] = {
   5800     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5801     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5802     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5803     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5804     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5805     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5806   };
   5807   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5808 }
   5809 
   5810 static void TestPrivateUseCharactersInRange(void)
   5811 {
   5812   const char* strRules[] = {
   5813     /* Range */
   5814     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   5815     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   5816     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   5817   };
   5818 
   5819   const static OneTestCase privateUseCharacterStrings[] = {
   5820     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5821     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5822     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5823     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5824     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5825     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5826   };
   5827   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5828 }
   5829 
   5830 static void TestInvalidListsAndRanges(void)
   5831 {
   5832   const char* invalidRules[] = {
   5833     /* Range not in starred expression */
   5834     "&\\ufffe<\\uffff-\\U00010002",
   5835 
   5836     /* Range without start */
   5837     "&a<*-c",
   5838 
   5839     /* Range without end */
   5840     "&a<*b-",
   5841 
   5842     /* More than one hyphen */
   5843     "&a<*b-g-l",
   5844 
   5845     /* Range in the wrong order */
   5846     "&a<*k-b",
   5847 
   5848   };
   5849 
   5850   UChar rule[500];
   5851   UErrorCode status = U_ZERO_ERROR;
   5852   UParseError parse_error;
   5853   int n_rules = LEN(invalidRules);
   5854   int rule_no;
   5855   int length;
   5856   UCollator  *myCollation;
   5857 
   5858   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5859 
   5860     length = u_unescape(invalidRules[rule_no], rule, 500);
   5861     if (length == 0) {
   5862         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5863         return;
   5864     }
   5865     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5866     (void)myCollation;      /* Suppress set but not used warning. */
   5867     if(!U_FAILURE(status)){
   5868       log_err("ERROR: Could not cause a failure as expected: \n");
   5869     }
   5870     status = U_ZERO_ERROR;
   5871   }
   5872 }
   5873 
   5874 /*
   5875  * This test ensures that characters placed before a character in a different script have the same lead byte
   5876  * in their collation key before and after script reordering.
   5877  */
   5878 static void TestBeforeRuleWithScriptReordering(void)
   5879 {
   5880     UParseError error;
   5881     UErrorCode status = U_ZERO_ERROR;
   5882     UCollator  *myCollation;
   5883     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   5884     UChar rules[500];
   5885     uint32_t rulesLength = 0;
   5886     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5887     UCollationResult collResult;
   5888 
   5889     uint8_t baseKey[256];
   5890     uint32_t baseKeyLength;
   5891     uint8_t beforeKey[256];
   5892     uint32_t beforeKeyLength;
   5893 
   5894     UChar base[] = { 0x03b1 }; /* base */
   5895     int32_t baseLen = sizeof(base)/sizeof(*base);
   5896 
   5897     UChar before[] = { 0x0e01 }; /* ko kai */
   5898     int32_t beforeLen = sizeof(before)/sizeof(*before);
   5899 
   5900     /*UChar *data[] = { before, base };
   5901     genericRulesStarter(srules, data, 2);*/
   5902 
   5903     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   5904 
   5905     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
   5906     (void)baseKeyLength;
   5907 
   5908     /* build collator */
   5909     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   5910 
   5911     rulesLength = u_unescape(srules, rules, LEN(rules));
   5912     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5913     if(U_FAILURE(status)) {
   5914         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5915         return;
   5916     }
   5917 
   5918     /* check collation results - before rule applied but not script reordering */
   5919     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5920     if (collResult != UCOL_GREATER) {
   5921         log_err("Collation result not correct before script reordering = %d\n", collResult);
   5922     }
   5923 
   5924     /* check the lead byte of the collation keys before script reordering */
   5925     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5926     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5927     if (baseKey[0] != beforeKey[0]) {
   5928       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5929    }
   5930 
   5931     /* reorder the scripts */
   5932     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   5933     if(U_FAILURE(status)) {
   5934         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5935         return;
   5936     }
   5937 
   5938     /* check collation results - before rule applied and after script reordering */
   5939     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5940     if (collResult != UCOL_GREATER) {
   5941         log_err("Collation result not correct after script reordering = %d\n", collResult);
   5942     }
   5943 
   5944     /* check the lead byte of the collation keys after script reordering */
   5945     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5946     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5947     if (baseKey[0] != beforeKey[0]) {
   5948         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5949     }
   5950 
   5951     ucol_close(myCollation);
   5952 }
   5953 
   5954 /*
   5955  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   5956  */
   5957 static void TestNonLeadBytesDuringCollationReordering(void)
   5958 {
   5959     UErrorCode status = U_ZERO_ERROR;
   5960     UCollator  *myCollation;
   5961     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5962 
   5963     uint8_t baseKey[256];
   5964     uint32_t baseKeyLength;
   5965     uint8_t reorderKey[256];
   5966     uint32_t reorderKeyLength;
   5967 
   5968     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   5969 
   5970     uint32_t i;
   5971 
   5972 
   5973     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5974 
   5975     /* build collator tertiary */
   5976     myCollation = ucol_open("", &status);
   5977     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5978     if(U_FAILURE(status)) {
   5979         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5980         return;
   5981     }
   5982     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5983 
   5984     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5985     if(U_FAILURE(status)) {
   5986         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5987         return;
   5988     }
   5989     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5990 
   5991     if (baseKeyLength != reorderKeyLength) {
   5992         log_err("Key lengths not the same during reordering.\n");
   5993         return;
   5994     }
   5995 
   5996     for (i = 1; i < baseKeyLength; i++) {
   5997         if (baseKey[i] != reorderKey[i]) {
   5998             log_err("Collation key bytes not the same at position %d.\n", i);
   5999             return;
   6000         }
   6001     }
   6002     ucol_close(myCollation);
   6003 
   6004     /* build collator quaternary */
   6005     myCollation = ucol_open("", &status);
   6006     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   6007     if(U_FAILURE(status)) {
   6008         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6009         return;
   6010     }
   6011     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   6012 
   6013     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6014     if(U_FAILURE(status)) {
   6015         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6016         return;
   6017     }
   6018     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   6019 
   6020     if (baseKeyLength != reorderKeyLength) {
   6021         log_err("Key lengths not the same during reordering.\n");
   6022         return;
   6023     }
   6024 
   6025     for (i = 1; i < baseKeyLength; i++) {
   6026         if (baseKey[i] != reorderKey[i]) {
   6027             log_err("Collation key bytes not the same at position %d.\n", i);
   6028             return;
   6029         }
   6030     }
   6031     ucol_close(myCollation);
   6032 }
   6033 
   6034 /*
   6035  * Test reordering API.
   6036  */
   6037 static void TestReorderingAPI(void)
   6038 {
   6039     UErrorCode status = U_ZERO_ERROR;
   6040     UCollator  *myCollation;
   6041     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6042     int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
   6043     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6044     UCollationResult collResult;
   6045     int32_t retrievedReorderCodesLength;
   6046     int32_t retrievedReorderCodes[10];
   6047     UChar greekString[] = { 0x03b1 };
   6048     UChar punctuationString[] = { 0x203e };
   6049     int loopIndex;
   6050 
   6051     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6052 
   6053     /* build collator tertiary */
   6054     myCollation = ucol_open("", &status);
   6055     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6056     if(U_FAILURE(status)) {
   6057         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6058         return;
   6059     }
   6060 
   6061     /* set the reorderding */
   6062     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6063     if (U_FAILURE(status)) {
   6064         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6065         return;
   6066     }
   6067 
   6068     /* get the reordering */
   6069     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6070     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6071         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6072         return;
   6073     }
   6074     status = U_ZERO_ERROR;
   6075     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6076         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6077         return;
   6078     }
   6079     /* now let's really get it */
   6080     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6081     if (U_FAILURE(status)) {
   6082         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6083         return;
   6084     }
   6085     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6086         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6087         return;
   6088     }
   6089     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6090         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6091             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6092             return;
   6093         }
   6094     }
   6095     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6096     if (collResult != UCOL_LESS) {
   6097         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6098         return;
   6099     }
   6100 
   6101     /* clear the reordering */
   6102     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6103     if (U_FAILURE(status)) {
   6104         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6105         return;
   6106     }
   6107 
   6108     /* get the reordering again */
   6109     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6110     if (retrievedReorderCodesLength != 0) {
   6111         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6112         return;
   6113     }
   6114 
   6115     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6116     if (collResult != UCOL_GREATER) {
   6117         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6118         return;
   6119     }
   6120 
   6121     /* test for error condition on duplicate reorder codes */
   6122     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
   6123     if (!U_FAILURE(status)) {
   6124         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
   6125         return;
   6126     }
   6127 
   6128     status = U_ZERO_ERROR;
   6129     /* test for reorder codes after a reset code */
   6130     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
   6131     if (!U_FAILURE(status)) {
   6132         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
   6133         return;
   6134     }
   6135 
   6136     ucol_close(myCollation);
   6137 }
   6138 
   6139 /*
   6140  * Test reordering API.
   6141  */
   6142 static void TestReorderingAPIWithRuleCreatedCollator(void)
   6143 {
   6144     UErrorCode status = U_ZERO_ERROR;
   6145     UCollator  *myCollation;
   6146     UChar rules[90];
   6147     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
   6148     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6149     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
   6150     UCollationResult collResult;
   6151     int32_t retrievedReorderCodesLength;
   6152     int32_t retrievedReorderCodes[10];
   6153     static const UChar greekString[] = { 0x03b1 };
   6154     static const UChar punctuationString[] = { 0x203e };
   6155     static const UChar hanString[] = { 0x65E5, 0x672C };
   6156     int loopIndex;
   6157 
   6158     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6159 
   6160     /* build collator from rules */
   6161     u_uastrcpy(rules, "[reorder Hani Grek]");
   6162     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
   6163     if(U_FAILURE(status)) {
   6164         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6165         return;
   6166     }
   6167 
   6168     /* get the reordering */
   6169     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6170     if (U_FAILURE(status)) {
   6171         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6172         return;
   6173     }
   6174     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   6175         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   6176         return;
   6177     }
   6178     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6179         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   6180             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6181             return;
   6182         }
   6183     }
   6184     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
   6185     if (collResult != UCOL_GREATER) {
   6186         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6187         return;
   6188     }
   6189 
   6190     /* set the reordering */
   6191     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6192     if (U_FAILURE(status)) {
   6193         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6194         return;
   6195     }
   6196 
   6197     /* get the reordering */
   6198     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6199     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6200         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6201         return;
   6202     }
   6203     status = U_ZERO_ERROR;
   6204     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6205         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6206         return;
   6207     }
   6208     /* now let's really get it */
   6209     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6210     if (U_FAILURE(status)) {
   6211         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6212         return;
   6213     }
   6214     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6215         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6216         return;
   6217     }
   6218     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6219         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6220             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6221             return;
   6222         }
   6223     }
   6224     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6225     if (collResult != UCOL_LESS) {
   6226         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6227         return;
   6228     }
   6229 
   6230     /* clear the reordering */
   6231     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6232     if (U_FAILURE(status)) {
   6233         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6234         return;
   6235     }
   6236 
   6237     /* get the reordering again */
   6238     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6239     if (retrievedReorderCodesLength != 0) {
   6240         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6241         return;
   6242     }
   6243 
   6244     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6245     if (collResult != UCOL_GREATER) {
   6246         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6247         return;
   6248     }
   6249 
   6250     /* reset the reordering */
   6251     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
   6252     if (U_FAILURE(status)) {
   6253         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
   6254         return;
   6255     }
   6256     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6257     if (U_FAILURE(status)) {
   6258         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6259         return;
   6260     }
   6261     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   6262         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   6263         return;
   6264     }
   6265     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6266         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   6267             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6268             return;
   6269         }
   6270     }
   6271 
   6272     ucol_close(myCollation);
   6273 }
   6274 
   6275 static int compareUScriptCodes(const void * a, const void * b)
   6276 {
   6277   return ( *(int32_t*)a - *(int32_t*)b );
   6278 }
   6279 
   6280 static void TestEquivalentReorderingScripts(void) {
   6281     UErrorCode status = U_ZERO_ERROR;
   6282     int32_t equivalentScripts[50];
   6283     int32_t equivalentScriptsLength;
   6284     int loopIndex;
   6285     int32_t equivalentScriptsResult[] = {
   6286         USCRIPT_BOPOMOFO,
   6287         USCRIPT_LISU,
   6288         USCRIPT_LYCIAN,
   6289         USCRIPT_CARIAN,
   6290         USCRIPT_LYDIAN,
   6291         USCRIPT_YI,
   6292         USCRIPT_OLD_ITALIC,
   6293         USCRIPT_GOTHIC,
   6294         USCRIPT_DESERET,
   6295         USCRIPT_SHAVIAN,
   6296         USCRIPT_OSMANYA,
   6297         USCRIPT_LINEAR_B,
   6298         USCRIPT_CYPRIOT,
   6299         USCRIPT_OLD_SOUTH_ARABIAN,
   6300         USCRIPT_AVESTAN,
   6301         USCRIPT_IMPERIAL_ARAMAIC,
   6302         USCRIPT_INSCRIPTIONAL_PARTHIAN,
   6303         USCRIPT_INSCRIPTIONAL_PAHLAVI,
   6304         USCRIPT_UGARITIC,
   6305         USCRIPT_OLD_PERSIAN,
   6306         USCRIPT_CUNEIFORM,
   6307         USCRIPT_EGYPTIAN_HIEROGLYPHS,
   6308         USCRIPT_PHONETIC_POLLARD,
   6309         USCRIPT_SORA_SOMPENG,
   6310         USCRIPT_MEROITIC_CURSIVE,
   6311         USCRIPT_MEROITIC_HIEROGLYPHS
   6312     };
   6313 
   6314     qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
   6315 
   6316     /* UScript.GOTHIC */
   6317     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
   6318     if (U_FAILURE(status)) {
   6319         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   6320         return;
   6321     }
   6322     /*
   6323     fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
   6324     fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
   6325     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6326         fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
   6327     }
   6328     */
   6329     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   6330         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   6331         return;
   6332     }
   6333     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6334         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   6335             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   6336             return;
   6337         }
   6338     }
   6339 
   6340     /* UScript.SHAVIAN */
   6341     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
   6342     if (U_FAILURE(status)) {
   6343         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   6344         return;
   6345     }
   6346     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   6347         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   6348         return;
   6349     }
   6350     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   6351         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   6352             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   6353             return;
   6354         }
   6355     }
   6356 }
   6357 
   6358 static void TestReorderingAcrossCloning(void)
   6359 {
   6360     UErrorCode status = U_ZERO_ERROR;
   6361     UCollator  *myCollation;
   6362     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   6363     UCollator *clonedCollation;
   6364     int32_t retrievedReorderCodesLength;
   6365     int32_t retrievedReorderCodes[10];
   6366     int loopIndex;
   6367 
   6368     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6369 
   6370     /* build collator tertiary */
   6371     myCollation = ucol_open("", &status);
   6372     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6373     if(U_FAILURE(status)) {
   6374         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6375         return;
   6376     }
   6377 
   6378     /* set the reorderding */
   6379     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6380     if (U_FAILURE(status)) {
   6381         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6382         return;
   6383     }
   6384 
   6385     /* clone the collator */
   6386     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
   6387     if (U_FAILURE(status)) {
   6388         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
   6389         return;
   6390     }
   6391 
   6392     /* get the reordering */
   6393     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   6394     if (U_FAILURE(status)) {
   6395         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   6396         return;
   6397     }
   6398     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6399         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6400         return;
   6401     }
   6402     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   6403         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   6404             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   6405             return;
   6406         }
   6407     }
   6408 
   6409     /*uprv_free(buffer);*/
   6410     ucol_close(myCollation);
   6411     ucol_close(clonedCollation);
   6412 }
   6413 
   6414 /*
   6415  * Utility function to test one collation reordering test case set.
   6416  * @param testcases Array of test cases.
   6417  * @param n_testcases Size of the array testcases.
   6418  * @param reorderTokens Array of reordering codes.
   6419  * @param reorderTokensLen Size of the array reorderTokens.
   6420  */
   6421 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   6422 {
   6423     uint32_t testCaseNum;
   6424     UErrorCode status = U_ZERO_ERROR;
   6425     UCollator  *myCollation;
   6426 
   6427     myCollation = ucol_open("", &status);
   6428     if (U_FAILURE(status)) {
   6429         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6430         return;
   6431     }
   6432     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   6433     if(U_FAILURE(status)) {
   6434         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   6435         return;
   6436     }
   6437 
   6438     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   6439         doTest(myCollation,
   6440             testCases[testCaseNum].source,
   6441             testCases[testCaseNum].target,
   6442             testCases[testCaseNum].result
   6443         );
   6444     }
   6445     ucol_close(myCollation);
   6446 }
   6447 
   6448 static void TestGreekFirstReorder(void)
   6449 {
   6450     const char* strRules[] = {
   6451         "[reorder Grek]"
   6452     };
   6453 
   6454     const int32_t apiRules[] = {
   6455         USCRIPT_GREEK
   6456     };
   6457 
   6458     const static OneTestCase privateUseCharacterStrings[] = {
   6459         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6460         { {0x0041}, {0x0391}, UCOL_GREATER },
   6461         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   6462         { {0x0060}, {0x0391}, UCOL_LESS },
   6463         { {0x0391}, {0xe2dc}, UCOL_LESS },
   6464         { {0x0391}, {0x0060}, UCOL_GREATER },
   6465     };
   6466 
   6467     /* Test rules creation */
   6468     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6469 
   6470     /* Test collation reordering API */
   6471     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6472 }
   6473 
   6474 static void TestGreekLastReorder(void)
   6475 {
   6476     const char* strRules[] = {
   6477         "[reorder Zzzz Grek]"
   6478     };
   6479 
   6480     const int32_t apiRules[] = {
   6481         USCRIPT_UNKNOWN, USCRIPT_GREEK
   6482     };
   6483 
   6484     const static OneTestCase privateUseCharacterStrings[] = {
   6485         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6486         { {0x0041}, {0x0391}, UCOL_LESS },
   6487         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   6488         { {0x0060}, {0x0391}, UCOL_LESS },
   6489         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   6490     };
   6491 
   6492     /* Test rules creation */
   6493     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6494 
   6495     /* Test collation reordering API */
   6496     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6497 }
   6498 
   6499 static void TestNonScriptReorder(void)
   6500 {
   6501     const char* strRules[] = {
   6502         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   6503     };
   6504 
   6505     const int32_t apiRules[] = {
   6506         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   6507         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   6508         UCOL_REORDER_CODE_CURRENCY
   6509     };
   6510 
   6511     const static OneTestCase privateUseCharacterStrings[] = {
   6512         { {0x0391}, {0x0041}, UCOL_LESS },
   6513         { {0x0041}, {0x0391}, UCOL_GREATER },
   6514         { {0x0060}, {0x0041}, UCOL_LESS },
   6515         { {0x0060}, {0x0391}, UCOL_GREATER },
   6516         { {0x0024}, {0x0041}, UCOL_GREATER },
   6517     };
   6518 
   6519     /* Test rules creation */
   6520     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6521 
   6522     /* Test collation reordering API */
   6523     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6524 }
   6525 
   6526 static void TestHaniReorder(void)
   6527 {
   6528     const char* strRules[] = {
   6529         "[reorder Hani]"
   6530     };
   6531     const int32_t apiRules[] = {
   6532         USCRIPT_HAN
   6533     };
   6534 
   6535     const static OneTestCase privateUseCharacterStrings[] = {
   6536         { {0x4e00}, {0x0041}, UCOL_LESS },
   6537         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6538         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6539         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6540         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6541         { {0xfa27}, {0x0041}, UCOL_LESS },
   6542         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6543     };
   6544 
   6545     /* Test rules creation */
   6546     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6547 
   6548     /* Test collation reordering API */
   6549     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6550 }
   6551 
   6552 static void TestHaniReorderWithOtherRules(void)
   6553 {
   6554     const char* strRules[] = {
   6555         "[reorder Hani] &b<a"
   6556     };
   6557     /*const int32_t apiRules[] = {
   6558         USCRIPT_HAN
   6559     };*/
   6560 
   6561     const static OneTestCase privateUseCharacterStrings[] = {
   6562         { {0x4e00}, {0x0041}, UCOL_LESS },
   6563         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6564         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6565         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6566         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6567         { {0xfa27}, {0x0041}, UCOL_LESS },
   6568         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6569         { {0x0062}, {0x0061}, UCOL_LESS },
   6570     };
   6571 
   6572     /* Test rules creation */
   6573     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6574 }
   6575 
   6576 static void TestMultipleReorder(void)
   6577 {
   6578     const char* strRules[] = {
   6579         "[reorder Grek Zzzz DIGIT Latn Hani]"
   6580     };
   6581 
   6582     const int32_t apiRules[] = {
   6583         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
   6584     };
   6585 
   6586     const static OneTestCase collationTestCases[] = {
   6587         { {0x0391}, {0x0041}, UCOL_LESS},
   6588         { {0x0031}, {0x0041}, UCOL_LESS},
   6589         { {0x0041}, {0x4e00}, UCOL_LESS},
   6590     };
   6591 
   6592     /* Test rules creation */
   6593     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
   6594 
   6595     /* Test collation reordering API */
   6596     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
   6597 }
   6598 
   6599 /*
   6600  * Test that covers issue reported in ticket 8814
   6601  */
   6602 static void TestReorderWithNumericCollation(void)
   6603 {
   6604     UErrorCode status = U_ZERO_ERROR;
   6605     UCollator  *myCollation;
   6606     UCollator  *myReorderCollation;
   6607     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
   6608     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
   6609     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
   6610     UChar fortyS[] = { 0x0053 };
   6611     UChar fortyThreeP[] = { 0x0050 };
   6612     uint8_t fortyS_sortKey[128];
   6613     int32_t fortyS_sortKey_Length;
   6614     uint8_t fortyThreeP_sortKey[128];
   6615     int32_t fortyThreeP_sortKey_Length;
   6616     uint8_t fortyS_sortKey_reorder[128];
   6617     int32_t fortyS_sortKey_reorder_Length;
   6618     uint8_t fortyThreeP_sortKey_reorder[128];
   6619     int32_t fortyThreeP_sortKey_reorder_Length;
   6620     UCollationResult collResult;
   6621     UCollationResult collResultReorder;
   6622 
   6623     log_verbose("Testing reordering with and without numeric collation\n");
   6624 
   6625     /* build collator tertiary with numeric */
   6626     myCollation = ucol_open("", &status);
   6627     /*
   6628     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6629     */
   6630     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   6631     if(U_FAILURE(status)) {
   6632         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6633         return;
   6634     }
   6635 
   6636     /* build collator tertiary with numeric and reordering */
   6637     myReorderCollation = ucol_open("", &status);
   6638     /*
   6639     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
   6640     */
   6641     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   6642     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
   6643     if(U_FAILURE(status)) {
   6644         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6645         return;
   6646     }
   6647 
   6648     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
   6649     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
   6650     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
   6651     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
   6652 
   6653     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
   6654         log_err_status(status, "ERROR: couldn't generate sort keys\n");
   6655         return;
   6656     }
   6657     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   6658     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   6659     /*
   6660     fprintf(stderr, "\tcollResult = %x\n", collResult);
   6661     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
   6662     fprintf(stderr, "\nfortyS\n");
   6663     for (i = 0; i < fortyS_sortKey_Length; i++) {
   6664         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
   6665     }
   6666     fprintf(stderr, "\nfortyThreeP\n");
   6667     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
   6668         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
   6669     }
   6670     */
   6671     if (collResult != collResultReorder) {
   6672         log_err_status(status, "ERROR: collation results should have been the same.\n");
   6673         return;
   6674     }
   6675 
   6676     ucol_close(myCollation);
   6677     ucol_close(myReorderCollation);
   6678 }
   6679 
   6680 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   6681 {
   6682   for (; *a == *b; ++a, ++b) {
   6683     if (*a == 0) {
   6684       return 0;
   6685     }
   6686   }
   6687   return (*a < *b ? -1 : 1);
   6688 }
   6689 
   6690 static void TestImportRulesDeWithPhonebook(void)
   6691 {
   6692   const char* normalRules[] = {
   6693     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
   6694     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
   6695     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
   6696   };
   6697   const OneTestCase normalTests[] = {
   6698     { {0x00e6}, {0x00c6}, UCOL_LESS},
   6699     { {0x00fc}, {0x00dc}, UCOL_GREATER},
   6700   };
   6701 
   6702   const char* importRules[] = {
   6703     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
   6704     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   6705     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   6706   };
   6707   const OneTestCase importTests[] = {
   6708     { {0x00e6}, {0x00c6}, UCOL_LESS},
   6709     { {0x00fc}, {0x00dc}, UCOL_LESS},
   6710   };
   6711 
   6712   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
   6713   doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
   6714 }
   6715 
   6716 #if 0
   6717 static void TestImportRulesFiWithEor(void)
   6718 {
   6719   /* DUCET. */
   6720   const char* defaultRules[] = {
   6721     "&a<b",                                    /* Dummy rule. */
   6722   };
   6723 
   6724   const OneTestCase defaultTests[] = {
   6725     { {0x0110}, {0x00F0}, UCOL_LESS},
   6726     { {0x00a3}, {0x00a5}, UCOL_LESS},
   6727     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   6728   };
   6729 
   6730   /* European Ordering rules: ignore currency characters. */
   6731   const char* eorRules[] = {
   6732     "[import root-u-co-eor]",
   6733   };
   6734 
   6735   const OneTestCase eorTests[] = {
   6736     { {0x0110}, {0x00F0}, UCOL_LESS},
   6737     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   6738     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   6739   };
   6740 
   6741   const char* fiStdRules[] = {
   6742     "[import fi-u-co-standard]",
   6743   };
   6744 
   6745   const OneTestCase fiStdTests[] = {
   6746     { {0x0110}, {0x00F0}, UCOL_GREATER},
   6747     { {0x00a3}, {0x00a5}, UCOL_LESS},
   6748     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   6749   };
   6750 
   6751   /* Both European Ordering Rules and Fi Standard Rules. */
   6752   const char* eorFiStdRules[] = {
   6753     "[import root-u-co-eor][import fi-u-co-standard]",
   6754   };
   6755 
   6756   /* This is essentially same as the one before once fi.txt is updated with import. */
   6757   const char* fiEorRules[] = {
   6758     "[import fi-u-co-eor]",
   6759   };
   6760 
   6761   const OneTestCase fiEorTests[] = {
   6762     { {0x0110}, {0x00F0}, UCOL_GREATER},
   6763     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   6764     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   6765   };
   6766 
   6767   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
   6768   doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
   6769   doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
   6770   doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
   6771 
   6772   log_knownIssue("8962", NULL);
   6773   /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
   6774         eor{
   6775             Sequence{
   6776                 "[import root-u-co-eor][import fi-u-co-standard]"
   6777             }
   6778             Version{"21.0"}
   6779         }
   6780   */
   6781   /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
   6782 
   6783 }
   6784 #endif
   6785 
   6786 #if 0
   6787 /*
   6788  * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
   6789  * the resource files are built with -includeUnihanColl option.
   6790  * TODO: Uncomment this function and make it work when unihan rules are built by default.
   6791  */
   6792 static void TestImportRulesCJKWithUnihan(void)
   6793 {
   6794   /* DUCET. */
   6795   const char* defaultRules[] = {
   6796     "&a<b",                                    /* Dummy rule. */
   6797   };
   6798 
   6799   const OneTestCase defaultTests[] = {
   6800     { {0x3402}, {0x4e1e}, UCOL_GREATER},
   6801   };
   6802 
   6803   /* European Ordering rules: ignore currency characters. */
   6804   const char* unihanRules[] = {
   6805     "[import ko-u-co-unihan]",
   6806   };
   6807 
   6808   const OneTestCase unihanTests[] = {
   6809     { {0x3402}, {0x4e1e}, UCOL_LESS},
   6810   };
   6811 
   6812   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
   6813   doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
   6814 
   6815 }
   6816 #endif
   6817 
   6818 static void TestImport(void)
   6819 {
   6820     UCollator* vicoll;
   6821     UCollator* escoll;
   6822     UCollator* viescoll;
   6823     UCollator* importviescoll;
   6824     UParseError error;
   6825     UErrorCode status = U_ZERO_ERROR;
   6826     UChar* virules;
   6827     int32_t viruleslength;
   6828     UChar* esrules;
   6829     int32_t esruleslength;
   6830     UChar* viesrules;
   6831     int32_t viesruleslength;
   6832     char srules[500] = "[import vi][import es]";
   6833     UChar rules[500];
   6834     uint32_t length = 0;
   6835     int32_t itemCount;
   6836     int32_t i, k;
   6837     UChar32 start;
   6838     UChar32 end;
   6839     UChar str[500];
   6840     int32_t strLength;
   6841 
   6842     uint8_t sk1[500];
   6843     uint8_t sk2[500];
   6844 
   6845     UBool b;
   6846     USet* tailoredSet;
   6847     USet* importTailoredSet;
   6848 
   6849 
   6850     vicoll = ucol_open("vi", &status);
   6851     if(U_FAILURE(status)){
   6852         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   6853         return;
   6854     }
   6855 
   6856     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   6857     escoll = ucol_open("es", &status);
   6858     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   6859     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   6860     viesrules[0] = 0;
   6861     u_strcat(viesrules, virules);
   6862     u_strcat(viesrules, esrules);
   6863     viesruleslength = viruleslength + esruleslength;
   6864     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6865 
   6866     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6867     length = u_unescape(srules, rules, 500);
   6868     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6869     if(U_FAILURE(status)){
   6870         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6871         return;
   6872     }
   6873 
   6874     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   6875     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   6876 
   6877     if(!uset_equals(tailoredSet, importTailoredSet)){
   6878         log_err("Tailored sets not equal");
   6879     }
   6880 
   6881     uset_close(importTailoredSet);
   6882 
   6883     itemCount = uset_getItemCount(tailoredSet);
   6884 
   6885     for( i = 0; i < itemCount; i++){
   6886         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6887         if(strLength < 2){
   6888             for (; start <= end; start++){
   6889                 k = 0;
   6890                 U16_APPEND(str, k, 500, start, b);
   6891                 (void)b;    /* Suppress set but not used warning. */
   6892                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   6893                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   6894                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6895                     log_err("Sort key for %s not equal\n", str);
   6896                     break;
   6897                 }
   6898             }
   6899         }else{
   6900             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   6901             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   6902             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6903                 log_err("ZZSort key for %s not equal\n", str);
   6904                 break;
   6905             }
   6906 
   6907         }
   6908     }
   6909 
   6910     uset_close(tailoredSet);
   6911 
   6912     uprv_free(viesrules);
   6913 
   6914     ucol_close(vicoll);
   6915     ucol_close(escoll);
   6916     ucol_close(viescoll);
   6917     ucol_close(importviescoll);
   6918 }
   6919 
   6920 static void TestImportWithType(void)
   6921 {
   6922     UCollator* vicoll;
   6923     UCollator* decoll;
   6924     UCollator* videcoll;
   6925     UCollator* importvidecoll;
   6926     UParseError error;
   6927     UErrorCode status = U_ZERO_ERROR;
   6928     const UChar* virules;
   6929     int32_t viruleslength;
   6930     const UChar* derules;
   6931     int32_t deruleslength;
   6932     UChar* viderules;
   6933     int32_t videruleslength;
   6934     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   6935     UChar rules[500];
   6936     uint32_t length = 0;
   6937     int32_t itemCount;
   6938     int32_t i, k;
   6939     UChar32 start;
   6940     UChar32 end;
   6941     UChar str[500];
   6942     int32_t strLength;
   6943 
   6944     uint8_t sk1[500];
   6945     uint8_t sk2[500];
   6946 
   6947     USet* tailoredSet;
   6948     USet* importTailoredSet;
   6949 
   6950     vicoll = ucol_open("vi", &status);
   6951     if(U_FAILURE(status)){
   6952         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6953         return;
   6954     }
   6955     virules = ucol_getRules(vicoll, &viruleslength);
   6956     /* decoll = ucol_open("de@collation=phonebook", &status); */
   6957     decoll = ucol_open("de-u-co-phonebk", &status);
   6958     if(U_FAILURE(status)){
   6959         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6960         return;
   6961     }
   6962 
   6963 
   6964     derules = ucol_getRules(decoll, &deruleslength);
   6965     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   6966     viderules[0] = 0;
   6967     u_strcat(viderules, virules);
   6968     u_strcat(viderules, derules);
   6969     videruleslength = viruleslength + deruleslength;
   6970     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6971 
   6972     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6973     length = u_unescape(srules, rules, 500);
   6974     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6975     if(U_FAILURE(status)){
   6976         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6977         return;
   6978     }
   6979 
   6980     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   6981     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   6982 
   6983     if(!uset_equals(tailoredSet, importTailoredSet)){
   6984         log_err("Tailored sets not equal");
   6985     }
   6986 
   6987     uset_close(importTailoredSet);
   6988 
   6989     itemCount = uset_getItemCount(tailoredSet);
   6990 
   6991     for( i = 0; i < itemCount; i++){
   6992         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6993         if(strLength < 2){
   6994             for (; start <= end; start++){
   6995                 k = 0;
   6996                 U16_APPEND_UNSAFE(str, k, start);
   6997                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   6998                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   6999                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   7000                     log_err("Sort key for %s not equal\n", str);
   7001                     break;
   7002                 }
   7003             }
   7004         }else{
   7005             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   7006             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   7007             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   7008                 log_err("Sort key for %s not equal\n", str);
   7009                 break;
   7010             }
   7011 
   7012         }
   7013     }
   7014 
   7015     uset_close(tailoredSet);
   7016 
   7017     uprv_free(viderules);
   7018 
   7019     ucol_close(videcoll);
   7020     ucol_close(importvidecoll);
   7021     ucol_close(vicoll);
   7022     ucol_close(decoll);
   7023 }
   7024 
/*
 * Long test strings, given as UTF-16 code units without a terminating NUL.
 * They are referenced (pointer plus array length) by longUpperStrItems.
 */

/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};

/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times (one row per repetition) */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};

/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times (one row per repetition) */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
   7063 
/* Number of elements in a true array (same computation as the LEN macro defined near the top of this file). */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))

/* One test string: pointer to its UTF-16 code units plus the number of code units. */
typedef struct {
    const UChar * longUpperStrPtr;
    int32_t       longUpperStrLen;
} LongUpperStrItem;

/*
 * Test strings for TestCaseLevelBufferOverflow, terminated by an entry whose pointer is NULL.
 * String pointers must be in reverse collation order of the corresponding strings:
 * the test requires each sort key to compare less than the key of the preceding entry.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }
};

/* Capacity of the sort key buffers used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
   7080 
   7081 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
   7082 static void TestCaseLevelBufferOverflow(void)
   7083 {
   7084     UErrorCode status = U_ZERO_ERROR;
   7085     UCollator * ucol = ucol_open("root", &status);
   7086     if ( U_SUCCESS(status) ) {
   7087         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
   7088         if ( U_SUCCESS(status) ) {
   7089             const LongUpperStrItem * itemPtr;
   7090             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
   7091             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
   7092                 int32_t sortKeyLen;
   7093                 if (itemPtr > longUpperStrItems) {
   7094                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
   7095                 }
   7096                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
   7097                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
   7098                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
   7099                     break;
   7100                 }
   7101                 if ( itemPtr > longUpperStrItems ) {
   7102                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
   7103                     if (compareResult >= 0) {
   7104                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
   7105                     }
   7106                 }
   7107             }
   7108         } else {
   7109             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
   7110         }
   7111         ucol_close(ucol);
   7112     } else {
   7113         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
   7114     }
   7115 }
   7116 
   7117 
/* Registers test function x with addTest() under the path "tscoll/cmsccoll/<function name>". */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Adds the tests of this file to the test tree rooted at root, one TEST()
 * call per test function. Entries that are commented out are not registered.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    /* Rule syntax: same-strength lists, ranges, special and private use characters, imports. */
    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Script reordering. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
}
   7228 
   7229 #endif /* #if !UCONFIG_NO_COLLATION */
   7230