Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2010, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "ucol_tok.h"
     33 #include "cmemory.h"
     34 #include "cstring.h"
     35 #include "uassert.h"
     36 #include "unicode/parseerr.h"
     37 #include "unicode/ucnv.h"
     38 #include "unicode/ures.h"
     39 #include "unicode/uscript.h"
     40 #include "uparse.h"
     41 #include "putilimp.h"
     42 
     43 
     44 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     45 
     46 #define MAX_TOKEN_LEN 16
     47 
/*
 * Signature of the comparison callbacks handed to swampEarlier(),
 * swampLater(), probeStrength() and testSwitch(). The callback compares
 * source (sLen code units) with target (tLen code units) using the opaque
 * collator handle. The meaning of `object` is callback-specific; ucaTest()
 * ignores it.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
     51 
     52 
     53 
     54 const static char cnt1[][10] = {
     55 
     56   "AA",
     57   "AC",
     58   "AZ",
     59   "AQ",
     60   "AB",
     61   "ABZ",
     62   "ABQ",
     63   "Z",
     64   "ABC",
     65   "Q",
     66   "B"
     67 };
     68 
     69 const static char cnt2[][10] = {
     70   "DA",
     71   "DAD",
     72   "DAZ",
     73   "MAR",
     74   "Z",
     75   "DAVIS",
     76   "MARK",
     77   "DAV",
     78   "DAVI"
     79 };
     80 
     81 static void IncompleteCntTest(void)
     82 {
     83   UErrorCode status = U_ZERO_ERROR;
     84   UChar temp[90];
     85   UChar t1[90];
     86   UChar t2[90];
     87 
     88   UCollator *coll =  NULL;
     89   uint32_t i = 0, j = 0;
     90   uint32_t size = 0;
     91 
     92   u_uastrcpy(temp, " & Z < ABC < Q < B");
     93 
     94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     95 
     96   if(U_SUCCESS(status)) {
     97     size = sizeof(cnt1)/sizeof(cnt1[0]);
     98     for(i = 0; i < size-1; i++) {
     99       for(j = i+1; j < size; j++) {
    100         UCollationElements *iter;
    101         u_uastrcpy(t1, cnt1[i]);
    102         u_uastrcpy(t2, cnt1[j]);
    103         doTest(coll, t1, t2, UCOL_LESS);
    104         /* synwee : added collation element iterator test */
    105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    106         if (U_FAILURE(status)) {
    107           log_err("Creation of iterator failed\n");
    108           break;
    109         }
    110         backAndForth(iter);
    111         ucol_closeElements(iter);
    112       }
    113     }
    114   }
    115 
    116   ucol_close(coll);
    117 
    118 
    119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    121 
    122   if(U_SUCCESS(status)) {
    123     size = sizeof(cnt2)/sizeof(cnt2[0]);
    124     for(i = 0; i < size-1; i++) {
    125       for(j = i+1; j < size; j++) {
    126         UCollationElements *iter;
    127         u_uastrcpy(t1, cnt2[i]);
    128         u_uastrcpy(t2, cnt2[j]);
    129         doTest(coll, t1, t2, UCOL_LESS);
    130 
    131         /* synwee : added collation element iterator test */
    132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    133         if (U_FAILURE(status)) {
    134           log_err("Creation of iterator failed\n");
    135           break;
    136         }
    137         backAndForth(iter);
    138         ucol_closeElements(iter);
    139       }
    140     }
    141   }
    142 
    143   ucol_close(coll);
    144 
    145 
    146 }
    147 
    148 const static char shifted[][20] = {
    149   "black bird",
    150   "black-bird",
    151   "blackbird",
    152   "black Bird",
    153   "black-Bird",
    154   "blackBird",
    155   "black birds",
    156   "black-birds",
    157   "blackbirds"
    158 };
    159 
    160 const static UCollationResult shiftedTert[] = {
    161   UCOL_EQUAL,
    162   UCOL_EQUAL,
    163   UCOL_EQUAL,
    164   UCOL_LESS,
    165   UCOL_EQUAL,
    166   UCOL_EQUAL,
    167   UCOL_LESS,
    168   UCOL_EQUAL,
    169   UCOL_EQUAL
    170 };
    171 
    172 const static char nonignorable[][20] = {
    173   "black bird",
    174   "black Bird",
    175   "black birds",
    176   "black-bird",
    177   "black-Bird",
    178   "black-birds",
    179   "blackbird",
    180   "blackBird",
    181   "blackbirds"
    182 };
    183 
    184 static void BlackBirdTest(void) {
    185   UErrorCode status = U_ZERO_ERROR;
    186   UChar t1[90];
    187   UChar t2[90];
    188 
    189   uint32_t i = 0, j = 0;
    190   uint32_t size = 0;
    191   UCollator *coll = ucol_open("en_US", &status);
    192 
    193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    195 
    196   if(U_SUCCESS(status)) {
    197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    198     for(i = 0; i < size-1; i++) {
    199       for(j = i+1; j < size; j++) {
    200         u_uastrcpy(t1, nonignorable[i]);
    201         u_uastrcpy(t2, nonignorable[j]);
    202         doTest(coll, t1, t2, UCOL_LESS);
    203       }
    204     }
    205   }
    206 
    207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    209 
    210   if(U_SUCCESS(status)) {
    211     size = sizeof(shifted)/sizeof(shifted[0]);
    212     for(i = 0; i < size-1; i++) {
    213       for(j = i+1; j < size; j++) {
    214         u_uastrcpy(t1, shifted[i]);
    215         u_uastrcpy(t2, shifted[j]);
    216         doTest(coll, t1, t2, UCOL_LESS);
    217       }
    218     }
    219   }
    220 
    221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    222   if(U_SUCCESS(status)) {
    223     size = sizeof(shifted)/sizeof(shifted[0]);
    224     for(i = 1; i < size; i++) {
    225       u_uastrcpy(t1, shifted[i-1]);
    226       u_uastrcpy(t2, shifted[i]);
    227       doTest(coll, t1, t2, shiftedTert[i]);
    228     }
    229   }
    230 
    231   ucol_close(coll);
    232 }
    233 
    234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    237     {0x0041/*'A'*/, 0x0300, 0x0000},
    238     {0x00C0, 0x0301, 0x0000},
    239     /* this would work with forced normalization */
    240     {0x00C0, 0x0316, 0x0000}
    241 };
    242 
    243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    246     {0x00C0, 0},
    247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    248     /* this would work with forced normalization */
    249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    250 };
    251 
    252 const static UCollationResult results[] = {
    253     UCOL_GREATER,
    254     UCOL_EQUAL,
    255     UCOL_EQUAL,
    256     UCOL_GREATER,
    257     UCOL_EQUAL
    258 };
    259 
    260 static void FunkyATest(void)
    261 {
    262 
    263     int32_t i;
    264     UErrorCode status = U_ZERO_ERROR;
    265     UCollator  *myCollation;
    266     myCollation = ucol_open("en_US", &status);
    267     if(U_FAILURE(status)){
    268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    269         return;
    270     }
    271     log_verbose("Testing some A letters, for some reason\n");
    272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    273     ucol_setStrength(myCollation, UCOL_TERTIARY);
    274     for (i = 0; i < 4 ; i++)
    275     {
    276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    277     }
    278     ucol_close(myCollation);
    279 }
    280 
    281 UColAttributeValue caseFirst[] = {
    282     UCOL_OFF,
    283     UCOL_LOWER_FIRST,
    284     UCOL_UPPER_FIRST
    285 };
    286 
    287 
    288 UColAttributeValue alternateHandling[] = {
    289     UCOL_NON_IGNORABLE,
    290     UCOL_SHIFTED
    291 };
    292 
    293 UColAttributeValue caseLevel[] = {
    294     UCOL_OFF,
    295     UCOL_ON
    296 };
    297 
    298 UColAttributeValue strengths[] = {
    299     UCOL_PRIMARY,
    300     UCOL_SECONDARY,
    301     UCOL_TERTIARY,
    302     UCOL_QUATERNARY,
    303     UCOL_IDENTICAL
    304 };
    305 
    306 #if 0
    307 static const char * strengthsC[] = {
    308     "UCOL_PRIMARY",
    309     "UCOL_SECONDARY",
    310     "UCOL_TERTIARY",
    311     "UCOL_QUATERNARY",
    312     "UCOL_IDENTICAL"
    313 };
    314 
    315 static const char * caseFirstC[] = {
    316     "UCOL_OFF",
    317     "UCOL_LOWER_FIRST",
    318     "UCOL_UPPER_FIRST"
    319 };
    320 
    321 
    322 static const char * alternateHandlingC[] = {
    323     "UCOL_NON_IGNORABLE",
    324     "UCOL_SHIFTED"
    325 };
    326 
    327 static const char * caseLevelC[] = {
    328     "UCOL_OFF",
    329     "UCOL_ON"
    330 };
    331 
    332 /* not used currently - does not test only prints */
    333 static void PrintMarkDavis(void)
    334 {
    335   UErrorCode status = U_ZERO_ERROR;
    336   UChar m[256];
    337   uint8_t sortkey[256];
    338   UCollator *coll = ucol_open("en_US", &status);
    339   uint32_t h,i,j,k, sortkeysize;
    340   uint32_t sizem = 0;
    341   char buffer[512];
    342   uint32_t len = 512;
    343 
    344   log_verbose("PrintMarkDavis");
    345 
    346   u_uastrcpy(m, "Mark Davis");
    347   sizem = u_strlen(m);
    348 
    349 
    350   m[1] = 0xe4;
    351 
    352   for(i = 0; i<sizem; i++) {
    353     fprintf(stderr, "\\u%04X ", m[i]);
    354   }
    355   fprintf(stderr, "\n");
    356 
    357   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    358     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    359     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    360 
    361     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    362       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    363       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    364 
    365       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    366         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    367         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    368 
    369         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    370           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    371           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    372           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    373           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    374         }
    375 
    376       }
    377 
    378     }
    379 
    380   }
    381 }
    382 #endif
    383 
    384 static void BillFairmanTest(void) {
    385 /*
    386 ** check for actual locale via ICU resource bundles
    387 **
    388 ** lp points to the original locale ("fr_FR_....")
    389 */
    390 
    391     UResourceBundle *lr,*cr;
    392     UErrorCode              lec = U_ZERO_ERROR;
    393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    394 
    395     log_verbose("BillFairmanTest\n");
    396 
    397     lr = ures_open(NULL,lp,&lec);
    398     if (lr) {
    399         cr = ures_getByKey(lr,"collations",0,&lec);
    400         if (cr) {
    401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    402             if (lp) {
    403                 if (U_SUCCESS(lec)) {
    404                     if(strcmp(lp, "fr") != 0) {
    405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    406                     }
    407                 }
    408             }
    409             ures_close(cr);
    410         }
    411         ures_close(lr);
    412     }
    413 }
    414 
    415 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    416     UChar source[256] = { '\0'};
    417     UChar target[256] = { '\0'};
    418     UChar preP = 0x31a3;
    419     UChar preQ = 0x310d;
    420 /*
    421     UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
    422     UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
    423 */
    424     /*log_verbose("Testing primary\n");*/
    425 
    426     doTest(col, p, q, UCOL_LESS);
    427 /*
    428     UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
    429 
    430     if(result!=UCOL_LESS){
    431        aescstrdup(p,utfSource,256);
    432        aescstrdup(q,utfTarget,256);
    433        fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    434     }
    435 */
    436     source[0] = preP;
    437     u_strcpy(source+1,p);
    438     target[0] = preQ;
    439     u_strcpy(target+1,q);
    440     doTest(col, source, target, UCOL_LESS);
    441 /*
    442     fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
    443 */
    444 }
    445 
    446 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    447     UChar source[256] = { '\0'};
    448     UChar target[256] = { '\0'};
    449 
    450     /*log_verbose("Testing secondary\n");*/
    451 
    452     doTest(col, p, q, UCOL_LESS);
    453 /*
    454     fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
    455 */
    456     source[0] = 0x0053;
    457     u_strcpy(source+1,p);
    458     target[0]= 0x0073;
    459     u_strcpy(target+1,q);
    460 
    461     doTest(col, source, target, UCOL_LESS);
    462 /*
    463     fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    464 */
    465 
    466 
    467     u_strcpy(source,p);
    468     source[u_strlen(p)] = 0x62;
    469     source[u_strlen(p)+1] = 0;
    470 
    471 
    472     u_strcpy(target,q);
    473     target[u_strlen(q)] = 0x61;
    474     target[u_strlen(q)+1] = 0;
    475 
    476     doTest(col, source, target, UCOL_GREATER);
    477 
    478 /*
    479     fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
    480 */
    481 }
    482 
    483 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    484     UChar source[256] = { '\0'};
    485     UChar target[256] = { '\0'};
    486 
    487     /*log_verbose("Testing tertiary\n");*/
    488 
    489     doTest(col, p, q, UCOL_LESS);
    490 /*
    491     fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
    492 */
    493     source[0] = 0x0020;
    494     u_strcpy(source+1,p);
    495     target[0]= 0x002D;
    496     u_strcpy(target+1,q);
    497 
    498     doTest(col, source, target, UCOL_LESS);
    499 /*
    500     fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
    501 */
    502 
    503     u_strcpy(source,p);
    504     source[u_strlen(p)] = 0xE0;
    505     source[u_strlen(p)+1] = 0;
    506 
    507     u_strcpy(target,q);
    508     target[u_strlen(q)] = 0x61;
    509     target[u_strlen(q)+1] = 0;
    510 
    511     doTest(col, source, target, UCOL_GREATER);
    512 
    513 /*
    514     fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
    515 */
    516 }
    517 
    518 static void testEquality(UCollator* col, const UChar* p,const UChar* q){
    519 /*
    520     UChar source[256] = { '\0'};
    521     UChar target[256] = { '\0'};
    522 */
    523 
    524     doTest(col, p, q, UCOL_EQUAL);
    525 /*
    526     fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
    527 */
    528 }
    529 
    530 static void testCollator(UCollator *coll, UErrorCode *status) {
    531   const UChar *rules = NULL, *current = NULL;
    532   int32_t ruleLen = 0;
    533   uint32_t strength = 0;
    534   uint32_t chOffset = 0; uint32_t chLen = 0;
    535   uint32_t exOffset = 0; uint32_t exLen = 0;
    536   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    537   uint32_t firstEx = 0;
    538 /*  uint32_t rExpsLen = 0; */
    539   uint32_t firstLen = 0;
    540   UBool varT = FALSE; UBool top_ = TRUE;
    541   uint16_t specs = 0;
    542   UBool startOfRules = TRUE;
    543   UBool lastReset = FALSE;
    544   UBool before = FALSE;
    545   uint32_t beforeStrength = 0;
    546   UColTokenParser src;
    547   UColOptionSet opts;
    548 
    549   UChar first[256];
    550   UChar second[256];
    551   UChar tempB[256];
    552   uint32_t tempLen;
    553   UChar *rulesCopy = NULL;
    554   UParseError parseError;
    555 
    556   uprv_memset(&src, 0, sizeof(UColTokenParser));
    557 
    558   src.opts = &opts;
    559 
    560   rules = ucol_getRules(coll, &ruleLen);
    561   if(U_SUCCESS(*status) && ruleLen > 0) {
    562     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    563     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    564     src.current = src.source = rulesCopy;
    565     src.end = rulesCopy+ruleLen;
    566     src.extraCurrent = src.end;
    567     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    568     *first = *second = 0;
    569 
    570 	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    571 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    572     while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
    573       strength = src.parsedToken.strength;
    574       chOffset = src.parsedToken.charsOffset;
    575       chLen = src.parsedToken.charsLen;
    576       exOffset = src.parsedToken.extensionOffset;
    577       exLen = src.parsedToken.extensionLen;
    578       prefixOffset = src.parsedToken.prefixOffset;
    579       prefixLen = src.parsedToken.prefixLen;
    580       specs = src.parsedToken.flags;
    581 
    582       startOfRules = FALSE;
    583       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    584       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    585       if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
    586         second[0] = 0;
    587       } else {
    588         u_strncpy(second,src.source+chOffset, chLen);
    589         second[chLen] = 0;
    590 
    591         if(exLen > 0 && firstEx == 0) {
    592           u_strncat(first, src.source+exOffset, exLen);
    593           first[firstLen+exLen] = 0;
    594         }
    595 
    596         if(lastReset == TRUE && prefixLen != 0) {
    597           u_strncpy(first+prefixLen, first, firstLen);
    598           u_strncpy(first, src.source+prefixOffset, prefixLen);
    599           first[firstLen+prefixLen] = 0;
    600           firstLen = firstLen+prefixLen;
    601         }
    602 
    603         if(before == TRUE) { /* swap first and second */
    604           u_strcpy(tempB, first);
    605           u_strcpy(first, second);
    606           u_strcpy(second, tempB);
    607 
    608           tempLen = firstLen;
    609           firstLen = chLen;
    610           chLen = tempLen;
    611 
    612           tempLen = firstEx;
    613           firstEx = exLen;
    614           exLen = tempLen;
    615           if(beforeStrength < strength) {
    616             strength = beforeStrength;
    617           }
    618         }
    619       }
    620       lastReset = FALSE;
    621 
    622       switch(strength){
    623       case UCOL_IDENTICAL:
    624           testEquality(coll,first,second);
    625           break;
    626       case UCOL_PRIMARY:
    627           testPrimary(coll,first,second);
    628           break;
    629       case UCOL_SECONDARY:
    630           testSecondary(coll,first,second);
    631           break;
    632       case UCOL_TERTIARY:
    633           testTertiary(coll,first,second);
    634           break;
    635       case UCOL_TOK_RESET:
    636         lastReset = TRUE;
    637         before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
    638         if(before) {
    639           beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
    640         }
    641         break;
    642       default:
    643           break;
    644       }
    645 
    646       if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
    647         before = FALSE;
    648       } else {
    649         firstLen = chLen;
    650         firstEx = exLen;
    651         u_strcpy(first, second);
    652       }
    653     }
    654     uprv_free(src.source);
    655   }
    656 }
    657 
    658 static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    659   UCollator *UCA = (UCollator *)collator;
    660   return ucol_strcoll(UCA, source, sLen, target, tLen);
    661 }
    662 
    663 /*
    664 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
    665 #ifdef U_WINDOWS
    666   LCID lcid = (LCID)collator;
    667   return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
    668 #else
    669   return 0;
    670 #endif
    671 }
    672 */
    673 
    674 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
    675                                      UChar s1, UChar s2,
    676                                      const UChar *s, const uint32_t sLen,
    677                                      const UChar *t, const uint32_t tLen) {
    678   UChar source[256] = {0};
    679   UChar target[256] = {0};
    680 
    681   source[0] = s1;
    682   u_strcpy(source+1, s);
    683   target[0] = s2;
    684   u_strcpy(target+1, t);
    685 
    686   return func(collator, opts, source, sLen+1, target, tLen+1);
    687 }
    688 
    689 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
    690                                    UChar s1, UChar s2,
    691                                    const UChar *s, const uint32_t sLen,
    692                                    const UChar *t, const uint32_t tLen) {
    693   UChar source[256] = {0};
    694   UChar target[256] = {0};
    695 
    696   u_strcpy(source, s);
    697   source[sLen] = s1;
    698   u_strcpy(target, t);
    699   target[tLen] = s2;
    700 
    701   return func(collator, opts, source, sLen+1, target, tLen+1);
    702 }
    703 
    704 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
    705                               const UChar *s, const uint32_t sLen,
    706                               const UChar *t, const uint32_t tLen,
    707                               UCollationResult result) {
    708   /*UChar fPrimary = 0x6d;*/
    709   /*UChar sPrimary = 0x6e;*/
    710   UChar fSecondary = 0x310d;
    711   UChar sSecondary = 0x31a3;
    712   UChar fTertiary = 0x310f;
    713   UChar sTertiary = 0x31b7;
    714 
    715   UCollationResult oposite;
    716   if(result == UCOL_EQUAL) {
    717     return UCOL_IDENTICAL;
    718   } else if(result == UCOL_GREATER) {
    719     oposite = UCOL_LESS;
    720   } else {
    721     oposite = UCOL_GREATER;
    722   }
    723 
    724   if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
    725     return UCOL_PRIMARY;
    726   } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
    727     (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
    728     return UCOL_SECONDARY;
    729   } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
    730     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
    731     return UCOL_TERTIARY;
    732   } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
    733     (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
    734     return UCOL_QUATERNARY;
    735   } else {
    736     return UCOL_IDENTICAL;
    737   }
    738 }
    739 
    740 static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
    741   uint32_t i = 0;
    742 
    743   if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
    744     buffer[0] = '=';
    745     buffer[1] = '=';
    746     buffer[2] = '\0';
    747   } else if(res == UCOL_GREATER) {
    748     for(i = 0; i<strength+1; i++) {
    749       buffer[i] = '>';
    750     }
    751     buffer[strength+1] = '\0';
    752   } else {
    753     for(i = 0; i<strength+1; i++) {
    754       buffer[i] = '<';
    755     }
    756     buffer[strength+1] = '\0';
    757   }
    758 
    759   return buffer;
    760 }
    761 
    762 
    763 
    764 static void logFailure (const char *platform, const char *test,
    765                         const UChar *source, const uint32_t sLen,
    766                         const UChar *target, const uint32_t tLen,
    767                         UCollationResult realRes, uint32_t realStrength,
    768                         UCollationResult expRes, uint32_t expStrength, UBool error) {
    769 
    770   uint32_t i = 0;
    771 
    772   char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
    773   static int32_t maxOutputLength = 0;
    774   int32_t outputLength;
    775 
    776   *sEsc = *tEsc = *s = *t = 0;
    777   if(error == TRUE) {
    778     log_err("Difference between expected and generated order. Run test with -v for more info\n");
    779   } else if(getTestOption(VERBOSITY_OPTION) == 0) {
    780     return;
    781   }
    782   for(i = 0; i<sLen; i++) {
    783     sprintf(b, "%04X", source[i]);
    784     strcat(sEsc, "\\u");
    785     strcat(sEsc, b);
    786     strcat(s, b);
    787     strcat(s, " ");
    788     if(source[i] < 0x80) {
    789       sprintf(b, "(%c)", source[i]);
    790       strcat(sEsc, b);
    791     }
    792   }
    793   for(i = 0; i<tLen; i++) {
    794     sprintf(b, "%04X", target[i]);
    795     strcat(tEsc, "\\u");
    796     strcat(tEsc, b);
    797     strcat(t, b);
    798     strcat(t, " ");
    799     if(target[i] < 0x80) {
    800       sprintf(b, "(%c)", target[i]);
    801       strcat(tEsc, b);
    802     }
    803   }
    804 /*
    805   strcpy(output, "[[ ");
    806   strcat(output, sEsc);
    807   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    808   strcat(output, tEsc);
    809 
    810   strcat(output, " : ");
    811 
    812   strcat(output, sEsc);
    813   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    814   strcat(output, tEsc);
    815   strcat(output, " ]] ");
    816 
    817   log_verbose("%s", output);
    818 */
    819 
    820 
    821   strcpy(output, "DIFF: ");
    822 
    823   strcat(output, s);
    824   strcat(output, " : ");
    825   strcat(output, t);
    826 
    827   strcat(output, test);
    828   strcat(output, ": ");
    829 
    830   strcat(output, sEsc);
    831   strcat(output, getRelationSymbol(expRes, expStrength, relation));
    832   strcat(output, tEsc);
    833 
    834   strcat(output, " ");
    835 
    836   strcat(output, platform);
    837   strcat(output, ": ");
    838 
    839   strcat(output, sEsc);
    840   strcat(output, getRelationSymbol(realRes, realStrength, relation));
    841   strcat(output, tEsc);
    842 
    843   outputLength = (int32_t)strlen(output);
    844   if(outputLength > maxOutputLength) {
    845     maxOutputLength = outputLength;
    846     U_ASSERT(outputLength < sizeof(output));
    847   }
    848 
    849   log_verbose("%s\n", output);
    850 
    851 }
    852 
    853 /*
    854 static void printOutRules(const UChar *rules) {
    855   uint32_t len = u_strlen(rules);
    856   uint32_t i = 0;
    857   char toPrint;
    858   uint32_t line = 0;
    859 
    860   fprintf(stdout, "Rules:");
    861 
    862   for(i = 0; i<len; i++) {
    863     if(rules[i]<0x7f && rules[i]>=0x20) {
    864       toPrint = (char)rules[i];
    865       if(toPrint == '&') {
    866         line = 1;
    867         fprintf(stdout, "\n&");
    868       } else if(toPrint == ';') {
    869         fprintf(stdout, "<<");
    870         line+=2;
    871       } else if(toPrint == ',') {
    872         fprintf(stdout, "<<<");
    873         line+=3;
    874       } else {
    875         fprintf(stdout, "%c", toPrint);
    876         line++;
    877       }
    878     } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
    879       fprintf(stdout, "\\u%04X", rules[i]);
    880       line+=6;
    881     }
    882     if(line>72) {
    883       fprintf(stdout, "\n");
    884       line = 0;
    885     }
    886   }
    887 
    888   log_verbose("\n");
    889 
    890 }
    891 */
    892 
    893 static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
    894   uint32_t diffs = 0;
    895   UCollationResult realResult;
    896   uint32_t realStrength;
    897 
    898   uint32_t sLen = u_strlen(first);
    899   uint32_t tLen = u_strlen(second);
    900 
    901   realResult = func(collator, opts, first, sLen, second, tLen);
    902   realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
    903 
    904   if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
    905     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
    906     diffs++;
    907   } else if(realResult != UCOL_LESS || realStrength != strength) {
    908     logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
    909     diffs++;
    910   }
    911   return diffs;
    912 }
    913 
    914 
    915 static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
    916   const UChar *rules = NULL, *current = NULL;
    917   int32_t ruleLen = 0;
    918   uint32_t strength = 0;
    919   uint32_t chOffset = 0; uint32_t chLen = 0;
    920   uint32_t exOffset = 0; uint32_t exLen = 0;
    921   uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
    922 /*  uint32_t rExpsLen = 0; */
    923   uint32_t firstLen = 0, secondLen = 0;
    924   UBool varT = FALSE; UBool top_ = TRUE;
    925   uint16_t specs = 0;
    926   UBool startOfRules = TRUE;
    927   UColTokenParser src;
    928   UColOptionSet opts;
    929 
    930   UChar first[256];
    931   UChar second[256];
    932   UChar *rulesCopy = NULL;
    933 
    934   uint32_t UCAdiff = 0;
    935   uint32_t Windiff = 1;
    936   UParseError parseError;
    937 
    938   uprv_memset(&src, 0, sizeof(UColTokenParser));
    939   src.opts = &opts;
    940 
    941   rules = ucol_getRules(coll, &ruleLen);
    942 
    943   /*printOutRules(rules);*/
    944 
    945   if(U_SUCCESS(*status) && ruleLen > 0) {
    946     rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
    947     uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
    948     src.current = src.source = rulesCopy;
    949     src.end = rulesCopy+ruleLen;
    950     src.extraCurrent = src.end;
    951     src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
    952     *first = *second = 0;
    953 
    954     /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
    955        the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
    956     while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
    957       strength = src.parsedToken.strength;
    958       chOffset = src.parsedToken.charsOffset;
    959       chLen = src.parsedToken.charsLen;
    960       exOffset = src.parsedToken.extensionOffset;
    961       exLen = src.parsedToken.extensionLen;
    962       prefixOffset = src.parsedToken.prefixOffset;
    963       prefixLen = src.parsedToken.prefixLen;
    964       specs = src.parsedToken.flags;
    965 
    966       startOfRules = FALSE;
    967       varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
    968       top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
    969 
    970       u_strncpy(second,src.source+chOffset, chLen);
    971       second[chLen] = 0;
    972       secondLen = chLen;
    973 
    974       if(exLen > 0) {
    975         u_strncat(first, src.source+exOffset, exLen);
    976         first[firstLen+exLen] = 0;
    977         firstLen += exLen;
    978       }
    979 
    980       if(strength != UCOL_TOK_RESET) {
    981         if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
    982           UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
    983           /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
    984         }
    985       }
    986 
    987 
    988       firstLen = chLen;
    989       u_strcpy(first, second);
    990 
    991     }
    992     if(UCAdiff != 0 && Windiff != 0) {
    993       log_verbose("\n");
    994     }
    995     if(UCAdiff == 0) {
    996       log_verbose("No immediate difference with %s!\n", refName);
    997     }
    998     if(Windiff == 0) {
    999       log_verbose("No immediate difference with Win32!\n");
   1000     }
   1001     uprv_free(src.source);
   1002   }
   1003 }
   1004 
   /*
    * Orders two collation elements, each given as a lead CE plus its
    * continuation CE: (s1, s2) against (t1, t2).
    *
    * The comparison is done level by level. Each level key is built from the
    * lead bits followed by the continuation bits:
    *   primary   - upper 16 bits of the lead, then upper 16 bits of the continuation
    *   secondary - bits 8..15 of the lead, then bits 8..15 of the continuation
    *   tertiary  - low 8 bits of the lead, then low 8 bits of the continuation
    *
    * Returns 0 when both pairs are bit-for-bit equal, -1 when (s1, s2) sorts
    * first, and 1 otherwise.
    */
   static int32_t compareCEs(uint32_t s1, uint32_t s2,
                      uint32_t t1, uint32_t t2) {
     uint32_t sKey[3];
     uint32_t tKey[3];
     int32_t level;

     if(s1 == t1 && s2 == t2) {
       return 0;
     }

     sKey[0] = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
     tKey[0] = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);

     sKey[1] = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
     tKey[1] = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);

     sKey[2] = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
     tKey[2] = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);

     /* The first level that differs decides the result. */
     for(level = 0; level < 3; level++) {
       if(sKey[level] != tKey[level]) {
         return (sKey[level] < tKey[level]) ? -1 : 1;
       }
     }

     /* All three keys equal although the raw values differed: report "greater". */
     return 1;
   }
   1041 
   /*
    * CE boundaries recorded for one indirect position: the lead and
    * continuation CE where it starts and, when a limit is known, the lead and
    * continuation CE of that limit (both zero otherwise).
    */
   typedef struct {
     uint32_t startCE, startContCE;   /* lead / continuation CE of the start */
     uint32_t limitCE, limitContCE;   /* lead / continuation CE of the limit */
   } indirectBoundaries;
   1048 
   1049 /* these values are used for finding CE values for indirect positioning. */
   1050 /* Indirect positioning is a mechanism for allowing resets on symbolic   */
   1051 /* values. It only works for resets and you cannot tailor indirect names */
   1052 /* An indirect name can define either an anchor point or a range. An     */
   1053 /* anchor point behaves in exactly the same way as a code point in reset */
   1054 /* would, except that it cannot be tailored. A range (we currently only  */
   1055 /* know for the [top] range will explicitly set the upper bound for      */
   1056 /* generated CEs, thus allowing for better control over how many CEs can */
   1057 /* be squeezed between in the range without performance penalty.         */
   1058 /* In that respect, we use [top] for tailoring of locales that use CJK   */
   1059 /* characters. Other indirect values are currently a pure convenience,   */
   1060 /* they can be used to assure that the CEs will be always positioned in  */
   1061 /* the same place relative to a point with known properties (e.g. first  */
   1062 /* primary ignorable). */
   1063 static indirectBoundaries ucolIndirectBoundaries[15];
   1064 static UBool indirectBoundariesSet = FALSE;
   1065 static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
   1066     /* Set values for the top - TODO: once we have values for all the indirects, we are going */
   1067     /* to initalize here. */
   1068     ucolIndirectBoundaries[indexR].startCE = start[0];
   1069     ucolIndirectBoundaries[indexR].startContCE = start[1];
   1070     if(end) {
   1071         ucolIndirectBoundaries[indexR].limitCE = end[0];
   1072         ucolIndirectBoundaries[indexR].limitContCE = end[1];
   1073     } else {
   1074         ucolIndirectBoundaries[indexR].limitCE = 0;
   1075         ucolIndirectBoundaries[indexR].limitContCE = 0;
   1076     }
   1077 }
   1078 
   1079 static void testCEs(UCollator *coll, UErrorCode *status) {
   1080     const UChar *rules = NULL, *current = NULL;
   1081     int32_t ruleLen = 0;
   1082 
   1083     uint32_t strength = 0;
   1084     uint32_t maxStrength = UCOL_IDENTICAL;
   1085     uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
   1086     uint32_t lastCE;
   1087     uint32_t lastContCE;
   1088 
   1089     int32_t result = 0;
   1090     uint32_t chOffset = 0; uint32_t chLen = 0;
   1091     uint32_t exOffset = 0; uint32_t exLen = 0;
   1092     uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
   1093     uint32_t oldOffset = 0;
   1094 
   1095     /* uint32_t rExpsLen = 0; */
   1096     /* uint32_t firstLen = 0; */
   1097     uint16_t specs = 0;
   1098     UBool varT = FALSE; UBool top_ = TRUE;
   1099     UBool startOfRules = TRUE;
   1100     UBool before = FALSE;
   1101     UColTokenParser src;
   1102     UColOptionSet opts;
   1103     UParseError parseError;
   1104     UChar *rulesCopy = NULL;
   1105     collIterate *c = uprv_new_collIterate(status);
   1106     UCAConstants *consts = NULL;
   1107     uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
   1108         UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
   1109     const char *colLoc;
   1110     UCollator *UCA = ucol_open("root", status);
   1111 
   1112     if (U_FAILURE(*status)) {
   1113         log_err("Could not open root collator %s\n", u_errorName(*status));
   1114         uprv_delete_collIterate(c);
   1115         return;
   1116     }
   1117 
   1118     colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
   1119     if (U_FAILURE(*status)) {
   1120         log_err("Could not get collator name: %s\n", u_errorName(*status));
   1121         ucol_close(UCA);
   1122         uprv_delete_collIterate(c);
   1123         return;
   1124     }
   1125 
   1126     uprv_memset(&src, 0, sizeof(UColTokenParser));
   1127 
   1128     consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
   1129     UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
   1130     /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
   1131     UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
   1132     UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
   1133 
   1134     baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
   1135 
   1136     src.opts = &opts;
   1137 
   1138     rules = ucol_getRules(coll, &ruleLen);
   1139 
   1140     src.invUCA = ucol_initInverseUCA(status);
   1141 
   1142     if(indirectBoundariesSet == FALSE) {
   1143         /* UCOL_RESET_TOP_VALUE */
   1144         setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1145         /* UCOL_FIRST_PRIMARY_IGNORABLE */
   1146         setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
   1147         /* UCOL_LAST_PRIMARY_IGNORABLE */
   1148         setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
   1149         /* UCOL_FIRST_SECONDARY_IGNORABLE */
   1150         setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
   1151         /* UCOL_LAST_SECONDARY_IGNORABLE */
   1152         setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
   1153         /* UCOL_FIRST_TERTIARY_IGNORABLE */
   1154         setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
   1155         /* UCOL_LAST_TERTIARY_IGNORABLE */
   1156         setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
   1157         /* UCOL_FIRST_VARIABLE */
   1158         setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
   1159         /* UCOL_LAST_VARIABLE */
   1160         setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
   1161         /* UCOL_FIRST_NON_VARIABLE */
   1162         setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
   1163         /* UCOL_LAST_NON_VARIABLE */
   1164         setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
   1165         /* UCOL_FIRST_IMPLICIT */
   1166         setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
   1167         /* UCOL_LAST_IMPLICIT */
   1168         setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
   1169         /* UCOL_FIRST_TRAILING */
   1170         setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
   1171         /* UCOL_LAST_TRAILING */
   1172         setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
   1173         ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
   1174         indirectBoundariesSet = TRUE;
   1175     }
   1176 
   1177 
   1178     if(U_SUCCESS(*status) && ruleLen > 0) {
   1179         rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   1180         uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
   1181         src.current = src.source = rulesCopy;
   1182         src.end = rulesCopy+ruleLen;
   1183         src.extraCurrent = src.end;
   1184         src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   1185 
   1186 	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   1187 	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   1188         while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
   1189             strength = src.parsedToken.strength;
   1190             chOffset = src.parsedToken.charsOffset;
   1191             chLen = src.parsedToken.charsLen;
   1192             exOffset = src.parsedToken.extensionOffset;
   1193             exLen = src.parsedToken.extensionLen;
   1194             prefixOffset = src.parsedToken.prefixOffset;
   1195             prefixLen = src.parsedToken.prefixLen;
   1196             specs = src.parsedToken.flags;
   1197 
   1198             startOfRules = FALSE;
   1199             varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
   1200             top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
   1201 
   1202             uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
   1203 
   1204             currCE = ucol_getNextCE(coll, c, status);
   1205             if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
   1206                 log_verbose("Thai prevowel detected. Will pick next CE\n");
   1207                 currCE = ucol_getNextCE(coll, c, status);
   1208             }
   1209 
   1210             currContCE = ucol_getNextCE(coll, c, status);
   1211             if(!isContinuation(currContCE)) {
   1212                 currContCE = 0;
   1213             }
   1214 
   1215             /* we need to repack CEs here */
   1216 
   1217             if(strength == UCOL_TOK_RESET) {
   1218                 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
   1219                 if(top_ == TRUE) {
   1220                     int32_t tokenIndex = src.parsedToken.indirectIndex;
   1221 
   1222                     nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
   1223                     nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
   1224                 } else {
   1225                     nextCE = baseCE = currCE;
   1226                     nextContCE = baseContCE = currContCE;
   1227                 }
   1228                 maxStrength = UCOL_IDENTICAL;
   1229             } else {
   1230                 if(strength < maxStrength) {
   1231                     maxStrength = strength;
   1232                     if(baseCE == UCOL_RESET_TOP_VALUE) {
   1233                         log_verbose("Resetting to [top]\n");
   1234                         nextCE = UCOL_NEXT_TOP_VALUE;
   1235                         nextContCE = UCOL_NEXT_TOP_CONT;
   1236                     } else {
   1237                         result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
   1238                     }
   1239                     if(result < 0) {
   1240                         if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
   1241                             log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
   1242                             return;
   1243                         } else {
   1244                             log_err("%s: couldn't find the CE\n", colLoc);
   1245                             return;
   1246                         }
   1247                     }
   1248                 }
   1249 
   1250                 currCE &= 0xFFFFFF3F;
   1251                 currContCE &= 0xFFFFFFBF;
   1252 
   1253                 if(maxStrength == UCOL_IDENTICAL) {
   1254                     if(baseCE != currCE || baseContCE != currContCE) {
   1255                         log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1256                     }
   1257                 } else {
   1258                     if(strength == UCOL_IDENTICAL) {
   1259                         if(lastCE != currCE || lastContCE != currContCE) {
   1260                             log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
   1261                         }
   1262                     } else {
   1263                         if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
   1264                             /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
   1265                             log_err("%s: current CE is not less than base CE\n", colLoc);
   1266                         }
   1267                         if(!before) {
   1268                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
   1269                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1270                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1271                             }
   1272                         } else {
   1273                             before = FALSE;
   1274                             if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
   1275                                 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
   1276                                 log_err("%s: sequence of generated CEs is broken\n", colLoc);
   1277                             }
   1278                         }
   1279                     }
   1280                 }
   1281 
   1282             }
   1283 
   1284             oldOffset = chOffset;
   1285             lastCE = currCE & 0xFFFFFF3F;
   1286             lastContCE = currContCE & 0xFFFFFFBF;
   1287         }
   1288         uprv_free(src.source);
   1289     }
   1290     ucol_close(UCA);
   1291     uprv_delete_collIterate(c);
   1292 }
   1293 
   #if 0
   /*
    * Compiled out: these locales are now picked from index RB.
    */
   static const char* localesToTest[] = {
       "ar", "bg", "ca", "cs", "da", "el",
       "en_BE", "en_US_POSIX", "es", "et", "fi", "fr",
       "hi", "hr", "hu", "is", "iw", "ja",
       "ko", "lt", "lv", "mk", "mt", "nb",
       "nn", "nn_NO", "pl", "ro", "ru", "sh",
       "sk", "sl", "sq", "sr", "sv", "th",
       "tr", "uk", "vi", "zh", "zh_TW"
   };
   #endif
   1308 
   /*
    * Escaped rule strings that RamsRulesTest() turns into collators with
    * u_unescape() and ucol_openRules(). The comment above an entry keeps the
    * commented-out variant that accompanied it.
    */
   static const char* rulesToTest[] = {
       /* Funky fa rule */
       "&\\u0622 < \\u0627 << \\u0671 < \\u0621",

       /* commented-out entry: "& Z < p, P" */

       /* Cui Mins rules */
       /* variant: "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'" */
       "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
       /* variant: "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
       "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
       /* variant: "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'" */
       "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
       /* variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'" */
       "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
       /* variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu" */
       "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
       /* variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'" */
       "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
       /* variant: "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
       "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
   };
   1322 
   1323 
   1324 static void TestCollations(void) {
   1325     int32_t noOfLoc = uloc_countAvailable();
   1326     int32_t i = 0, j = 0;
   1327 
   1328     UErrorCode status = U_ZERO_ERROR;
   1329     char cName[256];
   1330     UChar name[256];
   1331     int32_t nameSize;
   1332 
   1333 
   1334     const char *locName = NULL;
   1335     UCollator *coll = NULL;
   1336     UCollator *UCA = ucol_open("", &status);
   1337     UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
   1338     if (U_FAILURE(status)) {
   1339         log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
   1340         return;
   1341     }
   1342     ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
   1343 
   1344     for(i = 0; i<noOfLoc; i++) {
   1345         status = U_ZERO_ERROR;
   1346         locName = uloc_getAvailable(i);
   1347         if(uprv_strcmp("ja", locName) == 0) {
   1348             log_verbose("Don't know how to test prefixes\n");
   1349             continue;
   1350         }
   1351         if(hasCollationElements(locName)) {
   1352             nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
   1353             for(j = 0; j<nameSize; j++) {
   1354                 cName[j] = (char)name[j];
   1355             }
   1356             cName[nameSize] = 0;
   1357             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1358             coll = ucol_open(locName, &status);
   1359             if(U_SUCCESS(status)) {
   1360                 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
   1361                 ucol_close(coll);
   1362             } else {
   1363                 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
   1364                 status = U_ZERO_ERROR;
   1365             }
   1366         }
   1367     }
   1368     ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
   1369     ucol_close(UCA);
   1370 }
   1371 
   1372 static void RamsRulesTest(void) {
   1373     UErrorCode status = U_ZERO_ERROR;
   1374     int32_t i = 0;
   1375     UCollator *coll = NULL;
   1376     UChar rule[2048];
   1377     uint32_t ruleLen;
   1378     int32_t noOfLoc = uloc_countAvailable();
   1379     const char *locName = NULL;
   1380 
   1381     log_verbose("RamsRulesTest\n");
   1382 
   1383     if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
   1384         /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
   1385         return;
   1386     }
   1387 
   1388     for(i = 0; i<noOfLoc; i++) {
   1389         locName = uloc_getAvailable(i);
   1390         if(hasCollationElements(locName)) {
   1391             if (uprv_strcmp("ja", locName)==0) {
   1392                 log_verbose("Don't know how to test Japanese because of prefixes\n");
   1393                 continue;
   1394             }
   1395             if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
   1396                 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
   1397                 continue;
   1398             }
   1399             if (uprv_strcmp("bn", locName)==0 ||
   1400                 uprv_strcmp("en_US_POSIX", locName)==0 ||
   1401                 uprv_strcmp("km", locName)==0 ||
   1402                 uprv_strcmp("km_KH", locName)==0 ||
   1403                 uprv_strcmp("my", locName)==0 ||
   1404                 uprv_strcmp("si", locName)==0 ||
   1405                 uprv_strcmp("si_LK", locName)==0 ||
   1406                 uprv_strcmp("zh", locName)==0 ||
   1407                 uprv_strcmp("zh_Hant", locName)==0
   1408             ) {
   1409                 log_verbose("Don't know how to test %s. "
   1410                             "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
   1411                 continue;
   1412             }
   1413             log_verbose("Testing locale %s\n", locName);
   1414             status = U_ZERO_ERROR;
   1415             coll = ucol_open(locName, &status);
   1416             if(U_SUCCESS(status)) {
   1417               if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
   1418                 if(coll->image->jamoSpecial == TRUE) {
   1419                   log_err("%s has special JAMOs\n", locName);
   1420                 }
   1421                 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
   1422                 testCollator(coll, &status);
   1423                 testCEs(coll, &status);
   1424               } else {
   1425                 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
   1426               }
   1427               ucol_close(coll);
   1428             } else {
   1429               log_err("Could not open %s: %s\n", locName, u_errorName(status));
   1430             }
   1431         }
   1432     }
   1433 
   1434     for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
   1435         log_verbose("Testing rule: %s\n", rulesToTest[i]);
   1436         ruleLen = u_unescape(rulesToTest[i], rule, 2048);
   1437         status = U_ZERO_ERROR;
   1438         coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1439         if(U_SUCCESS(status)) {
   1440             testCollator(coll, &status);
   1441             testCEs(coll, &status);
   1442             ucol_close(coll);
   1443         } else {
   1444           log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
   1445         }
   1446     }
   1447 
   1448 }
   1449 
   1450 static void IsTailoredTest(void) {
   1451     UErrorCode status = U_ZERO_ERROR;
   1452     uint32_t i = 0;
   1453     UCollator *coll = NULL;
   1454     UChar rule[2048];
   1455     UChar tailored[2048];
   1456     UChar notTailored[2048];
   1457     uint32_t ruleLen, tailoredLen, notTailoredLen;
   1458 
   1459     log_verbose("IsTailoredTest\n");
   1460 
   1461     u_uastrcpy(rule, "&Z < A, B, C;c < d");
   1462     ruleLen = u_strlen(rule);
   1463 
   1464     u_uastrcpy(tailored, "ABCcd");
   1465     tailoredLen = u_strlen(tailored);
   1466 
   1467     u_uastrcpy(notTailored, "ZabD");
   1468     notTailoredLen = u_strlen(notTailored);
   1469 
   1470     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1471     if(U_SUCCESS(status)) {
   1472         for(i = 0; i<tailoredLen; i++) {
   1473             if(!ucol_isTailored(coll, tailored[i], &status)) {
   1474                 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
   1475             }
   1476         }
   1477         for(i = 0; i<notTailoredLen; i++) {
   1478             if(ucol_isTailored(coll, notTailored[i], &status)) {
   1479                 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
   1480             }
   1481         }
   1482         ucol_close(coll);
   1483     }
   1484     else {
   1485         log_err_status(status, "Can't tailor rules\n");
   1486     }
   1487     /* Code coverage */
   1488     status = U_ZERO_ERROR;
   1489     coll = ucol_open("ja", &status);
   1490     if(!ucol_isTailored(coll, 0x4E9C, &status)) {
   1491         log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
   1492     }
   1493     ucol_close(coll);
   1494 }
   1495 
   1496 
   /*
    * Escaped strings for TestChMove(), listed in the order the "cs" collator
    * is expected to sort them: every entry must compare as less than every
    * entry after it.
    */
   static const char chTest[][20] = {
       "c", "C",
       "ca", "cb", "cx", "cy", "CZ",
       "c\\u030C", "C\\u030C",
       "h", "H",
       "ha", "Ha", "harly",
       "hb", "HB",
       "hx", "HX",
       "hy", "HY",
       "ch", "cH", "Ch", "CH",
       "cha", "charly", "che", "chh", "chch", "chr",
       "i", "I", "iarly",
       "r", "R",
       "r\\u030C", "R\\u030C",
       "s", "S",
       "s\\u030C", "S\\u030C",
       "z", "Z",
       "z\\u030C", "Z\\u030C"
   };
   1516 
   1517 static void TestChMove(void) {
   1518     UChar t1[256] = {0};
   1519     UChar t2[256] = {0};
   1520 
   1521     uint32_t i = 0, j = 0;
   1522     uint32_t size = 0;
   1523     UErrorCode status = U_ZERO_ERROR;
   1524 
   1525     UCollator *coll = ucol_open("cs", &status);
   1526 
   1527     if(U_SUCCESS(status)) {
   1528         size = sizeof(chTest)/sizeof(chTest[0]);
   1529         for(i = 0; i < size-1; i++) {
   1530             for(j = i+1; j < size; j++) {
   1531                 u_unescape(chTest[i], t1, 256);
   1532                 u_unescape(chTest[j], t2, 256);
   1533                 doTest(coll, t1, t2, UCOL_LESS);
   1534             }
   1535         }
   1536     }
   1537     else {
   1538         log_data_err("Can't open collator");
   1539     }
   1540     ucol_close(coll);
   1541 }
   1542 
   1543 
   1544 
   1545 
   /*
    * Escaped strings referenced only by the commented-out legacy code in
    * TestImplicitTailoring().
    */
   static const char impTest[][20] = {
       "\\u4e00",
       "a", "A",
       "b", "B",
       "\\u4e01"
   };
   1554 
   1555 
   1556 static void TestImplicitTailoring(void) {
   1557   static const struct {
   1558     const char *rules;
   1559     const char *data[10];
   1560     const uint32_t len;
   1561   } tests[] = {
   1562       { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
   1563       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
   1564       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
   1565       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
   1566   };
   1567 
   1568   int32_t i = 0;
   1569 
   1570   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   1571       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   1572   }
   1573 
   1574 /*
   1575   UChar t1[256] = {0};
   1576   UChar t2[256] = {0};
   1577 
   1578   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
   1579 
   1580   uint32_t i = 0, j = 0;
   1581   uint32_t size = 0;
   1582   uint32_t ruleLen = 0;
   1583   UErrorCode status = U_ZERO_ERROR;
   1584   UCollator *coll = NULL;
   1585   ruleLen = u_unescape(rule, t1, 256);
   1586 
   1587   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1588 
   1589   if(U_SUCCESS(status)) {
   1590     size = sizeof(impTest)/sizeof(impTest[0]);
   1591     for(i = 0; i < size-1; i++) {
   1592       for(j = i+1; j < size; j++) {
   1593         u_unescape(impTest[i], t1, 256);
   1594         u_unescape(impTest[j], t2, 256);
   1595         doTest(coll, t1, t2, UCOL_LESS);
   1596       }
   1597     }
   1598   }
   1599   else {
   1600     log_err("Can't open collator");
   1601   }
   1602   ucol_close(coll);
   1603   */
   1604 }
   1605 
   1606 static void TestFCDProblem(void) {
   1607   UChar t1[256] = {0};
   1608   UChar t2[256] = {0};
   1609 
   1610   const char *s1 = "\\u0430\\u0306\\u0325";
   1611   const char *s2 = "\\u04D1\\u0325";
   1612 
   1613   UErrorCode status = U_ZERO_ERROR;
   1614   UCollator *coll = ucol_open("", &status);
   1615   u_unescape(s1, t1, 256);
   1616   u_unescape(s2, t2, 256);
   1617 
   1618   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
   1619   doTest(coll, t1, t2, UCOL_EQUAL);
   1620 
   1621   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   1622   doTest(coll, t1, t2, UCOL_EQUAL);
   1623 
   1624   ucol_close(coll);
   1625 }
   1626 
   1627 /*
   1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
   1629 We're only using NFC/NFD in this test.
   1630 */
   1631 #define NORM_BUFFER_TEST_LEN 18
   1632 typedef struct {
   1633   UChar32 u;
   1634   UChar NFC[NORM_BUFFER_TEST_LEN];
   1635   UChar NFD[NORM_BUFFER_TEST_LEN];
   1636 } tester;
   1637 
   1638 static void TestComposeDecompose(void) {
   1639     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
   1640     static const UChar UNICODESET_STR[] = {
   1641         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
   1642         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
   1643         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
   1644     };
   1645     int32_t noOfLoc;
   1646     int32_t i = 0, j = 0;
   1647 
   1648     UErrorCode status = U_ZERO_ERROR;
   1649     const char *locName = NULL;
   1650     uint32_t nfcSize;
   1651     uint32_t nfdSize;
   1652     tester **t;
   1653     uint32_t noCases = 0;
   1654     UCollator *coll = NULL;
   1655     UChar32 u = 0;
   1656     UChar comp[NORM_BUFFER_TEST_LEN];
   1657     uint32_t len = 0;
   1658     UCollationElements *iter;
   1659     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
   1660     int32_t charsToTestSize;
   1661 
   1662     noOfLoc = uloc_countAvailable();
   1663 
   1664     coll = ucol_open("", &status);
   1665     if (U_FAILURE(status)) {
   1666         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
   1667         return;
   1668     }
   1669     charsToTestSize = uset_size(charsToTest);
   1670     if (charsToTestSize <= 0) {
   1671         log_err("Set was zero. Missing data?\n");
   1672         return;
   1673     }
   1674     t = malloc(charsToTestSize * sizeof(tester *));
   1675     t[0] = (tester *)malloc(sizeof(tester));
   1676     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
   1677 
   1678     for(u = 0; u < charsToTestSize; u++) {
   1679         UChar32 ch = uset_charAt(charsToTest, u);
   1680         len = 0;
   1681         UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
   1682         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1683         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1684 
   1685         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
   1686           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
   1687             t[noCases]->u = ch;
   1688             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
   1689                 u_strncpy(t[noCases]->NFC, comp, len);
   1690                 t[noCases]->NFC[len] = 0;
   1691             }
   1692             noCases++;
   1693             t[noCases] = (tester *)malloc(sizeof(tester));
   1694             uprv_memset(t[noCases], 0, sizeof(tester));
   1695         }
   1696     }
   1697     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
   1698     uset_close(charsToTest);
   1699     charsToTest = NULL;
   1700 
   1701     for(u=0; u<(UChar32)noCases; u++) {
   1702         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1703             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
   1704             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1705         }
   1706     }
   1707     /*
   1708     for(u = 0; u < charsToTestSize; u++) {
   1709       if(!(u&0xFFFF)) {
   1710         log_verbose("%08X ", u);
   1711       }
   1712       uprv_memset(t[noCases], 0, sizeof(tester));
   1713       t[noCases]->u = u;
   1714       len = 0;
   1715       UTF_APPEND_CHAR_UNSAFE(comp, len, u);
   1716       comp[len] = 0;
   1717       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
   1718       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
   1719       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
   1720       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
   1721     }
   1722     */
   1723 
   1724     ucol_close(coll);
   1725 
   1726     log_verbose("Testing locales, number of cases = %i\n", noCases);
   1727     for(i = 0; i<noOfLoc; i++) {
   1728         status = U_ZERO_ERROR;
   1729         locName = uloc_getAvailable(i);
   1730         if(hasCollationElements(locName)) {
   1731             char cName[256];
   1732             UChar name[256];
   1733             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
   1734 
   1735             for(j = 0; j<nameSize; j++) {
   1736                 cName[j] = (char)name[j];
   1737             }
   1738             cName[nameSize] = 0;
   1739             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
   1740 
   1741             coll = ucol_open(locName, &status);
   1742             ucol_setStrength(coll, UCOL_IDENTICAL);
   1743             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1744 
   1745             for(u=0; u<(UChar32)noCases; u++) {
   1746                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
   1747                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
   1748                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
   1749                     log_verbose("Testing NFC\n");
   1750                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
   1751                     backAndForth(iter);
   1752                     log_verbose("Testing NFD\n");
   1753                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
   1754                     backAndForth(iter);
   1755                 }
   1756             }
   1757             ucol_closeElements(iter);
   1758             ucol_close(coll);
   1759         }
   1760     }
   1761     for(u = 0; u <= (UChar32)noCases; u++) {
   1762         free(t[u]);
   1763     }
   1764     free(t);
   1765 }
   1766 
   1767 static void TestEmptyRule(void) {
   1768   UErrorCode status = U_ZERO_ERROR;
   1769   UChar rulez[] = { 0 };
   1770   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   1771 
   1772   ucol_close(coll);
   1773 }
   1774 
   1775 static void TestUCARules(void) {
   1776   UErrorCode status = U_ZERO_ERROR;
   1777   UChar b[256];
   1778   UChar *rules = b;
   1779   uint32_t ruleLen = 0;
   1780   UCollator *UCAfromRules = NULL;
   1781   UCollator *coll = ucol_open("", &status);
   1782   if(status == U_FILE_ACCESS_ERROR) {
   1783     log_data_err("Is your data around?\n");
   1784     return;
   1785   } else if(U_FAILURE(status)) {
   1786     log_err("Error opening collator\n");
   1787     return;
   1788   }
   1789   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
   1790 
   1791   log_verbose("TestUCARules\n");
   1792   if(ruleLen > 256) {
   1793     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
   1794     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
   1795   }
   1796   log_verbose("Rules length is %d\n", ruleLen);
   1797   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   1798   if(U_SUCCESS(status)) {
   1799     ucol_close(UCAfromRules);
   1800   } else {
   1801     log_verbose("Unable to create a collator from UCARules!\n");
   1802   }
   1803 /*
   1804   u_unescape(blah, b, 256);
   1805   ucol_getSortKey(coll, b, 1, res, 256);
   1806 */
   1807   ucol_close(coll);
   1808   if(rules != b) {
   1809     free(rules);
   1810   }
   1811 }
   1812 
   1813 
   1814 /* Pinyin tonal order */
   1815 /*
   1816     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
   1817           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
   1818     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
   1819     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
   1820     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
   1821     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
   1822       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
   1823 .. (\u00fc)
   1824 
   1825 However, in testing we got the following order:
   1826     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
   1827           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
   1828     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
   1829 .. (\u0113)
   1830     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
   1831     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
   1832     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
   1833 .. (\u01d8)
   1834       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
   1835 */
   1836 
   1837 static void TestBefore(void) {
   1838   const static char *data[] = {
   1839       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
   1840       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
   1841       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
   1842       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
   1843       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
   1844       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
   1845   };
   1846   genericRulesStarter(
   1847     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
   1848     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
   1849     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
   1850     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
   1851     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
   1852     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
   1853     data, sizeof(data)/sizeof(data[0]));
   1854 }
   1855 
   1856 #if 0
   1857 /* superceded by TestBeforePinyin */
   1858 static void TestJ784(void) {
   1859   const static char *data[] = {
   1860       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
   1861       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
   1862       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
   1863       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
   1864       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
   1865       "\\u00fc",
   1866            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
   1867   };
   1868   genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
   1869 }
   1870 #endif
   1871 
   1872 #if 0
   1873 /* superceded by the changes to the lv locale */
   1874 static void TestJ831(void) {
   1875   const static char *data[] = {
   1876     "I",
   1877       "i",
   1878       "Y",
   1879       "y"
   1880   };
   1881   genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
   1882 }
   1883 #endif
   1884 
   1885 static void TestJ815(void) {
   1886   const static char *data[] = {
   1887     "aa",
   1888       "Aa",
   1889       "ab",
   1890       "Ab",
   1891       "ad",
   1892       "Ad",
   1893       "ae",
   1894       "Ae",
   1895       "\\u00e6",
   1896       "\\u00c6",
   1897       "af",
   1898       "Af",
   1899       "b",
   1900       "B"
   1901   };
   1902   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
   1903   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
   1904 }
   1905 
   1906 
   1907 /*
   1908 "& a < b < c < d& r < c",                                   "& a < b < d& r < c",
   1909 "& a < b < c < d& c < m",                                   "& a < b < c < m < d",
   1910 "& a < b < c < d& a < m",                                   "& a < m < b < c < d",
   1911 "& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
   1912 "& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
   1913 "& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
   1914 "& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
   1915 "& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
   1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
   1917 */
   1918 static void TestRedundantRules(void) {
   1919   int32_t i;
   1920 
   1921   static const struct {
   1922       const char *rules;
   1923       const char *expectedRules;
   1924       const char *testdata[8];
   1925       uint32_t testdatalen;
   1926   } tests[] = {
   1927     /* this test conflicts with positioning of CODAN placeholder */
   1928        /*{
   1929         "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
   1930         "&\\u2089<<<x",
   1931         {"\\u2089", "x"}, 2
   1932        }, */
   1933     /* this test conflicts with the [before x] syntax tightening */
   1934       /*{
   1935         "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
   1936         "&\\u0252<<<x",
   1937         {"\\u0252", "x"}, 2
   1938       }, */
   1939     /* this test conflicts with the [before x] syntax tightening */
   1940       /*{
   1941          "& a < b <<< c << d <<< e& [before 1] e <<< x",
   1942          "& a <<< x < b <<< c << d <<< e",
   1943         {"a", "x", "b", "c", "d", "e"}, 6
   1944       }, */
   1945       {
   1946         "& a < b < c < d& [before 1] c < m",
   1947         "& a < b < m < c < d",
   1948         {"a", "b", "m", "c", "d"}, 5
   1949       },
   1950       {
   1951         "& a < b <<< c << d <<< e& [before 3] e <<< x",
   1952         "& a < b <<< c << d <<< x <<< e",
   1953         {"a", "b", "c", "d", "x", "e"}, 6
   1954       },
   1955     /* this test conflicts with the [before x] syntax tightening */
   1956       /* {
   1957         "& a < b <<< c << d <<< e& [before 2] e <<< x",
   1958         "& a < b <<< c <<< x << d <<< e",
   1959         {"a", "b", "c", "x", "d", "e"},, 6
   1960       }, */
   1961       {
   1962         "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
   1963         "& a < b <<< c << d <<< e <<< f < x < g",
   1964         {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
   1965       },
   1966       {
   1967         "& a <<< b << c < d& a < m",
   1968         "& a <<< b << c < m < d",
   1969         {"a", "b", "c", "m", "d"}, 5
   1970       },
   1971       {
   1972         "&a<b<<b\\u0301 &z<b",
   1973         "&a<b\\u0301 &z<b",
   1974         {"a", "b\\u0301", "z", "b"}, 4
   1975       },
   1976       {
   1977         "&z<m<<<q<<<m",
   1978         "&z<q<<<m",
   1979         {"z", "q", "m"},3
   1980       },
   1981       {
   1982         "&z<<<m<q<<<m",
   1983         "&z<q<<<m",
   1984         {"z", "q", "m"}, 3
   1985       },
   1986       {
   1987         "& a < b < c < d& r < c",
   1988         "& a < b < d& r < c",
   1989         {"a", "b", "d"}, 3
   1990       },
   1991       {
   1992         "& a < b < c < d& r < c",
   1993         "& a < b < d& r < c",
   1994         {"r", "c"}, 2
   1995       },
   1996       {
   1997         "& a < b < c < d& c < m",
   1998         "& a < b < c < m < d",
   1999         {"a", "b", "c", "m", "d"}, 5
   2000       },
   2001       {
   2002         "& a < b < c < d& a < m",
   2003         "& a < m < b < c < d",
   2004         {"a", "m", "b", "c", "d"}, 5
   2005       }
   2006   };
   2007 
   2008 
   2009   UCollator *credundant = NULL;
   2010   UCollator *cresulting = NULL;
   2011   UErrorCode status = U_ZERO_ERROR;
   2012   UChar rlz[2048] = { 0 };
   2013   uint32_t rlen = 0;
   2014 
   2015   for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
   2016     log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
   2017     rlen = u_unescape(tests[i].rules, rlz, 2048);
   2018 
   2019     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2020     if(status == U_FILE_ACCESS_ERROR) {
   2021       log_data_err("Is your data around?\n");
   2022       return;
   2023     } else if(U_FAILURE(status)) {
   2024       log_err("Error opening collator\n");
   2025       return;
   2026     }
   2027 
   2028     rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
   2029     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2030 
   2031     testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
   2032 
   2033     ucol_close(credundant);
   2034     ucol_close(cresulting);
   2035 
   2036     log_verbose("testing using data\n");
   2037 
   2038     genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
   2039   }
   2040 
   2041 }
   2042 
   2043 static void TestExpansionSyntax(void) {
   2044   int32_t i;
   2045 
   2046   const static char *rules[] = {
   2047     "&AE <<< a << b <<< c &d <<< f",
   2048     "&AE <<< a <<< b << c << d < e < f <<< g",
   2049     "&AE <<< B <<< C / D <<< F"
   2050   };
   2051 
   2052   const static char *expectedRules[] = {
   2053     "&A <<< a / E << b / E <<< c /E  &d <<< f",
   2054     "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
   2055     "&A <<< B / E <<< C / ED <<< F / E"
   2056   };
   2057 
   2058   const static char *testdata[][8] = {
   2059     {"AE", "a", "b", "c"},
   2060     {"AE", "a", "b", "c", "d", "e", "f", "g"},
   2061     {"AE", "B", "C"} /* / ED <<< F / E"},*/
   2062   };
   2063 
   2064   const static uint32_t testdatalen[] = {
   2065       4,
   2066       8,
   2067       3
   2068   };
   2069 
   2070 
   2071 
   2072   UCollator *credundant = NULL;
   2073   UCollator *cresulting = NULL;
   2074   UErrorCode status = U_ZERO_ERROR;
   2075   UChar rlz[2048] = { 0 };
   2076   uint32_t rlen = 0;
   2077 
   2078   for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
   2079     log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
   2080     rlen = u_unescape(rules[i], rlz, 2048);
   2081 
   2082     credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2083     if(status == U_FILE_ACCESS_ERROR) {
   2084       log_data_err("Is your data around?\n");
   2085       return;
   2086     } else if(U_FAILURE(status)) {
   2087       log_err("Error opening collator\n");
   2088       return;
   2089     }
   2090     rlen = u_unescape(expectedRules[i], rlz, 2048);
   2091     cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
   2092 
   2093     /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
   2094     /* as a hard error test, but only in information mode */
   2095     testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
   2096 
   2097     ucol_close(credundant);
   2098     ucol_close(cresulting);
   2099 
   2100     log_verbose("testing using data\n");
   2101 
   2102     genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
   2103   }
   2104 }
   2105 
   2106 static void TestCase(void)
   2107 {
   2108     const static UChar gRules[MAX_TOKEN_LEN] =
   2109     /*" & 0 < 1,\u2461<a,A"*/
   2110     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
   2111 
   2112     const static UChar testCase[][MAX_TOKEN_LEN] =
   2113     {
   2114         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
   2115         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
   2116         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
   2117         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
   2118     };
   2119 
   2120     const static UCollationResult caseTestResults[][9] =
   2121     {
   2122         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2123         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
   2124         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
   2125         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
   2126     };
   2127 
   2128     const static UColAttributeValue caseTestAttributes[][2] =
   2129     {
   2130         { UCOL_LOWER_FIRST, UCOL_OFF},
   2131         { UCOL_UPPER_FIRST, UCOL_OFF},
   2132         { UCOL_LOWER_FIRST, UCOL_ON},
   2133         { UCOL_UPPER_FIRST, UCOL_ON}
   2134     };
   2135     int32_t i,j,k;
   2136     UErrorCode status = U_ZERO_ERROR;
   2137     UCollationElements *iter;
   2138     UCollator  *myCollation;
   2139     myCollation = ucol_open("en_US", &status);
   2140 
   2141     if(U_FAILURE(status)){
   2142         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2143         return;
   2144     }
   2145     log_verbose("Testing different case settings\n");
   2146     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2147 
   2148     for(k = 0; k<4; k++) {
   2149       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2150       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2151       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
   2152       for (i = 0; i < 3 ; i++) {
   2153         for(j = i+1; j<4; j++) {
   2154           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2155         }
   2156       }
   2157     }
   2158     ucol_close(myCollation);
   2159 
   2160     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
   2161     if(U_FAILURE(status)){
   2162         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   2163         return;
   2164     }
   2165     log_verbose("Testing different case settings with custom rules\n");
   2166     ucol_setStrength(myCollation, UCOL_TERTIARY);
   2167 
   2168     for(k = 0; k<4; k++) {
   2169       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
   2170       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
   2171       for (i = 0; i < 3 ; i++) {
   2172         for(j = i+1; j<4; j++) {
   2173           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
   2174           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
   2175           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
   2176           backAndForth(iter);
   2177           ucol_closeElements(iter);
   2178           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
   2179           backAndForth(iter);
   2180           ucol_closeElements(iter);
   2181         }
   2182       }
   2183     }
   2184     ucol_close(myCollation);
   2185     {
   2186       const static char *lowerFirst[] = {
   2187         "h",
   2188         "H",
   2189         "ch",
   2190         "Ch",
   2191         "CH",
   2192         "cha",
   2193         "chA",
   2194         "Cha",
   2195         "ChA",
   2196         "CHa",
   2197         "CHA",
   2198         "i",
   2199         "I"
   2200       };
   2201 
   2202       const static char *upperFirst[] = {
   2203         "H",
   2204         "h",
   2205         "CH",
   2206         "Ch",
   2207         "ch",
   2208         "CHA",
   2209         "CHa",
   2210         "ChA",
   2211         "Cha",
   2212         "chA",
   2213         "cha",
   2214         "I",
   2215         "i"
   2216       };
   2217       log_verbose("mixed case test\n");
   2218       log_verbose("lower first, case level off\n");
   2219       genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2220       log_verbose("upper first, case level off\n");
   2221       genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2222       log_verbose("lower first, case level on\n");
   2223       genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
   2224       log_verbose("upper first, case level on\n");
   2225       genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
   2226     }
   2227 
   2228 }
   2229 
   2230 static void TestIncrementalNormalize(void) {
   2231 
   2232     /*UChar baseA     =0x61;*/
   2233     UChar baseA     =0x41;
   2234 /*    UChar baseB     = 0x42;*/
   2235     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
   2236     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
   2237     /*
   2238         0x316 is combining grave accent below, cc=220
   2239         0x321 is combining palatalized hook below, cc=202
   2240         0x300 is combining grave accent, cc=230
   2241     */
   2242 
   2243 #define MAXSLEN 2000
   2244     /*int          maxSLen   = 64000;*/
   2245     int          sLen;
   2246     int          i;
   2247 
   2248     UCollator        *coll;
   2249     UErrorCode       status = U_ZERO_ERROR;
   2250     UCollationResult result;
   2251 
   2252     int32_t myQ = getTestOption(QUICK_OPTION);
   2253 
   2254     if(getTestOption(QUICK_OPTION) < 0) {
   2255         setTestOption(QUICK_OPTION, 1);
   2256     }
   2257 
   2258     {
   2259         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
   2260         /*          most buffers along the way.*/
   2261         UChar            strA[MAXSLEN+1];
   2262         UChar            strB[MAXSLEN+1];
   2263 
   2264         coll = ucol_open("en_US", &status);
   2265         if(status == U_FILE_ACCESS_ERROR) {
   2266           log_data_err("Is your data around?\n");
   2267           return;
   2268         } else if(U_FAILURE(status)) {
   2269           log_err("Error opening collator\n");
   2270           return;
   2271         }
   2272         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2273 
   2274         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   2275         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   2276         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   2277         for (sLen = 500; sLen<501; sLen++) {
   2278         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   2279             strA[0] = baseA;
   2280             strB[0] = baseA;
   2281             for (i=1; i<=sLen-1; i++) {
   2282                 strA[i] = ccMix[i % 3];
   2283                 strB[sLen-i] = ccMix[i % 3];
   2284             }
   2285             strA[sLen]   = 0;
   2286             strB[sLen]   = 0;
   2287 
   2288             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   2289             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   2290             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   2291             doTest(coll, strA, strB, UCOL_EQUAL);
   2292         }
   2293     }
   2294 
   2295     setTestOption(QUICK_OPTION, myQ);
   2296 
   2297 
   2298     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   2299     /*         of the string.  Checks a couple of edge cases.*/
   2300 
   2301     {
   2302         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   2303         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   2304         ucol_setStrength(coll, UCOL_TERTIARY);
   2305         doTest(coll, strA, strB, UCOL_EQUAL);
   2306     }
   2307 
   2308     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   2309 
   2310     {
   2311       /* New UCA  3.1.1.
   2312        * test below used a code point from Desseret, which sorts differently
   2313        * than d800 dc00
   2314        */
   2315         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   2316         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   2317         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   2318         ucol_setStrength(coll, UCOL_TERTIARY);
   2319         doTest(coll, strA, strB, UCOL_GREATER);
   2320     }
   2321 
   2322     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   2323 
   2324     {
   2325         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   2326         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   2327         char  sortKeyA[50];
   2328         char  sortKeyAz[50];
   2329         char  sortKeyB[50];
   2330         char  sortKeyBz[50];
   2331         int   r;
   2332 
   2333         /* there used to be -3 here. Hmmmm.... */
   2334         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   2335         result = ucol_strcoll(coll, strA, 3, strB, 3);
   2336         if (result != UCOL_GREATER) {
   2337             log_err("ERROR 1 in test 4\n");
   2338         }
   2339         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2340         if (result != UCOL_EQUAL) {
   2341             log_err("ERROR 2 in test 4\n");
   2342         }
   2343 
   2344         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2345         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2346         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2347         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2348 
   2349         r = strcmp(sortKeyA, sortKeyAz);
   2350         if (r <= 0) {
   2351             log_err("Error 3 in test 4\n");
   2352         }
   2353         r = strcmp(sortKeyA, sortKeyB);
   2354         if (r <= 0) {
   2355             log_err("Error 4 in test 4\n");
   2356         }
   2357         r = strcmp(sortKeyAz, sortKeyBz);
   2358         if (r != 0) {
   2359             log_err("Error 5 in test 4\n");
   2360         }
   2361 
   2362         ucol_setStrength(coll, UCOL_IDENTICAL);
   2363         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2364         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2365         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2366         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2367 
   2368         r = strcmp(sortKeyA, sortKeyAz);
   2369         if (r <= 0) {
   2370             log_err("Error 6 in test 4\n");
   2371         }
   2372         r = strcmp(sortKeyA, sortKeyB);
   2373         if (r <= 0) {
   2374             log_err("Error 7 in test 4\n");
   2375         }
   2376         r = strcmp(sortKeyAz, sortKeyBz);
   2377         if (r != 0) {
   2378             log_err("Error 8 in test 4\n");
   2379         }
   2380         ucol_setStrength(coll, UCOL_TERTIARY);
   2381     }
   2382 
   2383 
   2384     /*  Test 5:  Null characters in non-normal source strings.*/
   2385 
   2386     {
   2387         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   2388         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   2389         char  sortKeyA[50];
   2390         char  sortKeyAz[50];
   2391         char  sortKeyB[50];
   2392         char  sortKeyBz[50];
   2393         int   r;
   2394 
   2395         result = ucol_strcoll(coll, strA, 6, strB, 6);
   2396         if (result != UCOL_GREATER) {
   2397             log_err("ERROR 1 in test 5\n");
   2398         }
   2399         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2400         if (result != UCOL_EQUAL) {
   2401             log_err("ERROR 2 in test 5\n");
   2402         }
   2403 
   2404         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2405         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2406         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2407         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2408 
   2409         r = strcmp(sortKeyA, sortKeyAz);
   2410         if (r <= 0) {
   2411             log_err("Error 3 in test 5\n");
   2412         }
   2413         r = strcmp(sortKeyA, sortKeyB);
   2414         if (r <= 0) {
   2415             log_err("Error 4 in test 5\n");
   2416         }
   2417         r = strcmp(sortKeyAz, sortKeyBz);
   2418         if (r != 0) {
   2419             log_err("Error 5 in test 5\n");
   2420         }
   2421 
   2422         ucol_setStrength(coll, UCOL_IDENTICAL);
   2423         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   2424         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   2425         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   2426         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   2427 
   2428         r = strcmp(sortKeyA, sortKeyAz);
   2429         if (r <= 0) {
   2430             log_err("Error 6 in test 5\n");
   2431         }
   2432         r = strcmp(sortKeyA, sortKeyB);
   2433         if (r <= 0) {
   2434             log_err("Error 7 in test 5\n");
   2435         }
   2436         r = strcmp(sortKeyAz, sortKeyBz);
   2437         if (r != 0) {
   2438             log_err("Error 8 in test 5\n");
   2439         }
   2440         ucol_setStrength(coll, UCOL_TERTIARY);
   2441     }
   2442 
   2443 
   2444     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   2445 
   2446     {
   2447         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   2448         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   2449 
   2450         result = ucol_strcoll(coll, strA, 5, strB, 5);
   2451         if (result != UCOL_LESS) {
   2452             log_err("Error 1 in test 6\n");
   2453         }
   2454         result = ucol_strcoll(coll, strA, -1, strB, -1);
   2455         if (result != UCOL_EQUAL) {
   2456             log_err("Error 2 in test 6\n");
   2457         }
   2458     }
   2459 
   2460     ucol_close(coll);
   2461 }
   2462 
   2463 
   2464 
   2465 #if 0
   2466 static void TestGetCaseBit(void) {
   2467   static const char *caseBitData[] = {
   2468     "a", "A", "ch", "Ch", "CH",
   2469       "\\uFF9E", "\\u0009"
   2470   };
   2471 
   2472   static const uint8_t results[] = {
   2473     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   2474       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   2475   };
   2476 
   2477   uint32_t i, blen = 0;
   2478   UChar b[256] = {0};
   2479   UErrorCode status = U_ZERO_ERROR;
   2480   UCollator *UCA = ucol_open("", &status);
   2481   uint8_t res = 0;
   2482 
   2483   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   2484     blen = u_unescape(caseBitData[i], b, 256);
   2485     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   2486     if(results[i] != res) {
   2487       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   2488     }
   2489   }
   2490 }
   2491 #endif
   2492 
   2493 static void TestHangulTailoring(void) {
   2494     static const char *koreanData[] = {
   2495         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   2496             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   2497             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   2498             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   2499             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   2500             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   2501     };
   2502 
   2503     const char *rules =
   2504         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   2505         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   2506         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   2507         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   2508         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   2509         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   2510 
   2511 
   2512   UErrorCode status = U_ZERO_ERROR;
   2513   UChar rlz[2048] = { 0 };
   2514   uint32_t rlen = u_unescape(rules, rlz, 2048);
   2515 
   2516   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   2517   if(status == U_FILE_ACCESS_ERROR) {
   2518     log_data_err("Is your data around?\n");
   2519     return;
   2520   } else if(U_FAILURE(status)) {
   2521     log_err("Error opening collator\n");
   2522     return;
   2523   }
   2524 
   2525   log_verbose("Using start of korean rules\n");
   2526 
   2527   if(U_SUCCESS(status)) {
   2528     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2529   } else {
   2530     log_err("Unable to open collator with rules %s\n", rules);
   2531   }
   2532 
   2533   log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
   2534   ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
   2535   genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2536 
   2537   ucol_close(coll);
   2538 
   2539   log_verbose("Using ko__LOTUS locale\n");
   2540   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   2541 }
   2542 
   2543 static void TestCompressOverlap(void) {
   2544     UChar       secstr[150];
   2545     UChar       tertstr[150];
   2546     UErrorCode  status = U_ZERO_ERROR;
   2547     UCollator  *coll;
   2548     char        result[200];
   2549     uint32_t    resultlen;
   2550     int         count = 0;
   2551     char       *tempptr;
   2552 
   2553     coll = ucol_open("", &status);
   2554 
   2555     if (U_FAILURE(status)) {
   2556         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   2557         return;
   2558     }
   2559     while (count < 149) {
   2560         secstr[count] = 0x0020; /* [06, 05, 05] */
   2561         tertstr[count] = 0x0020;
   2562         count ++;
   2563     }
   2564 
   2565     /* top down compression ----------------------------------- */
   2566     secstr[count] = 0x0332; /* [, 87, 05] */
   2567     tertstr[count] = 0x3000; /* [06, 05, 07] */
   2568 
   2569     /* no compression secstr should have 150 secondary bytes, tertstr should
   2570     have 150 tertiary bytes.
   2571     with correct overlapping compression, secstr should have 4 secondary
   2572     bytes, tertstr should have > 2 tertiary bytes */
   2573     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2574     tempptr = uprv_strchr(result, 1) + 1;
   2575     while (*(tempptr + 1) != 1) {
   2576         /* the last secondary collation element is not checked since it is not
   2577         part of the compression */
   2578         if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
   2579             log_err("Secondary compression overlapped\n");
   2580         }
   2581         tempptr ++;
   2582     }
   2583 
   2584     /* tertiary top/bottom/common for en_US is similar to the secondary
   2585     top/bottom/common */
   2586     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2587     tempptr = uprv_strrchr(result, 1) + 1;
   2588     while (*(tempptr + 1) != 0) {
   2589         /* the last secondary collation element is not checked since it is not
   2590         part of the compression */
   2591         if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
   2592             log_err("Tertiary compression overlapped\n");
   2593         }
   2594         tempptr ++;
   2595     }
   2596 
   2597     /* bottom up compression ------------------------------------- */
   2598     secstr[count] = 0;
   2599     tertstr[count] = 0;
   2600     resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
   2601     tempptr = uprv_strchr(result, 1) + 1;
   2602     while (*(tempptr + 1) != 1) {
   2603         /* the last secondary collation element is not checked since it is not
   2604         part of the compression */
   2605         if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
   2606             log_err("Secondary compression overlapped\n");
   2607         }
   2608         tempptr ++;
   2609     }
   2610 
   2611     /* tertiary top/bottom/common for en_US is similar to the secondary
   2612     top/bottom/common */
   2613     resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
   2614     tempptr = uprv_strrchr(result, 1) + 1;
   2615     while (*(tempptr + 1) != 0) {
   2616         /* the last secondary collation element is not checked since it is not
   2617         part of the compression */
   2618         if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
   2619             log_err("Tertiary compression overlapped\n");
   2620         }
   2621         tempptr ++;
   2622     }
   2623 
   2624     ucol_close(coll);
   2625 }
   2626 
   /*
    * Runs the same three Cyrillic strings through the root collator and through
    * a series of rule-based collators, expecting the listed order every time.
    *
    * The "ru" locale is not exercised: Russian overrides contractions, so this
    * data is not valid for it anymore.
    */
   static void TestCyrillicTailoring(void) {
       static const char *cyrillicData[] = {
           "\\u0410b",
           "\\u0410\\u0306a",
           "\\u04d0A"
       };
       /* Tailorings to try, in the order they are run. */
       static const char *const tailorings[] = {
           "&\\u0410 = \\u0410",
           "&Z < \\u0410",
           "&\\u0410 = \\u0410 < \\u04d0",
           "&Z < \\u0410 < \\u04d0",
           "&\\u0410 = \\u0410 < \\u0410\\u0301",
           "&Z < \\u0410 < \\u0410\\u0301"
       };
       uint32_t ruleIndex;

       genericLocaleStarter("root", cyrillicData, LEN(cyrillicData));

       for (ruleIndex = 0; ruleIndex < LEN(tailorings); ++ruleIndex) {
           genericRulesStarter(tailorings[ruleIndex], cyrillicData, LEN(cyrillicData));
       }
   }
   2645 
   /*
    * Builds a collator from a [suppressContractions] rule covering
    * U+0400..U+047F and checks two three-string orderings with it.
    */
   static void TestSuppressContractions(void) {
       static const char suppressRule[] = "[suppressContractions [\\u0400-\\u047f]]";

       static const char *latinThenCyrillic[] = {
           "a\\u0410",
           "A\\u0410\\u0306",
           "\\uFF21\\u0410\\u0302"
       };
       static const char *cyrillicThenLatin[] = {
           "\\u0410\\u0302a",
           "\\u0410\\u0306b",
           "\\u0410c"
       };

       genericRulesStarter(suppressRule, latinThenCyrillic, LEN(latinThenCyrillic));
       genericRulesStarter(suppressRule, cyrillicThenLatin, LEN(cyrillicThenLatin));
   }
   2662 
   2663 static void TestContraction(void) {
   2664     const static char *testrules[] = {
   2665         "&A = AB / B",
   2666         "&A = A\\u0306/\\u0306",
   2667         "&c = ch / h"
   2668     };
   2669     const static UChar testdata[][2] = {
   2670         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   2671         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   2672         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   2673     };
   2674     const static UChar testdata2[][2] = {
   2675         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   2676         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   2677         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   2678     };
   2679     const static char *testrules3[] = {
   2680         "&z < xyz &xyzw << B",
   2681         "&z < xyz &xyz << B / w",
   2682         "&z < ch &achm << B",
   2683         "&z < ch &a << B / chm",
   2684         "&\\ud800\\udc00w << B",
   2685         "&\\ud800\\udc00 << B / w",
   2686         "&a\\ud800\\udc00m << B",
   2687         "&a << B / \\ud800\\udc00m",
   2688     };
   2689 
   2690     UErrorCode  status   = U_ZERO_ERROR;
   2691     UCollator  *coll;
   2692     UChar       rule[256] = {0};
   2693     uint32_t    rlen     = 0;
   2694     int         i;
   2695 
   2696     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2697         UCollationElements *iter1;
   2698         int j = 0;
   2699         log_verbose("Rule %s for testing\n", testrules[i]);
   2700         rlen = u_unescape(testrules[i], rule, 32);
   2701         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2702         if (U_FAILURE(status)) {
   2703             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2704             return;
   2705         }
   2706         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   2707         if (U_FAILURE(status)) {
   2708             log_err("Collation iterator creation failed\n");
   2709             return;
   2710         }
   2711         while (j < 2) {
   2712             UCollationElements *iter2 = ucol_openElements(coll,
   2713                                                          &(testdata[i][j]),
   2714                                                          1, &status);
   2715             uint32_t ce;
   2716             if (U_FAILURE(status)) {
   2717                 log_err("Collation iterator creation failed\n");
   2718                 return;
   2719             }
   2720             ce = ucol_next(iter2, &status);
   2721             while (ce != UCOL_NULLORDER) {
   2722                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   2723                     log_err("Collation elements in contraction split does not match\n");
   2724                     return;
   2725                 }
   2726                 ce = ucol_next(iter2, &status);
   2727             }
   2728             j ++;
   2729             ucol_closeElements(iter2);
   2730         }
   2731         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   2732             log_err("Collation elements not exhausted\n");
   2733             return;
   2734         }
   2735         ucol_closeElements(iter1);
   2736         ucol_close(coll);
   2737     }
   2738 
   2739     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   2740     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2741     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   2742         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2743                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   2744                 testdata2[1][1]);
   2745         return;
   2746     }
   2747     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   2748         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   2749                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   2750                 testdata2[2][1]);
   2751         return;
   2752     }
   2753     ucol_close(coll);
   2754 
   2755     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   2756         UCollator          *coll1,
   2757                            *coll2;
   2758         UCollationElements *iter1,
   2759                            *iter2;
   2760         UChar               ch = 0x0042 /* 'B' */;
   2761         uint32_t            ce;
   2762         rlen = u_unescape(testrules3[i], rule, 32);
   2763         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2764         rlen = u_unescape(testrules3[i + 1], rule, 32);
   2765         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2766         if (U_FAILURE(status)) {
   2767             log_err("Collator creation failed %s\n", testrules[i]);
   2768             return;
   2769         }
   2770         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   2771         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   2772         if (U_FAILURE(status)) {
   2773             log_err("Collation iterator creation failed\n");
   2774             return;
   2775         }
   2776         ce = ucol_next(iter1, &status);
   2777         if (U_FAILURE(status)) {
   2778             log_err("Retrieving ces failed\n");
   2779             return;
   2780         }
   2781         while (ce != UCOL_NULLORDER) {
   2782             if (ce != (uint32_t)ucol_next(iter2, &status)) {
   2783                 log_err("CEs does not match\n");
   2784                 return;
   2785             }
   2786             ce = ucol_next(iter1, &status);
   2787             if (U_FAILURE(status)) {
   2788                 log_err("Retrieving ces failed\n");
   2789                 return;
   2790             }
   2791         }
   2792         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   2793             log_err("CEs not exhausted\n");
   2794             return;
   2795         }
   2796         ucol_closeElements(iter1);
   2797         ucol_closeElements(iter2);
   2798         ucol_close(coll1);
   2799         ucol_close(coll2);
   2800     }
   2801 }
   2802 
   2803 static void TestExpansion(void) {
   2804     const static char *testrules[] = {
   2805         "&J << K / B & K << M",
   2806         "&J << K / B << M"
   2807     };
   2808     const static UChar testdata[][3] = {
   2809         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   2810         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   2811         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   2812         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   2813         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   2814         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   2815     };
   2816 
   2817     UErrorCode  status   = U_ZERO_ERROR;
   2818     UCollator  *coll;
   2819     UChar       rule[256] = {0};
   2820     uint32_t    rlen     = 0;
   2821     int         i;
   2822 
   2823     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   2824         int j = 0;
   2825         log_verbose("Rule %s for testing\n", testrules[i]);
   2826         rlen = u_unescape(testrules[i], rule, 32);
   2827         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   2828         if (U_FAILURE(status)) {
   2829             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   2830             return;
   2831         }
   2832 
   2833         for (j = 0; j < 5; j ++) {
   2834             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   2835         }
   2836         ucol_close(coll);
   2837     }
   2838 }
   2839 
   2840 #if 0
   2841 /* this test tests the current limitations of the engine */
   2842 /* it always fail, so it is disabled by default */
   2843 static void TestLimitations(void) {
   2844   /* recursive expansions */
   2845   {
   2846     static const char *rule = "&a=b/c&d=c/e";
   2847     static const char *tlimit01[] = {"add","b","adf"};
   2848     static const char *tlimit02[] = {"aa","b","af"};
   2849     log_verbose("recursive expansions\n");
   2850     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2851     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2852   }
   2853   /* contractions spanning expansions */
   2854   {
   2855     static const char *rule = "&a<<<c/e&g<<<eh";
   2856     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   2857     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   2858     log_verbose("contractions spanning expansions\n");
   2859     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   2860     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   2861   }
   2862   /* normalization: nulls in contractions */
   2863   {
   2864     static const char *rule = "&a<<<\\u0000\\u0302";
   2865     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2866     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2867     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2868     static const UColAttributeValue valOn[] = { UCOL_ON };
   2869     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2870 
   2871     log_verbose("NULL in contractions\n");
   2872     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2873     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2874     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2875     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2876 
   2877   }
   2878   /* normalization: contractions spanning normalization */
   2879   {
   2880     static const char *rule = "&a<<<\\u0000\\u0302";
   2881     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   2882     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   2883     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   2884     static const UColAttributeValue valOn[] = { UCOL_ON };
   2885     static const UColAttributeValue valOff[] = { UCOL_OFF };
   2886 
   2887     log_verbose("contractions spanning normalization\n");
   2888     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   2889     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   2890     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   2891     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   2892 
   2893   }
   2894   /* variable top:  */
   2895   {
   2896     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   2897     static const char *rule = "&\\u2010<x<[variable top]=z";
   2898     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   2899     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   2900     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   2901     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   2902     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   2903     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   2904     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   2905 
   2906     log_verbose("variable top\n");
   2907     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2908     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2909     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2910     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2911     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   2912 
   2913   }
   2914   /* case level */
   2915   {
   2916     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   2917     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   2918     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   2919     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   2920     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   2921     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   2922     log_verbose("case level\n");
   2923     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2924     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   2925     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2926     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   2927   }
   2928 
   2929 }
   2930 #endif
   2931 
   2932 static void TestBocsuCoverage(void) {
   2933   UErrorCode status = U_ZERO_ERROR;
   2934   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   2935   UChar       test[256] = {0};
   2936   uint32_t    tlen     = u_unescape(testString, test, 32);
   2937   uint8_t key[256]     = {0};
   2938   uint32_t klen         = 0;
   2939 
   2940   UCollator *coll = ucol_open("", &status);
   2941   if(U_SUCCESS(status)) {
   2942   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   2943 
   2944   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   2945 
   2946   ucol_close(coll);
   2947   } else {
   2948     log_data_err("Couldn't open UCA\n");
   2949   }
   2950 }
   2951 
   2952 static void TestVariableTopSetting(void) {
   2953   UErrorCode status = U_ZERO_ERROR;
   2954   const UChar *current = NULL;
   2955   uint32_t varTopOriginal = 0, varTop1, varTop2;
   2956   UCollator *coll = ucol_open("", &status);
   2957   if(U_SUCCESS(status)) {
   2958 
   2959   uint32_t strength = 0;
   2960   uint16_t specs = 0;
   2961   uint32_t chOffset = 0;
   2962   uint32_t chLen = 0;
   2963   uint32_t exOffset = 0;
   2964   uint32_t exLen = 0;
   2965   uint32_t oldChOffset = 0;
   2966   uint32_t oldChLen = 0;
   2967   uint32_t oldExOffset = 0;
   2968   uint32_t oldExLen = 0;
   2969   uint32_t prefixOffset = 0;
   2970   uint32_t prefixLen = 0;
   2971 
   2972   UBool startOfRules = TRUE;
   2973   UColTokenParser src;
   2974   UColOptionSet opts;
   2975 
   2976   UChar *rulesCopy = NULL;
   2977   uint32_t rulesLen;
   2978 
   2979   UCollationResult result;
   2980 
   2981   UChar first[256] = { 0 };
   2982   UChar second[256] = { 0 };
   2983   UParseError parseError;
   2984   int32_t myQ = getTestOption(QUICK_OPTION);
   2985 
   2986   uprv_memset(&src, 0, sizeof(UColTokenParser));
   2987 
   2988   src.opts = &opts;
   2989 
   2990   if(getTestOption(QUICK_OPTION) <= 0) {
   2991     setTestOption(QUICK_OPTION, 1);
   2992   }
   2993 
   2994   /* this test will fail when normalization is turned on */
   2995   /* therefore we always turn off exhaustive mode for it */
   2996   { /* QUICK > 0*/
   2997     log_verbose("Slide variable top over UCARules\n");
   2998     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
   2999     rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
   3000     rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
   3001 
   3002     if(U_SUCCESS(status) && rulesLen > 0) {
   3003       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   3004       src.current = src.source = rulesCopy;
   3005       src.end = rulesCopy+rulesLen;
   3006       src.extraCurrent = src.end;
   3007       src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
   3008 
   3009 	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
   3010 	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
   3011       while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
   3012         strength = src.parsedToken.strength;
   3013         chOffset = src.parsedToken.charsOffset;
   3014         chLen = src.parsedToken.charsLen;
   3015         exOffset = src.parsedToken.extensionOffset;
   3016         exLen = src.parsedToken.extensionLen;
   3017         prefixOffset = src.parsedToken.prefixOffset;
   3018         prefixLen = src.parsedToken.prefixLen;
   3019         specs = src.parsedToken.flags;
   3020 
   3021         startOfRules = FALSE;
   3022         {
   3023           log_verbose("%04X %d ", *(src.source+chOffset), chLen);
   3024         }
   3025         if(strength == UCOL_PRIMARY) {
   3026           status = U_ZERO_ERROR;
   3027           varTopOriginal = ucol_getVariableTop(coll, &status);
   3028           varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
   3029           if(U_FAILURE(status)) {
   3030             char buffer[256];
   3031             char *buf = buffer;
   3032             uint32_t i = 0, j;
   3033             uint32_t CE = UCOL_NO_MORE_CES;
   3034 
   3035             /* before we start screaming, let's see if there is a problem with the rules */
   3036             UErrorCode collIterateStatus = U_ZERO_ERROR;
   3037             collIterate *s = uprv_new_collIterate(&collIterateStatus);
   3038             uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
   3039 
   3040             CE = ucol_getNextCE(coll, s, &status);
   3041 
   3042             for(i = 0; i < oldChLen; i++) {
   3043               j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
   3044               buf += j;
   3045             }
   3046             if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3047               log_verbose("= Expected failure for %s =", buffer);
   3048             } else {
   3049               if(uprv_collIterateAtEnd(s)) {
   3050                 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
   3051                   oldChOffset, u_errorName(status), buffer);
   3052               } else {
   3053                 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
   3054                   buffer);
   3055               }
   3056             }
   3057             uprv_delete_collIterate(s);
   3058           }
   3059           varTop2 = ucol_getVariableTop(coll, &status);
   3060           if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
   3061             log_err("cannot retrieve set varTop value!\n");
   3062             continue;
   3063           }
   3064 
   3065           if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
   3066 
   3067             u_strncpy(first, src.source+oldChOffset, oldChLen);
   3068             u_strncpy(first+oldChLen, src.source+chOffset, chLen);
   3069             u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
   3070             first[2*oldChLen+chLen] = 0;
   3071 
   3072             if(oldExLen == 0) {
   3073               u_strncpy(second, src.source+chOffset, chLen);
   3074               second[chLen] = 0;
   3075             } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
   3076               u_strncpy(second, src.source+oldExOffset, oldExLen);
   3077               u_strncpy(second+oldChLen, src.source+chOffset, chLen);
   3078               u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
   3079               second[2*oldExLen+chLen] = 0;
   3080             }
   3081             result = ucol_strcoll(coll, first, -1, second, -1);
   3082             if(result == UCOL_EQUAL) {
   3083               doTest(coll, first, second, UCOL_EQUAL);
   3084             } else {
   3085               log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
   3086             }
   3087           }
   3088         }
   3089         if(strength != UCOL_TOK_RESET) {
   3090           oldChOffset = chOffset;
   3091           oldChLen = chLen;
   3092           oldExOffset = exOffset;
   3093           oldExLen = exLen;
   3094         }
   3095       }
   3096       status = U_ZERO_ERROR;
   3097     }
   3098     else {
   3099       log_err("Unexpected failure getting rules %s\n", u_errorName(status));
   3100       return;
   3101     }
   3102     if (U_FAILURE(status)) {
   3103         log_err("Error parsing rules %s\n", u_errorName(status));
   3104         return;
   3105     }
   3106     status = U_ZERO_ERROR;
   3107   }
   3108 
   3109   setTestOption(QUICK_OPTION, myQ);
   3110 
   3111   log_verbose("Testing setting variable top to contractions\n");
   3112   {
   3113     /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
   3114     /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
   3115     UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
   3116     while(*conts != 0) {
   3117       if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
   3118         varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
   3119       } else {
   3120         varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
   3121       }
   3122       if(U_FAILURE(status)) {
   3123         if(status == U_PRIMARY_TOO_LONG_ERROR) {
   3124           /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
   3125            * therefore it is not an error when it complains about them. */
   3126           log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
   3127                       *conts, *(conts+1), *(conts+2));
   3128         } else {
   3129           log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
   3130                   *conts, *(conts+1), *(conts+2), u_errorName(status));
   3131         }
   3132         status = U_ZERO_ERROR;
   3133       }
   3134       conts+=3;
   3135     }
   3136 
   3137     status = U_ZERO_ERROR;
   3138 
   3139     first[0] = 0x0040;
   3140     first[1] = 0x0050;
   3141     first[2] = 0x0000;
   3142 
   3143     ucol_setVariableTop(coll, first, -1, &status);
   3144 
   3145     if(U_SUCCESS(status)) {
   3146       log_err("Invalid contraction succeded in setting variable top!\n");
   3147     }
   3148 
   3149   }
   3150 
   3151   log_verbose("Test restoring variable top\n");
   3152 
   3153   status = U_ZERO_ERROR;
   3154   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   3155   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   3156     log_err("Couldn't restore old variable top\n");
   3157   }
   3158 
   3159   log_verbose("Testing calling with error set\n");
   3160 
   3161   status = U_INTERNAL_PROGRAM_ERROR;
   3162   varTop1 = ucol_setVariableTop(coll, first, 1, &status);
   3163   varTop2 = ucol_getVariableTop(coll, &status);
   3164   ucol_restoreVariableTop(coll, varTop2, &status);
   3165   varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
   3166   varTop2 = ucol_getVariableTop(NULL, &status);
   3167   ucol_restoreVariableTop(NULL, varTop2, &status);
   3168   if(status != U_INTERNAL_PROGRAM_ERROR) {
   3169     log_err("Bad reaction to passed error!\n");
   3170   }
   3171   uprv_free(src.source);
   3172   ucol_close(coll);
   3173   } else {
   3174     log_data_err("Couldn't open UCA collator\n");
   3175   }
   3176 
   3177 }
   3178 
   3179 static void TestNonChars(void) {
   3180   static const char *test[] = {
   3181       "\\u0000",  /* ignorable */
   3182       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   3183       "\\uFDD0", "\\uFDEF",
   3184       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   3185       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   3186       "\\U0003FFFE", "\\U0003FFFF",
   3187       "\\U0004FFFE", "\\U0004FFFF",
   3188       "\\U0005FFFE", "\\U0005FFFF",
   3189       "\\U0006FFFE", "\\U0006FFFF",
   3190       "\\U0007FFFE", "\\U0007FFFF",
   3191       "\\U0008FFFE", "\\U0008FFFF",
   3192       "\\U0009FFFE", "\\U0009FFFF",
   3193       "\\U000AFFFE", "\\U000AFFFF",
   3194       "\\U000BFFFE", "\\U000BFFFF",
   3195       "\\U000CFFFE", "\\U000CFFFF",
   3196       "\\U000DFFFE", "\\U000DFFFF",
   3197       "\\U000EFFFE", "\\U000EFFFF",
   3198       "\\U000FFFFE", "\\U000FFFFF",
   3199       "\\U0010FFFE", "\\U0010FFFF",
   3200       "\\uFFFF"  /* special character with maximum primary weight */
   3201   };
   3202   UErrorCode status = U_ZERO_ERROR;
   3203   UCollator *coll = ucol_open("en_US", &status);
   3204 
   3205   log_verbose("Test non characters\n");
   3206 
   3207   if(U_SUCCESS(status)) {
   3208     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   3209   } else {
   3210     log_err_status(status, "Unable to open collator\n");
   3211   }
   3212 
   3213   ucol_close(coll);
   3214 }
   3215 
   3216 static void TestExtremeCompression(void) {
   3217   static char *test[4];
   3218   int32_t j = 0, i = 0;
   3219 
   3220   for(i = 0; i<4; i++) {
   3221     test[i] = (char *)malloc(2048*sizeof(char));
   3222   }
   3223 
   3224   for(j = 20; j < 500; j++) {
   3225     for(i = 0; i<4; i++) {
   3226       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3227       test[i][j-1] = (char)('a'+i);
   3228       test[i][j] = 0;
   3229     }
   3230     genericLocaleStarter("en_US", (const char **)test, 4);
   3231   }
   3232 
   3233 
   3234   for(i = 0; i<4; i++) {
   3235     free(test[i]);
   3236   }
   3237 }
   3238 
   3239 #if 0
   3240 static void TestExtremeCompression(void) {
   3241   static char *test[4];
   3242   int32_t j = 0, i = 0;
   3243   UErrorCode status = U_ZERO_ERROR;
   3244   UCollator *coll = ucol_open("en_US", status);
   3245   for(i = 0; i<4; i++) {
   3246     test[i] = (char *)malloc(2048*sizeof(char));
   3247   }
   3248   for(j = 10; j < 2048; j++) {
   3249     for(i = 0; i<4; i++) {
   3250       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
   3251       test[i][j-1] = (char)('a'+i);
   3252       test[i][j] = 0;
   3253     }
   3254   }
   3255   genericLocaleStarter("en_US", (const char **)test, 4);
   3256 
   3257   for(j = 10; j < 2048; j++) {
   3258     for(i = 0; i<1; i++) {
   3259       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   3260       test[i][j] = 0;
   3261     }
   3262   }
   3263   for(i = 0; i<4; i++) {
   3264     free(test[i]);
   3265   }
   3266 }
   3267 #endif
   3268 
   3269 static void TestSurrogates(void) {
   3270   static const char *test[] = {
   3271     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   3272        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   3273        "\\ud800\\udc00f",  "\\ud800\\udc00",
   3274        "\\ud800\\udc00c", "\\ud800\\udc00b",
   3275        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   3276        "\\ud800\\udc00a",
   3277        "c", "b"
   3278   };
   3279 
   3280   static const char *rule =
   3281     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   3282        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   3283        "< \\ud800\\udc00f  << \\ud800\\udc00"
   3284        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   3285        "< \\ud800\\udc00a  < c < b" ;
   3286 
   3287   genericRulesStarter(rule, test, 14);
   3288 }
   3289 
   3290 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   3291 static void TestPrefix(void) {
   3292   uint32_t i;
   3293 
   3294   static const struct {
   3295     const char *rules;
   3296     const char *data[50];
   3297     const uint32_t len;
   3298   } tests[] = {
   3299     { "&z <<< z|a",
   3300       {"zz", "za"}, 2 },
   3301 
   3302     { "&z <<< z|   a",
   3303       {"zz", "za"}, 2 },
   3304     { "[strength I]"
   3305       "&a=\\ud900\\udc25"
   3306       "&z<<<\\ud900\\udc25|a",
   3307       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   3308   };
   3309 
   3310 
   3311   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3312     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3313   }
   3314 }
   3315 
   3316 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   3317 /* JIS X 4061 collation order implementation                                   */
   3318 static void TestNewJapanese(void) {
   3319 
   3320   static const char * const test1[] = {
   3321       "\\u30b7\\u30e3\\u30fc\\u30ec",
   3322       "\\u30b7\\u30e3\\u30a4",
   3323       "\\u30b7\\u30e4\\u30a3",
   3324       "\\u30b7\\u30e3\\u30ec",
   3325       "\\u3061\\u3087\\u3053",
   3326       "\\u3061\\u3088\\u3053",
   3327       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   3328       "\\u3066\\u30fc\\u305f",
   3329       "\\u30c6\\u30fc\\u30bf",
   3330       "\\u30c6\\u30a7\\u30bf",
   3331       "\\u3066\\u3048\\u305f",
   3332       "\\u3067\\u30fc\\u305f",
   3333       "\\u30c7\\u30fc\\u30bf",
   3334       "\\u30c7\\u30a7\\u30bf",
   3335       "\\u3067\\u3048\\u305f",
   3336       "\\u3066\\u30fc\\u305f\\u30fc",
   3337       "\\u30c6\\u30fc\\u30bf\\u30a1",
   3338       "\\u30c6\\u30a7\\u30bf\\u30fc",
   3339       "\\u3066\\u3047\\u305f\\u3041",
   3340       "\\u3066\\u3048\\u305f\\u30fc",
   3341       "\\u3067\\u30fc\\u305f\\u30fc",
   3342       "\\u30c7\\u30fc\\u30bf\\u30a1",
   3343       "\\u3067\\u30a7\\u305f\\u30a1",
   3344       "\\u30c7\\u3047\\u30bf\\u3041",
   3345       "\\u30c7\\u30a8\\u30bf\\u30a2",
   3346       "\\u3072\\u3086",
   3347       "\\u3073\\u3085\\u3042",
   3348       "\\u3074\\u3085\\u3042",
   3349       "\\u3073\\u3085\\u3042\\u30fc",
   3350       "\\u30d3\\u30e5\\u30a2\\u30fc",
   3351       "\\u3074\\u3085\\u3042\\u30fc",
   3352       "\\u30d4\\u30e5\\u30a2\\u30fc",
   3353       "\\u30d2\\u30e5\\u30a6",
   3354       "\\u30d2\\u30e6\\u30a6",
   3355       "\\u30d4\\u30e5\\u30a6\\u30a2",
   3356       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   3357       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   3358       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   3359       "\\u3072\\u3085\\u3093",
   3360       "\\u3074\\u3085\\u3093",
   3361       "\\u3075\\u30fc\\u308a",
   3362       "\\u30d5\\u30fc\\u30ea",
   3363       "\\u3075\\u3045\\u308a",
   3364       "\\u3075\\u30a5\\u308a",
   3365       "\\u3075\\u30a5\\u30ea",
   3366       "\\u30d5\\u30a6\\u30ea",
   3367       "\\u3076\\u30fc\\u308a",
   3368       "\\u30d6\\u30fc\\u30ea",
   3369       "\\u3076\\u3045\\u308a",
   3370       "\\u30d6\\u30a5\\u308a",
   3371       "\\u3077\\u3046\\u308a",
   3372       "\\u30d7\\u30a6\\u30ea",
   3373       "\\u3075\\u30fc\\u308a\\u30fc",
   3374       "\\u30d5\\u30a5\\u30ea\\u30fc",
   3375       "\\u3075\\u30a5\\u308a\\u30a3",
   3376       "\\u30d5\\u3045\\u308a\\u3043",
   3377       "\\u30d5\\u30a6\\u30ea\\u30fc",
   3378       "\\u3075\\u3046\\u308a\\u3043",
   3379       "\\u30d6\\u30a6\\u30ea\\u30a4",
   3380       "\\u3077\\u30fc\\u308a\\u30fc",
   3381       "\\u3077\\u30a5\\u308a\\u30a4",
   3382       "\\u3077\\u3046\\u308a\\u30fc",
   3383       "\\u30d7\\u30a6\\u30ea\\u30a4",
   3384       "\\u30d5\\u30fd",
   3385       "\\u3075\\u309e",
   3386       "\\u3076\\u309d",
   3387       "\\u3076\\u3075",
   3388       "\\u3076\\u30d5",
   3389       "\\u30d6\\u3075",
   3390       "\\u30d6\\u30d5",
   3391       "\\u3076\\u309e",
   3392       "\\u3076\\u3077",
   3393       "\\u30d6\\u3077",
   3394       "\\u3077\\u309d",
   3395       "\\u30d7\\u30fd",
   3396       "\\u3077\\u3075",
   3397 };
   3398 
   3399   static const char *test2[] = {
   3400     "\\u306f\\u309d", /* H\\u309d */
   3401     "\\u30cf\\u30fd", /* K\\u30fd */
   3402     "\\u306f\\u306f", /* HH */
   3403     "\\u306f\\u30cf", /* HK */
   3404     "\\u30cf\\u30cf", /* KK */
   3405     "\\u306f\\u309e", /* H\\u309e */
   3406     "\\u30cf\\u30fe", /* K\\u30fe */
   3407     "\\u306f\\u3070", /* HH\\u309b */
   3408     "\\u30cf\\u30d0", /* KK\\u309b */
   3409     "\\u306f\\u3071", /* HH\\u309c */
   3410     "\\u30cf\\u3071", /* KH\\u309c */
   3411     "\\u30cf\\u30d1", /* KK\\u309c */
   3412     "\\u3070\\u309d", /* H\\u309b\\u309d */
   3413     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   3414     "\\u3070\\u306f", /* H\\u309bH */
   3415     "\\u30d0\\u30cf", /* K\\u309bK */
   3416     "\\u3070\\u309e", /* H\\u309b\\u309e */
   3417     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   3418     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   3419     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   3420     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   3421     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   3422     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   3423     "\\u3071\\u309d", /* H\\u309c\\u309d */
   3424     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   3425     "\\u3071\\u306f", /* H\\u309cH */
   3426     "\\u30d1\\u30cf", /* K\\u309cK */
   3427     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   3428     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   3429     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   3430     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   3431     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   3432   };
   3433   /*
   3434   static const char *test3[] = {
   3435     "\\u221er\\u221e",
   3436     "\\u221eR#",
   3437     "\\u221et\\u221e",
   3438     "#r\\u221e",
   3439     "#R#",
   3440     "#t%",
   3441     "#T%",
   3442     "8t\\u221e",
   3443     "8T\\u221e",
   3444     "8t#",
   3445     "8T#",
   3446     "8t%",
   3447     "8T%",
   3448     "8t8",
   3449     "8T8",
   3450     "\\u03c9r\\u221e",
   3451     "\\u03a9R%",
   3452     "rr\\u221e",
   3453     "rR\\u221e",
   3454     "Rr\\u221e",
   3455     "RR\\u221e",
   3456     "RT%",
   3457     "rt8",
   3458     "tr\\u221e",
   3459     "tr8",
   3460     "TR8",
   3461     "tt8",
   3462     "\\u30b7\\u30e3\\u30fc\\u30ec",
   3463   };
   3464   */
   3465   static const UColAttribute att[] = { UCOL_STRENGTH };
   3466   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   3467 
   3468   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   3469   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   3470 
   3471   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   3472   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   3473   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   3474   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   3475   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   3476 }
   3477 
   3478 static void TestStrCollIdenticalPrefix(void) {
   3479   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   3480   const char* test[] = {
   3481     "ab\\ud9b0\\udc70",
   3482     "ab\\ud9b0\\udc71"
   3483   };
   3484   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   3485 }
   3486 /* Contractions should have all their canonically equivalent */
   3487 /* strings included */
   3488 static void TestContractionClosure(void) {
   3489   static const struct {
   3490     const char *rules;
   3491     const char *data[10];
   3492     const uint32_t len;
   3493   } tests[] = {
   3494     {   "&b=\\u00e4\\u00e4",
   3495       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   3496     {   "&b=\\u00C5",
   3497       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   3498   };
   3499   uint32_t i;
   3500 
   3501 
   3502   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3503     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   3504   }
   3505 }
   3506 
   3507 /* This tests also fails*/
   3508 static void TestBeforePrefixFailure(void) {
   3509   static const struct {
   3510     const char *rules;
   3511     const char *data[10];
   3512     const uint32_t len;
   3513   } tests[] = {
   3514     { "&g <<< a"
   3515       "&[before 3]\\uff41 <<< x",
   3516       {"x", "\\uff41"}, 2 },
   3517     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3518         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3519         "&[before 3]\\u30a7<<<\\u30a9",
   3520       {"\\u30a9", "\\u30a7"}, 2 },
   3521     {   "&[before 3]\\u30a7<<<\\u30a9"
   3522         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3523         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   3524       {"\\u30a9", "\\u30a7"}, 2 },
   3525   };
   3526   uint32_t i;
   3527 
   3528 
   3529   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3530     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3531   }
   3532 
   3533 #if 0
   3534   const char* rule1 =
   3535         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3536         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   3537         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   3538   const char* rule2 =
   3539         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   3540         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   3541         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   3542   const char* test[] = {
   3543       "\\u30c6\\u30fc\\u30bf",
   3544       "\\u30c6\\u30a7\\u30bf",
   3545   };
   3546   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   3547   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   3548 /* this piece of code should be in some sort of verbose mode     */
   3549 /* it gets the collation elements for elements and prints them   */
   3550 /* This is useful when trying to see whether the problem is      */
   3551   {
   3552     UErrorCode status = U_ZERO_ERROR;
   3553     uint32_t i = 0;
   3554     UCollationElements *it = NULL;
   3555     uint32_t CE;
   3556     UChar string[256];
   3557     uint32_t uStringLen;
   3558     UCollator *coll = NULL;
   3559 
   3560     uStringLen = u_unescape(rule1, string, 256);
   3561 
   3562     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3563 
   3564     /*coll = ucol_open("ja_JP_JIS", &status);*/
   3565     it = ucol_openElements(coll, string, 0, &status);
   3566 
   3567     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   3568       log_verbose("%s\n", test[i]);
   3569       uStringLen = u_unescape(test[i], string, 256);
   3570       ucol_setText(it, string, uStringLen, &status);
   3571 
   3572       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   3573         log_verbose("%08X\n", CE);
   3574       }
   3575       log_verbose("\n");
   3576 
   3577     }
   3578 
   3579     ucol_closeElements(it);
   3580     ucol_close(coll);
   3581   }
   3582 #endif
   3583 }
   3584 
   3585 static void TestPrefixCompose(void) {
   3586   const char* rule1 =
   3587         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   3588   /*
   3589   const char* test[] = {
   3590       "\\u30c6\\u30fc\\u30bf",
   3591       "\\u30c6\\u30a7\\u30bf",
   3592   };
   3593   */
   3594   {
   3595     UErrorCode status = U_ZERO_ERROR;
   3596     /*uint32_t i = 0;*/
   3597     /*UCollationElements *it = NULL;*/
   3598 /*    uint32_t CE;*/
   3599     UChar string[256];
   3600     uint32_t uStringLen;
   3601     UCollator *coll = NULL;
   3602 
   3603     uStringLen = u_unescape(rule1, string, 256);
   3604 
   3605     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   3606     ucol_close(coll);
   3607   }
   3608 
   3609 
   3610 }
   3611 
   3612 /*
   3613 [last variable] last variable value
   3614 [last primary ignorable] largest CE for primary ignorable
   3615 [last secondary ignorable] largest CE for secondary ignorable
   3616 [last tertiary ignorable] largest CE for tertiary ignorable
   3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   3618 */
   3619 
   3620 static void TestRuleOptions(void) {
   3621   /* values here are hardcoded and are correct for the current UCA
   3622    * when the UCA changes, one might be forced to change these
   3623    * values.
   3624    */
   3625 
   3626   /*
   3627    * These strings contain the last character before [variable top]
   3628    * and the first and second characters (by primary weights) after it.
   3629    * See FractionalUCA.txt. For example:
   3630       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   3631       [variable top = 0C FE]
   3632       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   3633      and
   3634       00B4; [0D 0C, 05, 05]
   3635    *
   3636    * Note: Starting with UCA 6.0, the [variable top] collation element
   3637    * is not the weight of any character or string,
   3638    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   3639    */
   3640 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   3641 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   3642 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   3643 
   3644   /*
   3645    * This string has to match the character that has the [last regular] weight
   3646    * which changes with each UCA version.
   3647    * See the bottom of FractionalUCA.txt which says something like
   3648       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   3649    *
   3650    * Note: Starting with UCA 6.0, the [last regular] collation element
   3651    * is not the weight of any character or string,
   3652    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   3653    */
   3654 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   3655 
   3656   static const struct {
   3657     const char *rules;
   3658     const char *data[10];
   3659     const uint32_t len;
   3660   } tests[] = {
   3661     /* - all befores here amount to zero */
   3662     { "&[before 3][first tertiary ignorable]<<<a",
   3663         { "\\u0000", "a"}, 2
   3664     }, /* you cannot go before first tertiary ignorable */
   3665 
   3666     { "&[before 3][last tertiary ignorable]<<<a",
   3667         { "\\u0000", "a"}, 2
   3668     }, /* you cannot go before last tertiary ignorable */
   3669 
   3670     { "&[before 3][first secondary ignorable]<<<a",
   3671         { "\\u0000", "a"}, 2
   3672     }, /* you cannot go before first secondary ignorable */
   3673 
   3674     { "&[before 3][last secondary ignorable]<<<a",
   3675         { "\\u0000", "a"}, 2
   3676     }, /* you cannot go before first secondary ignorable */
   3677 
   3678     /* 'normal' befores */
   3679 
   3680     { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
   3681         {  "c", "b", "\\u0332", "a" }, 4
   3682     },
   3683 
   3684     /* we don't have a code point that corresponds to
   3685      * the last primary ignorable
   3686      */
   3687     { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
   3688         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   3689     },
   3690 
   3691     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   3692         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   3693     },
   3694 
   3695     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   3696         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   3697     },
   3698 
   3699     { "&[first regular]<a"
   3700       "&[before 1][first regular]<b",
   3701       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   3702     },
   3703 
   3704     { "&[before 1][last regular]<b"
   3705       "&[last regular]<a",
   3706         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   3707     },
   3708 
   3709     { "&[before 1][first implicit]<b"
   3710       "&[first implicit]<a",
   3711         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   3712     },
   3713 
   3714     { "&[before 1][last implicit]<b"
   3715       "&[last implicit]<a",
   3716         { "b", "\\U0010FFFD", "a" }, 3
   3717     },
   3718 
   3719     { "&[last variable]<z"
   3720       "&[last primary ignorable]<x"
   3721       "&[last secondary ignorable]<<y"
   3722       "&[last tertiary ignorable]<<<w"
   3723       "&[top]<u",
   3724       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   3725     }
   3726 
   3727   };
   3728   uint32_t i;
   3729 
   3730   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3731     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3732   }
   3733 }
   3734 
   3735 
   3736 static void TestOptimize(void) {
   3737   /* this is not really a test - just trying out
   3738    * whether copying of UCA contents will fail
   3739    * Cannot really test, since the functionality
   3740    * remains the same.
   3741    */
   3742   static const struct {
   3743     const char *rules;
   3744     const char *data[10];
   3745     const uint32_t len;
   3746   } tests[] = {
   3747     /* - all befores here amount to zero */
   3748     { "[optimize [\\uAC00-\\uD7FF]]",
   3749     { "a", "b"}, 2}
   3750   };
   3751   uint32_t i;
   3752 
   3753   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   3754     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   3755   }
   3756 }
   3757 
   3758 /*
   3759 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   3760 weiv    ucol_strcollIter?
   3761 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   3762 weiv    these are the input strings?
   3763 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   3764 weiv    will check - could be a problem with utf-8 iterator
   3765 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   3766 weiv    hmmm
   3767 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   3768 weiv    that doesn't sound right
   3769 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   3770 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   3771 cycheng (at) ca.ibm.c... yes
   3772 weiv    and then do the comparison
   3773 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   3774 weiv    utf-16 strings look like a little endian ones in the example you sent me
   3775 weiv    It could be a bug - let me try to test it out
   3776 cycheng (at) ca.ibm.c... ok
   3777 cycheng (at) ca.ibm.c... we can wait till the conf. call
   3778 cycheng (at) ca.ibm.c... next weke
   3779 weiv    that would be great
   3780 weiv    hmmm
   3781 weiv    I might be wrong
   3782 weiv    let me play with it some more
   3783 cycheng (at) ca.ibm.c... ok
   3784 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   3785 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   3786 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   3787 weiv    ok
   3788 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   3789 weiv    thanks
   3790 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   3791 */
   3792 #if 0
   3793 static void Alexis(void) {
   3794   UErrorCode status = U_ZERO_ERROR;
   3795   UCollator *coll = ucol_open("", &status);
   3796 
   3797 
   3798   const char utf16be[2][4] = {
   3799     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   3800     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   3801   };
   3802 
   3803   const char utf8[2][4] = {
   3804     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   3805     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   3806   };
   3807 
   3808   UCharIterator iterU161, iterU162;
   3809   UCharIterator iterU81, iterU82;
   3810 
   3811   UCollationResult resU16, resU8;
   3812 
   3813   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   3814   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   3815 
   3816   uiter_setUTF8(&iterU81, utf8[0], 4);
   3817   uiter_setUTF8(&iterU82, utf8[1], 4);
   3818 
   3819   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3820 
   3821   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   3822   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   3823 
   3824 
   3825   if(resU16 != resU8) {
   3826     log_err("different results\n");
   3827   }
   3828 
   3829   ucol_close(coll);
   3830 }
   3831 #endif
   3832 
   3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   3834 static void Alexis2(void) {
   3835   UErrorCode status = U_ZERO_ERROR;
   3836   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3837   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3838   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   3839   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   3840 
   3841   UConverter *conv = NULL;
   3842 
   3843   UCharIterator U16BEItS, U16BEItT;
   3844   UCharIterator U8ItS, U8ItT;
   3845 
   3846   UCollationResult resU16, resU16BE, resU8;
   3847 
   3848   static const char* const pairs[][2] = {
   3849     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   3850     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   3851     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   3852     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   3853     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   3854     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   3855     { "\\u0020", "\\u0020\\u0000"}
   3856 /*
   3857 5F20 (my result here)
   3858 5F204E008E3F
   3859 5F20 (your result here)
   3860 */
   3861   };
   3862 
   3863   int32_t i = 0;
   3864 
   3865   UCollator *coll = ucol_open("", &status);
   3866   if(status == U_FILE_ACCESS_ERROR) {
   3867     log_data_err("Is your data around?\n");
   3868     return;
   3869   } else if(U_FAILURE(status)) {
   3870     log_err("Error opening collator\n");
   3871     return;
   3872   }
   3873   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3874   conv = ucnv_open("UTF16BE", &status);
   3875   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   3876     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3877     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   3878 
   3879     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   3880 
   3881     log_verbose("Result of strcoll is %i\n", resU16);
   3882 
   3883     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   3884     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   3885 
   3886     /* use the original sizes, as the result from converter is in bytes */
   3887     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   3888     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   3889 
   3890     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   3891 
   3892     log_verbose("Result of U16BE is %i\n", resU16BE);
   3893 
   3894     if(resU16 != resU16BE) {
   3895       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   3896     }
   3897 
   3898     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   3899     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   3900 
   3901     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   3902     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   3903 
   3904     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   3905 
   3906     if(resU16 != resU8) {
   3907       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   3908     }
   3909 
   3910   }
   3911 
   3912   ucol_close(coll);
   3913   ucnv_close(conv);
   3914 }
   3915 
   3916 static void TestHebrewUCA(void) {
   3917   UErrorCode status = U_ZERO_ERROR;
   3918   static const char *first[] = {
   3919     "d790d6b8d79cd795d6bcd7a9",
   3920     "d790d79cd79ed7a7d799d799d7a1",
   3921     "d790d6b4d79ed795d6bcd7a9",
   3922   };
   3923 
   3924   char utf8String[3][256];
   3925   UChar utf16String[3][256];
   3926 
   3927   int32_t i = 0, j = 0;
   3928   int32_t sizeUTF8[3];
   3929   int32_t sizeUTF16[3];
   3930 
   3931   UCollator *coll = ucol_open("", &status);
   3932   if (U_FAILURE(status)) {
   3933       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   3934       return;
   3935   }
   3936   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   3937 
   3938   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   3939     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   3940     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   3941     log_verbose("%i: ");
   3942     for(j = 0; j < sizeUTF16[i]; j++) {
   3943       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   3944       log_verbose("%04X", utf16String[i][j]);
   3945     }
   3946     log_verbose("\n");
   3947   }
   3948   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   3949     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   3950       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   3951     }
   3952   }
   3953 
   3954   ucol_close(coll);
   3955 
   3956 }
   3957 
   3958 static void TestPartialSortKeyTermination(void) {
   3959   static const char* cases[] = {
   3960     "\\u1234\\u1234\\udc00",
   3961     "\\udc00\\ud800\\ud800"
   3962   };
   3963 
   3964   int32_t i = sizeof(UCollator);
   3965 
   3966   UErrorCode status = U_ZERO_ERROR;
   3967 
   3968   UCollator *coll = ucol_open("", &status);
   3969 
   3970   UCharIterator iter;
   3971 
   3972   UChar currCase[256];
   3973   int32_t length = 0;
   3974   int32_t pKeyLen = 0;
   3975 
   3976   uint8_t key[256];
   3977 
   3978   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   3979     uint32_t state[2] = {0, 0};
   3980     length = u_unescape(cases[i], currCase, 256);
   3981     uiter_setString(&iter, currCase, length);
   3982     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   3983 
   3984     log_verbose("Done\n");
   3985 
   3986   }
   3987   ucol_close(coll);
   3988 }
   3989 
   3990 static void TestSettings(void) {
   3991   static const char* cases[] = {
   3992     "apple",
   3993       "Apple"
   3994   };
   3995 
   3996   static const char* locales[] = {
   3997     "",
   3998       "en"
   3999   };
   4000 
   4001   UErrorCode status = U_ZERO_ERROR;
   4002 
   4003   int32_t i = 0, j = 0;
   4004 
   4005   UChar source[256], target[256];
   4006   int32_t sLen = 0, tLen = 0;
   4007 
   4008   UCollator *collateObject = NULL;
   4009   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   4010     collateObject = ucol_open(locales[i], &status);
   4011     ucol_setStrength(collateObject, UCOL_PRIMARY);
   4012     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   4013     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   4014       sLen = u_unescape(cases[j-1], source, 256);
   4015       source[sLen] = 0;
   4016       tLen = u_unescape(cases[j], target, 256);
   4017       source[tLen] = 0;
   4018       doTest(collateObject, source, target, UCOL_EQUAL);
   4019     }
   4020     ucol_close(collateObject);
   4021   }
   4022 }
   4023 
   4024 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   4025     UErrorCode status = U_ZERO_ERROR;
   4026     int32_t errorNo = 0;
   4027     /*const UChar *sourceRules = NULL;*/
   4028     /*int32_t sourceRulesLen = 0;*/
   4029     UColAttributeValue french = UCOL_OFF;
   4030     int32_t cloneSize = 0;
   4031 
   4032     if(!ucol_equals(source, target)) {
   4033         log_err("Same collators, different address not equal\n");
   4034         errorNo++;
   4035     }
   4036     ucol_close(target);
   4037     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   4038         /* currently, safeClone is implemented through getRules/openRules
   4039         * so it is the same as the test below - I will comment that test out.
   4040         */
   4041         /* real thing */
   4042         target = ucol_safeClone(source, NULL, &cloneSize, &status);
   4043         if(U_FAILURE(status)) {
   4044             log_err("Error creating clone\n");
   4045             errorNo++;
   4046             return errorNo;
   4047         }
   4048         if(!ucol_equals(source, target)) {
   4049             log_err("Collator different from it's clone\n");
   4050             errorNo++;
   4051         }
   4052         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   4053         if(french == UCOL_ON) {
   4054             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   4055         } else {
   4056             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   4057         }
   4058         if(U_FAILURE(status)) {
   4059             log_err("Error setting attributes\n");
   4060             errorNo++;
   4061             return errorNo;
   4062         }
   4063         if(ucol_equals(source, target)) {
   4064             log_err("Collators same even when options changed\n");
   4065             errorNo++;
   4066         }
   4067         ucol_close(target);
   4068         /* commented out since safeClone uses exactly the same technique */
   4069         /*
   4070         sourceRules = ucol_getRules(source, &sourceRulesLen);
   4071         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4072         if(U_FAILURE(status)) {
   4073         log_err("Error instantiating target from rules\n");
   4074         errorNo++;
   4075         return errorNo;
   4076         }
   4077         if(!ucol_equals(source, target)) {
   4078         log_err("Collator different from collator that was created from the same rules\n");
   4079         errorNo++;
   4080         }
   4081         ucol_close(target);
   4082         */
   4083     }
   4084     return errorNo;
   4085 }
   4086 
   4087 
   4088 static void TestEquals(void) {
   4089     /* ucol_equals is not currently a public API. There is a chance that it will become
   4090     * something like this, but currently it is only used by RuleBasedCollator::operator==
   4091     */
   4092     /* test whether the two collators instantiated from the same locale are equal */
   4093     UErrorCode status = U_ZERO_ERROR;
   4094     UParseError parseError;
   4095     int32_t noOfLoc = uloc_countAvailable();
   4096     const char *locName = NULL;
   4097     UCollator *source = NULL, *target = NULL;
   4098     int32_t i = 0;
   4099 
   4100     const char* rules[] = {
   4101         "&l < lj <<< Lj <<< LJ",
   4102         "&n < nj <<< Nj <<< NJ",
   4103         "&ae <<< \\u00e4",
   4104         "&AE <<< \\u00c4"
   4105     };
   4106     /*
   4107     const char* badRules[] = {
   4108     "&l <<< Lj",
   4109     "&n < nj <<< nJ <<< NJ",
   4110     "&a <<< \\u00e4",
   4111     "&AE <<< \\u00c4 <<< x"
   4112     };
   4113     */
   4114 
   4115     UChar sourceRules[1024], targetRules[1024];
   4116     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   4117     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   4118 
   4119     for(i = 0; i < rulesSize; i++) {
   4120         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   4121         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   4122     }
   4123 
   4124     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4125     if(status == U_FILE_ACCESS_ERROR) {
   4126         log_data_err("Is your data around?\n");
   4127         return;
   4128     } else if(U_FAILURE(status)) {
   4129         log_err("Error opening collator\n");
   4130         return;
   4131     }
   4132     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   4133     if(!ucol_equals(source, target)) {
   4134         log_err("Equivalent collators not equal!\n");
   4135     }
   4136     ucol_close(source);
   4137     ucol_close(target);
   4138 
   4139     source = ucol_open("root", &status);
   4140     target = ucol_open("root", &status);
   4141     log_verbose("Testing root\n");
   4142     if(!ucol_equals(source, source)) {
   4143         log_err("Same collator not equal\n");
   4144     }
   4145     if(TestEqualsForCollator(locName, source, target)) {
   4146         log_err("Errors for root\n", locName);
   4147     }
   4148     ucol_close(source);
   4149 
   4150     for(i = 0; i<noOfLoc; i++) {
   4151         status = U_ZERO_ERROR;
   4152         locName = uloc_getAvailable(i);
   4153         /*if(hasCollationElements(locName)) {*/
   4154         log_verbose("Testing equality for locale %s\n", locName);
   4155         source = ucol_open(locName, &status);
   4156         target = ucol_open(locName, &status);
   4157         if (U_FAILURE(status)) {
   4158             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   4159             continue;
   4160         }
   4161         if(TestEqualsForCollator(locName, source, target)) {
   4162             log_err("Errors for locale %s\n", locName);
   4163         }
   4164         ucol_close(source);
   4165         /*}*/
   4166     }
   4167 }
   4168 
   4169 static void TestJ2726(void) {
   4170     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   4171     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   4172     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   4173     UErrorCode status = U_ZERO_ERROR;
   4174     UCollator *coll = ucol_open("en", &status);
   4175     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   4176     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4177     doTest(coll, a, aSpace, UCOL_EQUAL);
   4178     doTest(coll, aSpace, a, UCOL_EQUAL);
   4179     doTest(coll, a, spaceA, UCOL_EQUAL);
   4180     doTest(coll, spaceA, a, UCOL_EQUAL);
   4181     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   4182     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   4183     ucol_close(coll);
   4184 }
   4185 
   4186 static void NullRule(void) {
   4187     UChar r[3] = {0};
   4188     UErrorCode status = U_ZERO_ERROR;
   4189     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4190     if(U_SUCCESS(status)) {
   4191         log_err("This should have been an error!\n");
   4192         ucol_close(coll);
   4193     } else {
   4194         status = U_ZERO_ERROR;
   4195     }
   4196     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   4197     if(U_FAILURE(status)) {
   4198         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   4199     } else {
   4200         ucol_close(coll);
   4201     }
   4202 }
   4203 
   4204 /**
   4205  * Test for CollationElementIterator previous and next for the whole set of
   4206  * unicode characters with normalization on.
   4207  */
   4208 static void TestNumericCollation(void)
   4209 {
   4210     UErrorCode status = U_ZERO_ERROR;
   4211 
   4212     const static char *basicTestStrings[]={
   4213     "hello1",
   4214     "hello2",
   4215     "hello2002",
   4216     "hello2003",
   4217     "hello123456",
   4218     "hello1234567",
   4219     "hello10000000",
   4220     "hello100000000",
   4221     "hello1000000000",
   4222     "hello10000000000",
   4223     };
   4224 
   4225     const static char *preZeroTestStrings[]={
   4226     "avery10000",
   4227     "avery010000",
   4228     "avery0010000",
   4229     "avery00010000",
   4230     "avery000010000",
   4231     "avery0000010000",
   4232     "avery00000010000",
   4233     "avery000000010000",
   4234     };
   4235 
   4236     const static char *thirtyTwoBitNumericStrings[]={
   4237     "avery42949672960",
   4238     "avery42949672961",
   4239     "avery42949672962",
   4240     "avery429496729610"
   4241     };
   4242 
   4243      const static char *longNumericStrings[]={
   4244      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   4245         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   4246         are treated as multiple collation elements. */
   4247     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   4248     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   4249     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   4250     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   4251     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   4252     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   4253     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   4254     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   4255     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   4256     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   4257     };
   4258 
   4259     const static char *supplementaryDigits[] = {
   4260       "\\uD835\\uDFCE", /* 0 */
   4261       "\\uD835\\uDFCF", /* 1 */
   4262       "\\uD835\\uDFD0", /* 2 */
   4263       "\\uD835\\uDFD1", /* 3 */
   4264       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   4265       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   4266       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   4267       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   4268       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   4269       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   4270     };
   4271 
   4272     const static char *foreignDigits[] = {
   4273       "\\u0661",
   4274         "\\u0662",
   4275         "\\u0663",
   4276       "\\u0661\\u0660",
   4277       "\\u0661\\u0662",
   4278       "\\u0661\\u0663",
   4279       "\\u0662\\u0660",
   4280       "\\u0662\\u0662",
   4281       "\\u0662\\u0663",
   4282       "\\u0663\\u0660",
   4283       "\\u0663\\u0662",
   4284       "\\u0663\\u0663"
   4285     };
   4286 
   4287     const static char *evenZeroes[] = {
   4288       "2000",
   4289       "2001",
   4290         "2002",
   4291         "2003"
   4292     };
   4293 
   4294     UColAttribute att = UCOL_NUMERIC_COLLATION;
   4295     UColAttributeValue val = UCOL_ON;
   4296 
   4297     /* Open our collator. */
   4298     UCollator* coll = ucol_open("root", &status);
   4299     if (U_FAILURE(status)){
   4300         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   4301               myErrorName(status));
   4302         return;
   4303     }
   4304     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   4305     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   4306     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   4307     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   4308     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   4309     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   4310 
   4311     /* Setting up our collator to do digits. */
   4312     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   4313     if (U_FAILURE(status)){
   4314         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   4315               myErrorName(status));
   4316         return;
   4317     }
   4318 
   4319     /*
   4320        Testing that prepended zeroes still yield the correct collation behavior.
   4321        We expect that every element in our strings array will be equal.
   4322     */
   4323     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   4324 
   4325     ucol_close(coll);
   4326 }
   4327 
   4328 static void TestTibetanConformance(void)
   4329 {
   4330     const char* test[] = {
   4331         "\\u0FB2\\u0591\\u0F71\\u0061",
   4332         "\\u0FB2\\u0F71\\u0061"
   4333     };
   4334 
   4335     UErrorCode status = U_ZERO_ERROR;
   4336     UCollator *coll = ucol_open("", &status);
   4337     UChar source[100];
   4338     UChar target[100];
   4339     int result;
   4340     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4341     if (U_SUCCESS(status)) {
   4342         u_unescape(test[0], source, 100);
   4343         u_unescape(test[1], target, 100);
   4344         doTest(coll, source, target, UCOL_EQUAL);
   4345         result = ucol_strcoll(coll, source, -1,   target, -1);
   4346         log_verbose("result %d\n", result);
   4347         if (UCOL_EQUAL != result) {
   4348             log_err("Tibetan comparison error\n");
   4349         }
   4350     }
   4351     ucol_close(coll);
   4352 
   4353     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   4354 }
   4355 
   4356 static void TestPinyinProblem(void) {
   4357     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   4358     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   4359 }
   4360 
   4361 #define TST_UCOL_MAX_INPUT 0x220001
   4362 #define topByte 0xFF000000;
   4363 #define bottomByte 0xFF;
   4364 #define fourBytes 0xFFFFFFFF;
   4365 
   4366 
   4367 static void showImplicit(UChar32 i) {
   4368     if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
   4369         log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
   4370     }
   4371 }
   4372 
   4373 static void TestImplicitGeneration(void) {
   4374     UErrorCode status = U_ZERO_ERROR;
   4375     UChar32 last = 0;
   4376     UChar32 current;
   4377     UChar32 i = 0, j = 0;
   4378     UChar32 roundtrip = 0;
   4379     UChar32 lastBottom = 0;
   4380     UChar32 currentBottom = 0;
   4381     UChar32 lastTop = 0;
   4382     UChar32 currentTop = 0;
   4383 
   4384     UCollator *coll = ucol_open("root", &status);
   4385     if(U_FAILURE(status)) {
   4386         log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4387         return;
   4388     }
   4389 
   4390     uprv_uca_getRawFromImplicit(0xE20303E7);
   4391 
   4392     for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
   4393         current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
   4394 
   4395         /* check that it round-trips AND that all intervening ones are illegal*/
   4396         roundtrip = uprv_uca_getRawFromImplicit(current);
   4397         if (roundtrip != i) {
   4398             log_err("No roundtrip %08X\n", i);
   4399         }
   4400         if (last != 0) {
   4401             for (j = last + 1; j < current; ++j) {
   4402                 roundtrip = uprv_uca_getRawFromImplicit(j);
   4403                 /* raise an error if it *doesn't* find an error*/
   4404                 if (roundtrip != -1) {
   4405                     log_err("Fails to recognize illegal %08X\n", j);
   4406                 }
   4407             }
   4408         }
   4409         /* now do other consistency checks*/
   4410         lastBottom = last & bottomByte;
   4411         currentBottom = current & bottomByte;
   4412         lastTop = last & topByte;
   4413         currentTop = current & topByte;
   4414 
   4415         /* print out some values for spot-checking*/
   4416         if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
   4417             showImplicit(i-3);
   4418             showImplicit(i-2);
   4419             showImplicit(i-1);
   4420             showImplicit(i);
   4421             showImplicit(i+1);
   4422             showImplicit(i+2);
   4423         }
   4424         last = current;
   4425 
   4426         if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
   4427             log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
   4428         }
   4429     }
   4430     showImplicit(TST_UCOL_MAX_INPUT-2);
   4431     showImplicit(TST_UCOL_MAX_INPUT-1);
   4432     showImplicit(TST_UCOL_MAX_INPUT);
   4433     ucol_close(coll);
   4434 }
   4435 
   4436 /**
   4437  * Iterate through the given iterator, checking to see that all the strings
   4438  * in the expected array are present.
   4439  * @param expected array of strings we expect to see, or NULL
   4440  * @param expectedCount number of elements of expected, or 0
   4441  */
   4442 static int32_t checkUEnumeration(const char* msg,
   4443                                  UEnumeration* iter,
   4444                                  const char** expected,
   4445                                  int32_t expectedCount) {
   4446     UErrorCode ec = U_ZERO_ERROR;
   4447     int32_t i = 0, n, j, bit;
   4448     int32_t seenMask = 0;
   4449 
   4450     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   4451     n = uenum_count(iter, &ec);
   4452     if (!assertSuccess("count", &ec)) return -1;
   4453     log_verbose("%s = [", msg);
   4454     for (;; ++i) {
   4455         const char* s = uenum_next(iter, NULL, &ec);
   4456         if (!assertSuccess("snext", &ec) || s == NULL) break;
   4457         if (i != 0) log_verbose(",");
   4458         log_verbose("%s", s);
   4459         /* check expected list */
   4460         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4461             if ((seenMask&bit) == 0 &&
   4462                 uprv_strcmp(s, expected[j]) == 0) {
   4463                 seenMask |= bit;
   4464                 break;
   4465             }
   4466         }
   4467     }
   4468     log_verbose("] (%d)\n", i);
   4469     assertTrue("count verified", i==n);
   4470     /* did we see all expected strings? */
   4471     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   4472         if ((seenMask&bit)!=0) {
   4473             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   4474         } else {
   4475             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   4476         }
   4477     }
   4478     return n;
   4479 }
   4480 
   4481 /**
   4482  * Test new API added for separate collation tree.
   4483  */
   4484 static void TestSeparateTrees(void) {
   4485     UErrorCode ec = U_ZERO_ERROR;
   4486     UEnumeration *e = NULL;
   4487     int32_t n = -1;
   4488     UBool isAvailable;
   4489     char loc[256];
   4490 
   4491     static const char* AVAIL[] = { "en", "de" };
   4492 
   4493     static const char* KW[] = { "collation" };
   4494 
   4495     static const char* KWVAL[] = { "phonebook", "stroke" };
   4496 
   4497 #if !UCONFIG_NO_SERVICE
   4498     e = ucol_openAvailableLocales(&ec);
   4499     if (e != NULL) {
   4500         assertSuccess("ucol_openAvailableLocales", &ec);
   4501         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   4502         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   4503         /* Don't need to check n because we check list */
   4504         uenum_close(e);
   4505     } else {
   4506         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   4507     }
   4508 #endif
   4509 
   4510     e = ucol_getKeywords(&ec);
   4511     if (e != NULL) {
   4512         assertSuccess("ucol_getKeywords", &ec);
   4513         assertTrue("ucol_getKeywords!=0", e!=0);
   4514         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   4515         /* Don't need to check n because we check list */
   4516         uenum_close(e);
   4517     } else {
   4518         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   4519     }
   4520 
   4521     e = ucol_getKeywordValues(KW[0], &ec);
   4522     if (e != NULL) {
   4523         assertSuccess("ucol_getKeywordValues", &ec);
   4524         assertTrue("ucol_getKeywordValues!=0", e!=0);
   4525         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   4526         /* Don't need to check n because we check list */
   4527         uenum_close(e);
   4528     } else {
   4529         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   4530     }
   4531 
   4532     /* Try setting a warning before calling ucol_getKeywordValues */
   4533     ec = U_USING_FALLBACK_WARNING;
   4534     e = ucol_getKeywordValues(KW[0], &ec);
   4535     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   4536         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   4537         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   4538         /* Don't need to check n because we check list */
   4539         uenum_close(e);
   4540     }
   4541 
   4542     /*
   4543 U_DRAFT int32_t U_EXPORT2
   4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   4545                              const char* locale, UBool* isAvailable,
   4546                              UErrorCode* status);
   4547 }
   4548 */
   4549     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   4550                                      &isAvailable, &ec);
   4551     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4552         assertEquals("getFunctionalEquivalent(de)", "de", loc);
   4553         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   4554                    isAvailable == TRUE);
   4555     }
   4556 
   4557     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   4558                                      &isAvailable, &ec);
   4559     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   4560         assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
   4561         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
   4562                    isAvailable == TRUE);
   4563     }
   4564 }
   4565 
   4566 /* supercedes TestJ784 */
   4567 static void TestBeforePinyin(void) {
   4568     const static char rules[] = {
   4569         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   4570         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   4571         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   4572         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   4573         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   4574         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   4575     };
   4576 
   4577     const static char *test[] = {
   4578         "l\\u0101",
   4579         "la",
   4580         "l\\u0101n",
   4581         "lan ",
   4582         "l\\u0113",
   4583         "le",
   4584         "l\\u0113n",
   4585         "len"
   4586     };
   4587 
   4588     const static char *test2[] = {
   4589         "x\\u0101",
   4590         "x\\u0100",
   4591         "X\\u0101",
   4592         "X\\u0100",
   4593         "x\\u00E1",
   4594         "x\\u00C1",
   4595         "X\\u00E1",
   4596         "X\\u00C1",
   4597         "x\\u01CE",
   4598         "x\\u01CD",
   4599         "X\\u01CE",
   4600         "X\\u01CD",
   4601         "x\\u00E0",
   4602         "x\\u00C0",
   4603         "X\\u00E0",
   4604         "X\\u00C0",
   4605         "xa",
   4606         "xA",
   4607         "Xa",
   4608         "XA",
   4609         "x\\u0101x",
   4610         "x\\u0100x",
   4611         "x\\u00E1x",
   4612         "x\\u00C1x",
   4613         "x\\u01CEx",
   4614         "x\\u01CDx",
   4615         "x\\u00E0x",
   4616         "x\\u00C0x",
   4617         "xax",
   4618         "xAx"
   4619     };
   4620 
   4621     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4622     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   4623     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   4624     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   4625 }
   4626 
   4627 static void TestBeforeTightening(void) {
   4628     static const struct {
   4629         const char *rules;
   4630         UErrorCode expectedStatus;
   4631     } tests[] = {
   4632         { "&[before 1]a<x", U_ZERO_ERROR },
   4633         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   4634         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   4635         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   4636         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   4637         { "&[before 2]a<<x",U_ZERO_ERROR },
   4638         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   4639         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   4640         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   4641         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   4642         { "&[before 3]a<<<x",U_ZERO_ERROR },
   4643         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   4644         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   4645     };
   4646 
   4647     int32_t i = 0;
   4648 
   4649     UErrorCode status = U_ZERO_ERROR;
   4650     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4651     uint32_t rlen = 0;
   4652 
   4653     UCollator *coll = NULL;
   4654 
   4655 
   4656     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4657         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   4658         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4659         if(status != tests[i].expectedStatus) {
   4660             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   4661                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   4662         }
   4663         ucol_close(coll);
   4664         status = U_ZERO_ERROR;
   4665     }
   4666 
   4667 }
   4668 
   4669 #if 0
   4670 &m < a
   4671 &[before 1] a < x <<< X << q <<< Q < z
   4672 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   4673 
   4674 &m < a
   4675 &[before 2] a << x <<< X << q <<< Q < z
   4676 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   4677 
   4678 &m < a
   4679 &[before 3] a <<< x <<< X << q <<< Q < z
   4680 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   4681 
   4682 
   4683 &m << a
   4684 &[before 1] a < x <<< X << q <<< Q < z
   4685 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   4686 
   4687 &m << a
   4688 &[before 2] a << x <<< X << q <<< Q < z
   4689 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   4690 
   4691 &m << a
   4692 &[before 3] a <<< x <<< X << q <<< Q < z
   4693 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   4694 
   4695 
   4696 &m <<< a
   4697 &[before 1] a < x <<< X << q <<< Q < z
   4698 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   4699 
   4700 &m <<< a
   4701 &[before 2] a << x <<< X << q <<< Q < z
   4702 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   4703 
   4704 &m <<< a
   4705 &[before 3] a <<< x <<< X << q <<< Q < z
   4706 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   4707 
   4708 
   4709 &[before 1] s < x <<< X << q <<< Q < z
   4710 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   4711 
   4712 &[before 2] s << x <<< X << q <<< Q < z
   4713 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   4714 
   4715 &[before 3] s <<< x <<< X << q <<< Q < z
   4716 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   4717 
   4718 
   4719 &[before 1] \u24DC < x <<< X << q <<< Q < z
   4720 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   4721 
   4722 &[before 2] \u24DC << x <<< X << q <<< Q < z
   4723 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   4724 
   4725 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   4726 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   4727 #endif
   4728 
   4729 
   4730 #if 0
   4731 /* requires features not yet supported */
   4732 static void TestMoreBefore(void) {
   4733     static const struct {
   4734         const char* rules;
   4735         const char* order[16];
   4736         int32_t size;
   4737     } tests[] = {
   4738         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   4739         { "m","M","x","X","q","Q","z","a","n" }, 9},
   4740         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   4741         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4742         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   4743         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4744         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   4745         { "x","X","q","Q","z","m","M","a","n" }, 9},
   4746         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   4747         { "m","M","x","X","q","Q","a","z","n" }, 9},
   4748         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   4749         { "m","M","x","X","a","q","Q","z","n" }, 9},
   4750         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   4751         { "x","X","q","Q","z","n","m","a","M" }, 9},
   4752         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   4753         { "x","X","q","Q","m","a","M","z","n" }, 9},
   4754         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   4755         { "m","x","X","a","M","q","Q","z","n" }, 9},
   4756         { "&[before 1] s < x <<< X << q <<< Q < z",
   4757         { "r","R","x","X","q","Q","z","s","n" }, 9},
   4758         { "&[before 2] s << x <<< X << q <<< Q < z",
   4759         { "r","R","x","X","q","Q","s","z","n" }, 9},
   4760         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   4761         { "r","R","x","X","s","q","Q","z","n" }, 9},
   4762         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   4763         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   4764         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   4765         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   4766         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   4767         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   4768     };
   4769 
   4770     int32_t i = 0;
   4771 
   4772     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   4773         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   4774     }
   4775 }
   4776 #endif
   4777 
   4778 static void TestTailorNULL( void ) {
   4779     const static char* rule = "&a <<< '\\u0000'";
   4780     UErrorCode status = U_ZERO_ERROR;
   4781     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   4782     uint32_t rlen = 0;
   4783     UChar a = 1, null = 0;
   4784     UCollationResult res = UCOL_EQUAL;
   4785 
   4786     UCollator *coll = NULL;
   4787 
   4788 
   4789     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   4790     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   4791 
   4792     if(U_FAILURE(status)) {
   4793         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   4794     } else {
   4795         res = ucol_strcoll(coll, &a, 1, &null, 1);
   4796 
   4797         if(res != UCOL_LESS) {
   4798             log_err("NULL was not tailored properly!\n");
   4799         }
   4800     }
   4801 
   4802     ucol_close(coll);
   4803 }
   4804 
   4805 static void
   4806 TestUpperFirstQuaternary(void)
   4807 {
   4808   const char* tests[] = { "B", "b", "Bb", "bB" };
   4809   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   4810   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   4811   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4812 }
   4813 
   4814 static void
   4815 TestJ4960(void)
   4816 {
   4817   const char* tests[] = { "\\u00e2T", "aT" };
   4818   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   4819   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   4820   const char* tests2[] = { "a", "A" };
   4821   const char* rule = "&[first tertiary ignorable]=A=a";
   4822   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   4823   UColAttributeValue attVals2[] = { UCOL_ON };
   4824   /* Test whether we correctly ignore primary ignorables on case level when */
   4825   /* we have only primary & case level */
   4826   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   4827   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   4828   /* and case level */
   4829   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   4830   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   4831   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   4832 }
   4833 
   4834 static void
   4835 TestJ5223(void)
   4836 {
   4837   static const char *test = "this is a test string";
   4838   UChar ustr[256];
   4839   int32_t ustr_length = u_unescape(test, ustr, 256);
   4840   unsigned char sortkey[256];
   4841   int32_t sortkey_length;
   4842   UErrorCode status = U_ZERO_ERROR;
   4843   static UCollator *coll = NULL;
   4844   coll = ucol_open("root", &status);
   4845   if(U_FAILURE(status)) {
   4846     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4847     return;
   4848   }
   4849   ucol_setStrength(coll, UCOL_PRIMARY);
   4850   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4851   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4852   if (U_FAILURE(status)) {
   4853     log_err("Failed setting atributes\n");
   4854     return;
   4855   }
   4856   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   4857   if (sortkey_length > 256) return;
   4858 
   4859   /* we mark the position where the null byte should be written in advance */
   4860   sortkey[sortkey_length-1] = 0xAA;
   4861 
   4862   /* we set the buffer size one byte higher than needed */
   4863   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4864     sortkey_length+1);
   4865 
   4866   /* no error occurs (for me) */
   4867   if (sortkey[sortkey_length-1] == 0xAA) {
   4868     log_err("Hit bug at first try\n");
   4869   }
   4870 
   4871   /* we mark the position where the null byte should be written again */
   4872   sortkey[sortkey_length-1] = 0xAA;
   4873 
   4874   /* this time we set the buffer size to the exact amount needed */
   4875   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   4876     sortkey_length);
   4877 
   4878   /* now the trailing null byte is not written */
   4879   if (sortkey[sortkey_length-1] == 0xAA) {
   4880     log_err("Hit bug at second try\n");
   4881   }
   4882 
   4883   ucol_close(coll);
   4884 }
   4885 
   4886 /* Regression test for Thai partial sort key problem */
   4887 static void
   4888 TestJ5232(void)
   4889 {
   4890     const static char *test[] = {
   4891         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   4892         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   4893     };
   4894 
   4895     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   4896 }
   4897 
   4898 static void
   4899 TestJ5367(void)
   4900 {
   4901     const static char *test[] = { "a", "y" };
   4902     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   4903     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   4904 }
   4905 
   4906 static void
   4907 TestVI5913(void)
   4908 {
   4909     UErrorCode status = U_ZERO_ERROR;
   4910     int32_t i, j;
   4911     UCollator *coll =NULL;
   4912     uint8_t  resColl[100], expColl[100];
   4913     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   4914     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
   4915     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   4916     UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
   4917     static const UChar tData[][20]={
   4918         {0x1EAC, 0},
   4919         {0x0041, 0x0323, 0x0302, 0},
   4920         {0x1EA0, 0x0302, 0},
   4921         {0x00C2, 0x0323, 0},
   4922         {0x1ED8, 0},  /* O with dot and circumflex */
   4923         {0x1ECC, 0x0302, 0},
   4924         {0x1EB7, 0},
   4925         {0x1EA1, 0x0306, 0},
   4926     };
   4927     static const UChar tailorData[][20]={
   4928         {0x1FA2, 0},  /* Omega with 3 combining marks */
   4929         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   4930         {0x1FF3, 0x0313, 0x0300, 0},
   4931         {0x1F60, 0x0300, 0x0345, 0},
   4932         {0x1F62, 0x0345, 0},
   4933         {0x1FA0, 0x0300, 0},
   4934     };
   4935     static const UChar tailorData2[][20]={
   4936         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   4937         {0x0073, 0x0323, 0x030C, 0},
   4938         {0x0073, 0x030C, 0x0323, 0},
   4939     };
   4940     static const UChar tailorData3[][20]={
   4941         {0x007a, 0},  /*  z */
   4942         {0x0061, 0x0065, 0},  /*  a + e */
   4943         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   4944         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   4945         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   4946         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   4947         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   4948         {0x00EA, 0},  /* e with circumflex  */
   4949     };
   4950 
   4951     /* Test Vietnamese sort. */
   4952     coll = ucol_open("vi", &status);
   4953     if(U_FAILURE(status)) {
   4954         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   4955         return;
   4956     }
   4957     log_verbose("\n\nVI collation:");
   4958     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   4959         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4960     }
   4961     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   4962         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   4963     }
   4964     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   4965         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   4966     }
   4967     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   4968         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   4969     }
   4970 
   4971     for (j=0; j<8; j++) {
   4972         tLen = u_strlen(tData[j]);
   4973         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   4974         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   4975         for(i = 0; i<rLen; i++) {
   4976             log_verbose(" %02X", resColl[i]);
   4977         }
   4978     }
   4979 
   4980     ucol_close(coll);
   4981 
   4982     /* Test Romanian sort. */
   4983     coll = ucol_open("ro", &status);
   4984     log_verbose("\n\nRO collation:");
   4985     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   4986         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   4987     }
   4988     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   4989         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   4990     }
   4991     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   4992         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   4993     }
   4994 
   4995     for (j=4; j<8; j++) {
   4996         tLen = u_strlen(tData[j]);
   4997         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   4998         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   4999         for(i = 0; i<rLen; i++) {
   5000             log_verbose(" %02X", resColl[i]);
   5001         }
   5002     }
   5003     ucol_close(coll);
   5004 
   5005     /* Test the precomposed Greek character with 3 combining marks. */
   5006     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   5007     ruleLen = u_strlen(rule);
   5008     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5009     if (U_FAILURE(status)) {
   5010         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   5011         return;
   5012     }
   5013     sLen = u_strlen(tailorData[0]);
   5014     for (j=1; j<6; j++) {
   5015         tLen = u_strlen(tailorData[j]);
   5016         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   5017             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   5018         }
   5019     }
   5020     /* Test getSortKey. */
   5021     tLen = u_strlen(tailorData[0]);
   5022     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   5023     for (j=0; j<6; j++) {
   5024         tLen = u_strlen(tailorData[j]);
   5025         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   5026         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5027             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5028             for(i = 0; i<rLen; i++) {
   5029                 log_err(" %02X", resColl[i]);
   5030             }
   5031         }
   5032     }
   5033     ucol_close(coll);
   5034 
   5035     log_verbose("\n\nTailoring test for s with caron:");
   5036     ruleLen = u_strlen(rule2);
   5037     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5038     tLen = u_strlen(tailorData2[0]);
   5039     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   5040     for (j=1; j<3; j++) {
   5041         tLen = u_strlen(tailorData2[j]);
   5042         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   5043         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5044             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5045             for(i = 0; i<rLen; i++) {
   5046                 log_err(" %02X", resColl[i]);
   5047             }
   5048         }
   5049     }
   5050     ucol_close(coll);
   5051 
   5052     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   5053     ruleLen = u_strlen(rule3);
   5054     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5055     tLen = u_strlen(tailorData3[3]);
   5056     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   5057     for (j=4; j<6; j++) {
   5058         tLen = u_strlen(tailorData3[j]);
   5059         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   5060 
   5061         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   5062             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5063             for(i = 0; i<rLen; i++) {
   5064                 log_err(" %02X", resColl[i]);
   5065             }
   5066         }
   5067 
   5068         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   5069          for(i = 0; i<rLen; i++) {
   5070              log_verbose(" %02X", resColl[i]);
   5071          }
   5072     }
   5073     ucol_close(coll);
   5074 }
   5075 
   5076 static void
   5077 TestTailor6179(void)
   5078 {
   5079     UErrorCode status = U_ZERO_ERROR;
   5080     int32_t i;
   5081     UCollator *coll =NULL;
   5082     uint8_t  resColl[100];
   5083     int32_t  rLen, tLen, ruleLen;
   5084     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   5085     UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   5086             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   5087             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   5088             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   5089     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   5090     UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   5091             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   5092             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   5093             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   5094             0x3C,0x3C,0x20,0x62,0};
   5095 
   5096     UChar tData1[][20]={
   5097         {0x61, 0},
   5098         {0x62, 0},
   5099         { 0xFDD0,0x009E, 0}
   5100     };
   5101     UChar tData2[][20]={
   5102             {0x61, 0},
   5103             {0x62, 0},
   5104             { 0xFDD0,0x009E, 0}
   5105      };
   5106 
   5107     /*
   5108      * These values from FractionalUCA.txt will change,
   5109      * and need to be updated here.
   5110      */
   5111     uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
   5112     uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};
   5113     uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
   5114     uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
   5115 
   5116     /* Test [Last Primary ignorable] */
   5117 
   5118     log_verbose("\n\nTailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
   5119     ruleLen = u_strlen(rule1);
   5120     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5121     if (U_FAILURE(status)) {
   5122         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   5123         return;
   5124     }
   5125     tLen = u_strlen(tData1[0]);
   5126     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   5127     if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
   5128         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   5129         for(i = 0; i<rLen; i++) {
   5130             log_err(" %02X", resColl[i]);
   5131         }
   5132     }
   5133     tLen = u_strlen(tData1[1]);
   5134     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   5135     if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
   5136         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   5137         for(i = 0; i<rLen; i++) {
   5138             log_err(" %02X", resColl[i]);
   5139         }
   5140     }
   5141     ucol_close(coll);
   5142 
   5143 
   5144     /* Test [Last Secondary ignorable] */
   5145     log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
   5146     ruleLen = u_strlen(rule1);
   5147     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5148     if (U_FAILURE(status)) {
   5149         log_err("Tailoring test: &[last primary ignorable] failed!");
   5150         return;
   5151     }
   5152     tLen = u_strlen(tData2[0]);
   5153     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   5154     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5155     for(i = 0; i<rLen; i++) {
   5156         log_verbose(" %02X", resColl[i]);
   5157     }
   5158     if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
   5159         log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   5160         for(i = 0; i<rLen; i++) {
   5161             log_err(" %02X", resColl[i]);
   5162         }
   5163     }
   5164     tLen = u_strlen(tData2[1]);
   5165     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   5166     log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5167     for(i = 0; i<rLen; i++) {
   5168         log_verbose(" %02X", resColl[i]);
   5169     }
   5170     if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
   5171         log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   5172         for(i = 0; i<rLen; i++) {
   5173             log_err(" %02X", resColl[i]);
   5174         }
   5175     }
   5176     ucol_close(coll);
   5177 }
   5178 
   5179 static void
   5180 TestUCAPrecontext(void)
   5181 {
   5182     UErrorCode status = U_ZERO_ERROR;
   5183     int32_t i, j;
   5184     UCollator *coll =NULL;
   5185     uint8_t  resColl[100], prevColl[100];
   5186     int32_t  rLen, tLen, ruleLen;
   5187     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   5188     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   5189     /* & l middle-dot << a  a is an expansion. */
   5190 
   5191     UChar tData1[][20]={
   5192             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   5193             { 0x387, 0}, /* standalone middle dot(0x387) */
   5194             { 0x61, 0},  /* a */
   5195             { 0x6C, 0},  /* l */
   5196             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   5197             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   5198             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   5199             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   5200             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   5201             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   5202             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   5203      };
   5204 
   5205     log_verbose("\n\nEN collation:");
   5206     coll = ucol_open("en", &status);
   5207     if (U_FAILURE(status)) {
   5208         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   5209         return;
   5210     }
   5211     for (j=0; j<11; j++) {
   5212         tLen = u_strlen(tData1[j]);
   5213         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5214         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5215             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5216                     j, tData1[j]);
   5217         }
   5218         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5219         for(i = 0; i<rLen; i++) {
   5220             log_verbose(" %02X", resColl[i]);
   5221         }
   5222         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5223      }
   5224      ucol_close(coll);
   5225 
   5226 
   5227      log_verbose("\n\nJA collation:");
   5228      coll = ucol_open("ja", &status);
   5229      if (U_FAILURE(status)) {
   5230          log_err("Tailoring test: &z <<a|- failed!");
   5231          return;
   5232      }
   5233      for (j=0; j<11; j++) {
   5234          tLen = u_strlen(tData1[j]);
   5235          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5236          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5237              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5238                      j, tData1[j]);
   5239          }
   5240          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5241          for(i = 0; i<rLen; i++) {
   5242              log_verbose(" %02X", resColl[i]);
   5243          }
   5244          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5245       }
   5246       ucol_close(coll);
   5247 
   5248 
   5249       log_verbose("\n\nTailoring test: & middle dot < a ");
   5250       ruleLen = u_strlen(rule1);
   5251       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5252       if (U_FAILURE(status)) {
   5253           log_err("Tailoring test: & middle dot < a failed!");
   5254           return;
   5255       }
   5256       for (j=0; j<11; j++) {
   5257           tLen = u_strlen(tData1[j]);
   5258           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5259           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5260               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5261                       j, tData1[j]);
   5262           }
   5263           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5264           for(i = 0; i<rLen; i++) {
   5265               log_verbose(" %02X", resColl[i]);
   5266           }
   5267           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5268        }
   5269        ucol_close(coll);
   5270 
   5271 
   5272        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   5273        ruleLen = u_strlen(rule2);
   5274        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   5275        if (U_FAILURE(status)) {
   5276            log_err("Tailoring test: & l middle-dot << a failed!");
   5277            return;
   5278        }
   5279        for (j=0; j<11; j++) {
   5280            tLen = u_strlen(tData1[j]);
   5281            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   5282            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   5283                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   5284                        j, tData1[j]);
   5285            }
   5286            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   5287                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   5288                        j, tData1[j]);
   5289            }
   5290            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   5291            for(i = 0; i<rLen; i++) {
   5292                log_verbose(" %02X", resColl[i]);
   5293            }
   5294            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   5295         }
   5296         ucol_close(coll);
   5297 }
   5298 
   5299 static void
   5300 TestOutOfBuffer5468(void)
   5301 {
   5302     static const char *test = "\\u4e00";
   5303     UChar ustr[256];
   5304     int32_t ustr_length = u_unescape(test, ustr, 256);
   5305     unsigned char shortKeyBuf[1];
   5306     int32_t sortkey_length;
   5307     UErrorCode status = U_ZERO_ERROR;
   5308     static UCollator *coll = NULL;
   5309 
   5310     coll = ucol_open("root", &status);
   5311     if(U_FAILURE(status)) {
   5312       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   5313       return;
   5314     }
   5315     ucol_setStrength(coll, UCOL_PRIMARY);
   5316     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   5317     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5318     if (U_FAILURE(status)) {
   5319       log_err("Failed setting atributes\n");
   5320       return;
   5321     }
   5322 
   5323     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   5324     if (sortkey_length != 4) {
   5325         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   5326     }
   5327     log_verbose("length of sortKey is %d", sortkey_length);
   5328     ucol_close(coll);
   5329 }
   5330 
   5331 #define TSKC_DATA_SIZE 5
   5332 #define TSKC_BUF_SIZE  50
   5333 static void
   5334 TestSortKeyConsistency(void)
   5335 {
   5336     UErrorCode icuRC = U_ZERO_ERROR;
   5337     UCollator* ucol;
   5338     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   5339 
   5340     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5341     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   5342     int32_t i, j, i2;
   5343 
   5344     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   5345     if (U_FAILURE(icuRC))
   5346     {
   5347         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   5348         return;
   5349     }
   5350 
   5351     for (i = 0; i < TSKC_DATA_SIZE; i++)
   5352     {
   5353         UCharIterator uiter;
   5354         uint32_t state[2] = { 0, 0 };
   5355         int32_t dataLen = i+1;
   5356         for (j=0; j<TSKC_BUF_SIZE; j++)
   5357             bufFull[i][j] = bufPart[i][j] = 0;
   5358 
   5359         /* Full sort key */
   5360         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   5361 
   5362         /* Partial sort key */
   5363         uiter_setString(&uiter, data, dataLen);
   5364         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   5365         if (U_FAILURE(icuRC))
   5366         {
   5367             log_err("ucol_nextSortKeyPart failed\n");
   5368             ucol_close(ucol);
   5369             return;
   5370         }
   5371 
   5372         for (i2=0; i2<i; i2++)
   5373         {
   5374             UBool fullMatch = TRUE;
   5375             UBool partMatch = TRUE;
   5376             for (j=0; j<TSKC_BUF_SIZE; j++)
   5377             {
   5378                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   5379                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   5380             }
   5381             if (fullMatch != partMatch) {
   5382                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   5383                                   : "partial key was consistent, but full key changed\n");
   5384                 ucol_close(ucol);
   5385                 return;
   5386             }
   5387         }
   5388     }
   5389 
   5390     /*=============================================*/
   5391    ucol_close(ucol);
   5392 }
   5393 
   5394 /* ticket: 6101 */
   5395 static void TestCroatianSortKey(void) {
   5396     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   5397     UErrorCode status = U_ZERO_ERROR;
   5398     UCollator *ucol;
   5399     UCharIterator iter;
   5400 
   5401     static const UChar text[] = { 0x0044, 0xD81A };
   5402 
   5403     size_t length = sizeof(text)/sizeof(*text);
   5404 
   5405     uint8_t textSortKey[32];
   5406     size_t lenSortKey = 32;
   5407     size_t actualSortKeyLen;
   5408     uint32_t uStateInfo[2] = { 0, 0 };
   5409 
   5410     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   5411     if (U_FAILURE(status)) {
   5412         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   5413         return;
   5414     }
   5415 
   5416     uiter_setString(&iter, text, length);
   5417 
   5418     actualSortKeyLen = ucol_nextSortKeyPart(
   5419         ucol, &iter, (uint32_t*)uStateInfo,
   5420         textSortKey, lenSortKey, &status
   5421         );
   5422 
   5423     if (actualSortKeyLen == lenSortKey) {
   5424         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   5425     }
   5426 
   5427     ucol_close(ucol);
   5428 }
   5429 
   5430 /* ticket: 6140 */
   5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   5432  * they are both Hiragana and Katakana
   5433  */
   5434 #define SORTKEYLEN 50
   5435 static void TestHiragana(void) {
   5436     UErrorCode status = U_ZERO_ERROR;
   5437     UCollator* ucol;
   5438     UCollationResult strcollresult;
   5439     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   5440     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   5441     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   5442     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   5443     int32_t i, j;
   5444     uint8_t sortKey1[SORTKEYLEN];
   5445     uint8_t sortKey2[SORTKEYLEN];
   5446 
   5447     UCharIterator uiter1;
   5448     UCharIterator uiter2;
   5449     uint32_t state1[2] = { 0, 0 };
   5450     uint32_t state2[2] = { 0, 0 };
   5451     int32_t keySize1;
   5452     int32_t keySize2;
   5453 
   5454     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   5455             &status);
   5456     if (U_FAILURE(status)) {
   5457         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   5458         return;
   5459     }
   5460 
   5461     /* Start of full sort keys */
   5462     /* Full sort key1 */
   5463     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   5464     /* Full sort key2 */
   5465     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   5466     if (keySize1 == keySize2) {
   5467         for (i = 0; i < keySize1; i++) {
   5468             if (sortKey1[i] != sortKey2[i]) {
   5469                 log_err("Full sort keys are different. Should be equal.");
   5470             }
   5471         }
   5472     } else {
   5473         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   5474     }
   5475     /* End of full sort keys */
   5476 
   5477     /* Start of partial sort keys */
   5478     /* Partial sort key1 */
   5479     uiter_setString(&uiter1, data1, data1Len);
   5480     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   5481     /* Partial sort key2 */
   5482     uiter_setString(&uiter2, data2, data2Len);
   5483     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   5484     if (U_SUCCESS(status) && keySize1 == keySize2) {
   5485         for (j = 0; j < keySize1; j++) {
   5486             if (sortKey1[j] != sortKey2[j]) {
   5487                 log_err("Partial sort keys are different. Should be equal");
   5488             }
   5489         }
   5490     } else {
   5491         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   5492     }
   5493     /* End of partial sort keys */
   5494 
   5495     /* Start of strcoll */
   5496     /* Use ucol_strcoll() to determine ordering */
   5497     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   5498     if (strcollresult != UCOL_EQUAL) {
   5499         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   5500     }
   5501 
   5502     ucol_close(ucol);
   5503 }
   5504 
   5505 /* Convenient struct for running collation tests */
   5506 typedef struct {
   5507   const UChar source[MAX_TOKEN_LEN];  /* String on left */
   5508   const UChar target[MAX_TOKEN_LEN];  /* String on right */
   5509   UCollationResult result;            /* -1, 0 or +1, depending on collation */
   5510 } OneTestCase;
   5511 
   5512 /*
   5513  * Utility function to test one collation test case.
   5514  * @param testcases Array of test cases.
   5515  * @param n_testcases Size of the array testcases.
   5516  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   5517  * @param n_rules Size of the array str_rules.
   5518  */
   5519 static void doTestOneTestCase(const OneTestCase testcases[],
   5520                               int n_testcases,
   5521                               const char* str_rules[],
   5522                               int n_rules)
   5523 {
   5524   int rule_no, testcase_no;
   5525   UChar rule[500];
   5526   int32_t length = 0;
   5527   UErrorCode status = U_ZERO_ERROR;
   5528   UParseError parse_error;
   5529   UCollator  *myCollation;
   5530 
   5531   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5532 
   5533     length = u_unescape(str_rules[rule_no], rule, 500);
   5534     if (length == 0) {
   5535         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5536         return;
   5537     }
   5538     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5539     if(U_FAILURE(status)){
   5540         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5541         return;
   5542     }
   5543     log_verbose("Testing the <<* syntax\n");
   5544     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   5545     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5546     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   5547       doTest(myCollation,
   5548              testcases[testcase_no].source,
   5549              testcases[testcase_no].target,
   5550              testcases[testcase_no].result
   5551              );
   5552     }
   5553     ucol_close(myCollation);
   5554   }
   5555 }
   5556 
   5557 const static OneTestCase rangeTestcases[] = {
   5558   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
   5559   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
   5560   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
   5561 
   5562   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
   5563   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
   5564   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
   5565   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
   5566   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
   5567 
   5568   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
   5569   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
   5570   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
   5571   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
   5572 
   5573   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
   5574   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
   5575   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
   5576   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
   5577   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
   5578   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
   5579   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
   5580   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
   5581 };
   5582 
   5583 static int nRangeTestcases = LEN(rangeTestcases);
   5584 
   5585 const static OneTestCase rangeTestcasesSupplemental[] = {
   5586   { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
   5587   { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
   5588   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5589   { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5590   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5591   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   5592   { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
   5593 };
   5594 
   5595 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
   5596 
   5597 const static OneTestCase rangeTestcasesQwerty[] = {
   5598   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
   5599   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
   5600 
   5601   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
   5602   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
   5603 
   5604   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
   5605   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
   5606 
   5607   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
   5608   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
   5609 
   5610   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
   5611     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
   5612   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
   5613     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
   5614 };
   5615 
   5616 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
   5617 
   5618 static void TestSameStrengthList(void)
   5619 {
   5620   const char* strRules[] = {
   5621     /* Normal */
   5622     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   5623 
   5624     /* Lists */
   5625     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   5626   };
   5627   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5628 }
   5629 
   5630 static void TestSameStrengthListQuoted(void)
   5631 {
   5632   const char* strRules[] = {
   5633     /* Lists with quoted characters */
   5634     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   5635     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   5636 
   5637     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   5638     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   5639 
   5640     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   5641     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   5642   };
   5643   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5644 }
   5645 
   5646 static void TestSameStrengthListSupplemental(void)
   5647 {
   5648   const char* strRules[] = {
   5649     "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
   5650     "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   5651     "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
   5652     "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   5653   };
   5654   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5655 }
   5656 
   5657 static void TestSameStrengthListQwerty(void)
   5658 {
   5659   const char* strRules[] = {
   5660     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5661     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5662     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   5663     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   5664     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   5665 
   5666     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   5667     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   5668 
   5669     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   5670     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   5671 
   5672  };
   5673   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5674 }
   5675 
   5676 static void TestSameStrengthListQuotedQwerty(void)
   5677 {
   5678   const char* strRules[] = {
   5679     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   5680     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   5681     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   5682 
   5683     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   5684     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   5685    };
   5686   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   5687 }
   5688 
   5689 static void TestSameStrengthListRanges(void)
   5690 {
   5691   const char* strRules[] = {
   5692     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   5693   };
   5694   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   5695 }
   5696 
   5697 static void TestSameStrengthListSupplementalRanges(void)
   5698 {
   5699   const char* strRules[] = {
   5700     "&\\ufffe<*\\uffff-\\U00010002",
   5701   };
   5702   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   5703 }
   5704 
   5705 static void TestSpecialCharacters(void)
   5706 {
   5707   const char* strRules[] = {
   5708     /* Normal */
   5709     "&';'<'+'<','<'-'<'&'<'*'",
   5710 
   5711     /* List */
   5712     "&';'<*'+,-&*'",
   5713 
   5714     /* Range */
   5715     "&';'<*'+'-'-&*'",
   5716   };
   5717 
   5718   const static OneTestCase specialCharacterStrings[] = {
   5719     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   5720     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   5721     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   5722     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   5723   };
   5724   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
   5725 }
   5726 
   5727 static void TestPrivateUseCharacters(void)
   5728 {
   5729   const char* strRules[] = {
   5730     /* Normal */
   5731     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   5732     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   5733   };
   5734 
   5735   const static OneTestCase privateUseCharacterStrings[] = {
   5736     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5737     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5738     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5739     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5740     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5741     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5742   };
   5743   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5744 }
   5745 
   5746 static void TestPrivateUseCharactersInList(void)
   5747 {
   5748   const char* strRules[] = {
   5749     /* List */
   5750     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   5751     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   5752     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   5753   };
   5754 
   5755   const static OneTestCase privateUseCharacterStrings[] = {
   5756     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5757     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5758     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5759     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5760     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5761     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5762   };
   5763   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5764 }
   5765 
   5766 static void TestPrivateUseCharactersInRange(void)
   5767 {
   5768   const char* strRules[] = {
   5769     /* Range */
   5770     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   5771     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   5772     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   5773   };
   5774 
   5775   const static OneTestCase privateUseCharacterStrings[] = {
   5776     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   5777     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   5778     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   5779     { {0xe2da}, {0xe2db}, UCOL_LESS },
   5780     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   5781     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   5782   };
   5783   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5784 }
   5785 
   5786 static void TestInvalidListsAndRanges(void)
   5787 {
   5788   const char* invalidRules[] = {
   5789     /* Range not in starred expression */
   5790     "&\\ufffe<\\uffff-\\U00010002",
   5791 
   5792     /* Range without start */
   5793     "&a<*-c",
   5794 
   5795     /* Range without end */
   5796     "&a<*b-",
   5797 
   5798     /* More than one hyphen */
   5799     "&a<*b-g-l",
   5800 
   5801     /* Range in the wrong order */
   5802     "&a<*k-b",
   5803 
   5804   };
   5805 
   5806   UChar rule[500];
   5807   UErrorCode status = U_ZERO_ERROR;
   5808   UParseError parse_error;
   5809   int n_rules = LEN(invalidRules);
   5810   int rule_no;
   5811   int length;
   5812   UCollator  *myCollation;
   5813 
   5814   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   5815 
   5816     length = u_unescape(invalidRules[rule_no], rule, 500);
   5817     if (length == 0) {
   5818         log_err("ERROR: The rule cannot be unescaped: %s\n");
   5819         return;
   5820     }
   5821     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   5822     if(!U_FAILURE(status)){
   5823       log_err("ERROR: Could not cause a failure as expected: \n");
   5824     }
   5825     status = U_ZERO_ERROR;
   5826   }
   5827 }
   5828 
   5829 /*
   5830  * This test ensures that characters placed before a character in a different script have the same lead byte
   5831  * in their collation key before and after script reordering.
   5832  */
   5833 static void TestBeforeRuleWithScriptReordering(void)
   5834 {
   5835     UParseError error;
   5836     UErrorCode status = U_ZERO_ERROR;
   5837     UCollator  *myCollation;
   5838     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   5839     UChar rules[500];
   5840     uint32_t rulesLength = 0;
   5841     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5842     UCollationResult collResult;
   5843 
   5844     uint8_t baseKey[256];
   5845     uint32_t baseKeyLength;
   5846     uint8_t beforeKey[256];
   5847     uint32_t beforeKeyLength;
   5848 
   5849     UChar base[] = { 0x03b1 }; /* base */
   5850     int32_t baseLen = sizeof(base)/sizeof(*base);
   5851 
   5852     UChar before[] = { 0x0e01 }; /* ko kai */
   5853     int32_t beforeLen = sizeof(before)/sizeof(*before);
   5854 
   5855     /*UChar *data[] = { before, base };
   5856     genericRulesStarter(srules, data, 2);*/
   5857 
   5858     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   5859 
   5860 
   5861     /* build collator */
   5862     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   5863 
   5864     rulesLength = u_unescape(srules, rules, LEN(rules));
   5865     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5866     if(U_FAILURE(status)) {
   5867         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5868         return;
   5869     }
   5870 
   5871     /* check collation results - before rule applied but not script reordering */
   5872     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5873     if (collResult != UCOL_GREATER) {
   5874         log_err("Collation result not correct before script reordering = %d\n", collResult);
   5875     }
   5876 
   5877     /* check the lead byte of the collation keys before script reordering */
   5878     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5879     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5880     if (baseKey[0] != beforeKey[0]) {
   5881       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5882    }
   5883 
   5884     /* reorder the scripts */
   5885     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   5886     if(U_FAILURE(status)) {
   5887         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5888         return;
   5889     }
   5890 
   5891     /* check collation results - before rule applied and after script reordering */
   5892     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   5893     if (collResult != UCOL_GREATER) {
   5894         log_err("Collation result not correct after script reordering = %d\n", collResult);
   5895     }
   5896 
   5897     /* check the lead byte of the collation keys after script reordering */
   5898     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   5899     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   5900     if (baseKey[0] != beforeKey[0]) {
   5901         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   5902     }
   5903 
   5904     ucol_close(myCollation);
   5905 }
   5906 
   5907 /*
   5908  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   5909  */
   5910 static void TestNonLeadBytesDuringCollationReordering(void)
   5911 {
   5912     UErrorCode status = U_ZERO_ERROR;
   5913     UCollator  *myCollation;
   5914     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   5915     UCollationResult collResult;
   5916 
   5917     uint8_t baseKey[256];
   5918     uint32_t baseKeyLength;
   5919     uint8_t reorderKey[256];
   5920     uint32_t reorderKeyLength;
   5921 
   5922     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   5923 
   5924     int i;
   5925 
   5926 
   5927     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5928 
   5929     /* build collator tertiary */
   5930     myCollation = ucol_open("", &status);
   5931     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5932     if(U_FAILURE(status)) {
   5933         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5934         return;
   5935     }
   5936     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5937 
   5938     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5939     if(U_FAILURE(status)) {
   5940         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5941         return;
   5942     }
   5943     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5944 
   5945     if (baseKeyLength != reorderKeyLength) {
   5946         log_err("Key lengths not the same during reordering.\n", collResult);
   5947         return;
   5948     }
   5949 
   5950     for (i = 1; i < baseKeyLength; i++) {
   5951         if (baseKey[i] != reorderKey[i]) {
   5952             log_err("Collation key bytes not the same at position %d.\n", i);
   5953             return;
   5954         }
   5955     }
   5956     ucol_close(myCollation);
   5957 
   5958     /* build collator quaternary */
   5959     myCollation = ucol_open("", &status);
   5960     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   5961     if(U_FAILURE(status)) {
   5962         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5963         return;
   5964     }
   5965     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   5966 
   5967     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5968     if(U_FAILURE(status)) {
   5969         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5970         return;
   5971     }
   5972     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   5973 
   5974     if (baseKeyLength != reorderKeyLength) {
   5975         log_err("Key lengths not the same during reordering.\n", collResult);
   5976         return;
   5977     }
   5978 
   5979     for (i = 1; i < baseKeyLength; i++) {
   5980         if (baseKey[i] != reorderKey[i]) {
   5981             log_err("Collation key bytes not the same at position %d.\n", i);
   5982             return;
   5983         }
   5984     }
   5985     ucol_close(myCollation);
   5986 }
   5987 
   5988 /*
   5989  * Test reordering API.
   5990  */
   5991 static void TestReorderingAPI(void)
   5992 {
   5993     UErrorCode status = U_ZERO_ERROR;
   5994     UCollator  *myCollation;
   5995     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   5996     UCollationResult collResult;
   5997     int32_t retrievedReorderCodesLength;
   5998     UChar greekString[] = { 0x03b1 };
   5999     UChar punctuationString[] = { 0x203e };
   6000 
   6001     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   6002 
   6003     /* build collator tertiary */
   6004     myCollation = ucol_open("", &status);
   6005     ucol_setStrength(myCollation, UCOL_TERTIARY);
   6006     if(U_FAILURE(status)) {
   6007         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6008         return;
   6009     }
   6010 
   6011     /* set the reorderding */
   6012     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   6013     if (U_FAILURE(status)) {
   6014         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   6015         return;
   6016     }
   6017 
   6018     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6019     if (status != U_BUFFER_OVERFLOW_ERROR) {
   6020         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   6021         return;
   6022     }
   6023     status = U_ZERO_ERROR;
   6024     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   6025         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   6026         return;
   6027     }
   6028     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6029     if (collResult != UCOL_LESS) {
   6030         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   6031         return;
   6032     }
   6033 
   6034     /* clear the reordering */
   6035     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   6036     if (U_FAILURE(status)) {
   6037         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   6038         return;
   6039     }
   6040 
   6041     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   6042     if (retrievedReorderCodesLength != 0) {
   6043         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   6044         return;
   6045     }
   6046 
   6047     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   6048     if (collResult != UCOL_GREATER) {
   6049         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   6050         return;
   6051     }
   6052 
   6053     ucol_close(myCollation);
   6054 }
   6055 
   6056 /*
   6057  * Utility function to test one collation reordering test case.
   6058  * @param testcases Array of test cases.
   6059  * @param n_testcases Size of the array testcases.
   6060  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   6061  * @param n_rules Size of the array str_rules.
   6062  */
   6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   6064 {
   6065     int testCaseNum;
   6066     UErrorCode status = U_ZERO_ERROR;
   6067     UCollator  *myCollation;
   6068 
   6069     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   6070         myCollation = ucol_open("", &status);
   6071         if (U_FAILURE(status)) {
   6072             log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   6073             return;
   6074         }
   6075         ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   6076         if(U_FAILURE(status)) {
   6077             log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   6078             return;
   6079         }
   6080 
   6081         for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   6082             doTest(myCollation,
   6083                 testCases[testCaseNum].source,
   6084                 testCases[testCaseNum].target,
   6085                 testCases[testCaseNum].result
   6086             );
   6087         }
   6088         ucol_close(myCollation);
   6089     }
   6090 }
   6091 
   6092 static void TestGreekFirstReorder(void)
   6093 {
   6094     const char* strRules[] = {
   6095         "[reorder Grek]"
   6096     };
   6097 
   6098     const int32_t apiRules[] = {
   6099         USCRIPT_GREEK
   6100     };
   6101 
   6102     const static OneTestCase privateUseCharacterStrings[] = {
   6103         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6104         { {0x0041}, {0x0391}, UCOL_GREATER },
   6105         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   6106         { {0x0060}, {0x0391}, UCOL_LESS },
   6107         { {0x0391}, {0xe2dc}, UCOL_LESS },
   6108         { {0x0391}, {0x0060}, UCOL_GREATER },
   6109     };
   6110 
   6111     /* Test rules creation */
   6112     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6113 
   6114     /* Test collation reordering API */
   6115     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6116 }
   6117 
   6118 static void TestGreekLastReorder(void)
   6119 {
   6120     const char* strRules[] = {
   6121         "[reorder Zzzz Grek]"
   6122     };
   6123 
   6124     const int32_t apiRules[] = {
   6125         USCRIPT_UNKNOWN, USCRIPT_GREEK
   6126     };
   6127 
   6128     const static OneTestCase privateUseCharacterStrings[] = {
   6129         { {0x0391}, {0x0391}, UCOL_EQUAL },
   6130         { {0x0041}, {0x0391}, UCOL_LESS },
   6131         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   6132         { {0x0060}, {0x0391}, UCOL_LESS },
   6133         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   6134     };
   6135 
   6136     /* Test rules creation */
   6137     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6138 
   6139     /* Test collation reordering API */
   6140     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6141 }
   6142 
   6143 static void TestNonScriptReorder(void)
   6144 {
   6145     const char* strRules[] = {
   6146         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   6147     };
   6148 
   6149     const int32_t apiRules[] = {
   6150         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   6151         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   6152         UCOL_REORDER_CODE_CURRENCY
   6153     };
   6154 
   6155     const static OneTestCase privateUseCharacterStrings[] = {
   6156         { {0x0391}, {0x0041}, UCOL_LESS },
   6157         { {0x0041}, {0x0391}, UCOL_GREATER },
   6158         { {0x0060}, {0x0041}, UCOL_LESS },
   6159         { {0x0060}, {0x0391}, UCOL_GREATER },
   6160         { {0x0024}, {0x0041}, UCOL_GREATER },
   6161     };
   6162 
   6163     /* Test rules creation */
   6164     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6165 
   6166     /* Test collation reordering API */
   6167     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6168 }
   6169 
   6170 static void TestHaniReorder(void)
   6171 {
   6172     const char* strRules[] = {
   6173         "[reorder Hani]"
   6174     };
   6175     const int32_t apiRules[] = {
   6176         USCRIPT_HAN
   6177     };
   6178 
   6179     const static OneTestCase privateUseCharacterStrings[] = {
   6180         { {0x4e00}, {0x0041}, UCOL_LESS },
   6181         { {0x4e00}, {0x0060}, UCOL_GREATER },
   6182         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   6183         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   6184         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   6185         { {0xfa27}, {0x0041}, UCOL_LESS },
   6186         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   6187     };
   6188 
   6189     /* Test rules creation */
   6190     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   6191 
   6192     /* Test collation reordering API */
   6193     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   6194 }
   6195 
   6196 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   6197 {
   6198   for (; *a == *b; ++a, ++b) {
   6199     if (*a == 0) {
   6200       return 0;
   6201     }
   6202   }
   6203   return (*a < *b ? -1 : 1);
   6204 }
   6205 
   6206 static void TestImport(void)
   6207 {
   6208     UCollator* vicoll;
   6209     UCollator* escoll;
   6210     UCollator* viescoll;
   6211     UCollator* importviescoll;
   6212     UParseError error;
   6213     UErrorCode status = U_ZERO_ERROR;
   6214     UChar* virules;
   6215     int32_t viruleslength;
   6216     UChar* esrules;
   6217     int32_t esruleslength;
   6218     UChar* viesrules;
   6219     int32_t viesruleslength;
   6220     char srules[500] = "[import vi][import es]";
   6221     UChar rules[500];
   6222     uint32_t length = 0;
   6223     int32_t itemCount;
   6224     int32_t i, k;
   6225     UChar32 start;
   6226     UChar32 end;
   6227     UChar str[500];
   6228     int32_t strLength;
   6229 
   6230     uint8_t sk1[500];
   6231     uint8_t sk2[500];
   6232 
   6233     UBool b;
   6234     USet* tailoredSet;
   6235     USet* importTailoredSet;
   6236 
   6237 
   6238     vicoll = ucol_open("vi", &status);
   6239     if(U_FAILURE(status)){
   6240         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   6241         return;
   6242     }
   6243 
   6244     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   6245     escoll = ucol_open("es", &status);
   6246     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   6247     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   6248     viesrules[0] = 0;
   6249     u_strcat(viesrules, virules);
   6250     u_strcat(viesrules, esrules);
   6251     viesruleslength = viruleslength + esruleslength;
   6252     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6253 
   6254     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6255     length = u_unescape(srules, rules, 500);
   6256     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6257     if(U_FAILURE(status)){
   6258         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6259         return;
   6260     }
   6261 
   6262     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   6263     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   6264 
   6265     if(!uset_equals(tailoredSet, importTailoredSet)){
   6266         log_err("Tailored sets not equal");
   6267     }
   6268 
   6269     uset_close(importTailoredSet);
   6270 
   6271     itemCount = uset_getItemCount(tailoredSet);
   6272 
   6273     for( i = 0; i < itemCount; i++){
   6274         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6275         if(strLength < 2){
   6276             for (; start <= end; start++){
   6277                 k = 0;
   6278                 U16_APPEND(str, k, 500, start, b);
   6279                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   6280                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   6281                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6282                     log_err("Sort key for %s not equal\n", str);
   6283                     break;
   6284                 }
   6285             }
   6286         }else{
   6287             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   6288             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   6289             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6290                 log_err("ZZSort key for %s not equal\n", str);
   6291                 break;
   6292             }
   6293 
   6294         }
   6295     }
   6296 
   6297     uset_close(tailoredSet);
   6298 
   6299     uprv_free(viesrules);
   6300 
   6301     ucol_close(vicoll);
   6302     ucol_close(escoll);
   6303     ucol_close(viescoll);
   6304     ucol_close(importviescoll);
   6305 }
   6306 
   6307 static void TestImportWithType(void)
   6308 {
   6309     UCollator* vicoll;
   6310     UCollator* decoll;
   6311     UCollator* videcoll;
   6312     UCollator* importvidecoll;
   6313     UParseError error;
   6314     UErrorCode status = U_ZERO_ERROR;
   6315     const UChar* virules;
   6316     int32_t viruleslength;
   6317     const UChar* derules;
   6318     int32_t deruleslength;
   6319     UChar* viderules;
   6320     int32_t videruleslength;
   6321     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   6322     UChar rules[500];
   6323     uint32_t length = 0;
   6324     int32_t itemCount;
   6325     int32_t i, k;
   6326     UChar32 start;
   6327     UChar32 end;
   6328     UChar str[500];
   6329     int32_t strLength;
   6330 
   6331     uint8_t sk1[500];
   6332     uint8_t sk2[500];
   6333 
   6334     USet* tailoredSet;
   6335     USet* importTailoredSet;
   6336 
   6337     vicoll = ucol_open("vi", &status);
   6338     if(U_FAILURE(status)){
   6339         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6340         return;
   6341     }
   6342     virules = ucol_getRules(vicoll, &viruleslength);
   6343     /* decoll = ucol_open("de@collation=phonebook", &status); */
   6344     decoll = ucol_open("de-u-co-phonebk", &status);
   6345     if(U_FAILURE(status)){
   6346         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6347         return;
   6348     }
   6349 
   6350 
   6351     derules = ucol_getRules(decoll, &deruleslength);
   6352     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   6353     viderules[0] = 0;
   6354     u_strcat(viderules, virules);
   6355     u_strcat(viderules, derules);
   6356     videruleslength = viruleslength + deruleslength;
   6357     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6358 
   6359     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   6360     length = u_unescape(srules, rules, 500);
   6361     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   6362     if(U_FAILURE(status)){
   6363         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   6364         return;
   6365     }
   6366 
   6367     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   6368     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   6369 
   6370     if(!uset_equals(tailoredSet, importTailoredSet)){
   6371         log_err("Tailored sets not equal");
   6372     }
   6373 
   6374     uset_close(importTailoredSet);
   6375 
   6376     itemCount = uset_getItemCount(tailoredSet);
   6377 
   6378     for( i = 0; i < itemCount; i++){
   6379         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   6380         if(strLength < 2){
   6381             for (; start <= end; start++){
   6382                 k = 0;
   6383                 U16_APPEND_UNSAFE(str, k, start);
   6384                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   6385                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   6386                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6387                     log_err("Sort key for %s not equal\n", str);
   6388                     break;
   6389                 }
   6390             }
   6391         }else{
   6392             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   6393             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   6394             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   6395                 log_err("Sort key for %s not equal\n", str);
   6396                 break;
   6397             }
   6398 
   6399         }
   6400     }
   6401 
   6402     uset_close(tailoredSet);
   6403 
   6404     uprv_free(viderules);
   6405 
   6406     ucol_close(videcoll);
   6407     ucol_close(importvidecoll);
   6408     ucol_close(vicoll);
   6409     ucol_close(decoll);
   6410 
   6411 }
   6412 
   6413 
   6414 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
   6415 
   6416 void addMiscCollTest(TestNode** root)
   6417 {
   6418     TEST(TestRuleOptions);
   6419     TEST(TestBeforePrefixFailure);
   6420     TEST(TestContractionClosure);
   6421     TEST(TestPrefixCompose);
   6422     TEST(TestStrCollIdenticalPrefix);
   6423     TEST(TestPrefix);
   6424     TEST(TestNewJapanese);
   6425     /*TEST(TestLimitations);*/
   6426     TEST(TestNonChars);
   6427     TEST(TestExtremeCompression);
   6428     TEST(TestSurrogates);
   6429     /* BEGIN android-removed
   6430        To save space, Android does not include the collation tailoring rules.
   6431        We skip the tailing tests for collations. */
   6432     /* TEST(TestVariableTopSetting); */
   6433     /* END android-removed */
   6434     TEST(TestBocsuCoverage);
   6435     TEST(TestCyrillicTailoring);
   6436     TEST(TestCase);
   6437     TEST(IncompleteCntTest);
   6438     TEST(BlackBirdTest);
   6439     TEST(FunkyATest);
   6440     TEST(BillFairmanTest);
   6441     TEST(RamsRulesTest);
   6442     TEST(IsTailoredTest);
   6443     TEST(TestCollations);
   6444     TEST(TestChMove);
   6445     TEST(TestImplicitTailoring);
   6446     TEST(TestFCDProblem);
   6447     TEST(TestEmptyRule);
   6448     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
   6449     TEST(TestJ815);
   6450     /*TEST(TestJ831);*/ /* we changed lv locale */
   6451     TEST(TestBefore);
   6452     TEST(TestRedundantRules);
   6453     TEST(TestExpansionSyntax);
   6454     TEST(TestHangulTailoring);
   6455     TEST(TestUCARules);
   6456     TEST(TestIncrementalNormalize);
   6457     TEST(TestComposeDecompose);
   6458     TEST(TestCompressOverlap);
   6459     TEST(TestContraction);
   6460     TEST(TestExpansion);
   6461     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
   6462     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
   6463     TEST(TestOptimize);
   6464     TEST(TestSuppressContractions);
   6465     TEST(Alexis2);
   6466     TEST(TestHebrewUCA);
   6467     TEST(TestPartialSortKeyTermination);
   6468     TEST(TestSettings);
   6469     TEST(TestEquals);
   6470     TEST(TestJ2726);
   6471     TEST(NullRule);
   6472     TEST(TestNumericCollation);
   6473     TEST(TestTibetanConformance);
   6474     TEST(TestPinyinProblem);
   6475     TEST(TestImplicitGeneration);
   6476     TEST(TestSeparateTrees);
   6477     TEST(TestBeforePinyin);
   6478     TEST(TestBeforeTightening);
   6479     /*TEST(TestMoreBefore);*/
   6480     TEST(TestTailorNULL);
   6481     TEST(TestUpperFirstQuaternary);
   6482     TEST(TestJ4960);
   6483     TEST(TestJ5223);
   6484     TEST(TestJ5232);
   6485     TEST(TestJ5367);
   6486     TEST(TestHiragana);
   6487     TEST(TestSortKeyConsistency);
   6488     TEST(TestVI5913);  /* VI, RO tailored rules */
   6489     TEST(TestCroatianSortKey);
   6490     TEST(TestTailor6179);
   6491     TEST(TestUCAPrecontext);
   6492     TEST(TestOutOfBuffer5468);
   6493     TEST(TestSameStrengthList);
   6494 
   6495     TEST(TestSameStrengthListQuoted);
   6496     TEST(TestSameStrengthListSupplemental);
   6497     TEST(TestSameStrengthListQwerty);
   6498     TEST(TestSameStrengthListQuotedQwerty);
   6499     TEST(TestSameStrengthListRanges);
   6500     TEST(TestSameStrengthListSupplementalRanges);
   6501     TEST(TestSpecialCharacters);
   6502     TEST(TestPrivateUseCharacters);
   6503     TEST(TestPrivateUseCharactersInList);
   6504     TEST(TestPrivateUseCharactersInRange);
   6505     TEST(TestInvalidListsAndRanges);
   6506     TEST(TestImport);
   6507     TEST(TestImportWithType);
   6508 
   6509     TEST(TestBeforeRuleWithScriptReordering);
   6510     TEST(TestNonLeadBytesDuringCollationReordering);
   6511     TEST(TestReorderingAPI);
   6512     TEST(TestGreekFirstReorder);
   6513     TEST(TestGreekLastReorder);
   6514     TEST(TestNonScriptReorder);
   6515     TEST(TestHaniReorder);
   6516 }
   6517 
   6518 #endif /* #if !UCONFIG_NO_COLLATION */
   6519