Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2014, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "cmemory.h"
     33 #include "cstring.h"
     34 #include "uassert.h"
     35 #include "unicode/parseerr.h"
     36 #include "unicode/ucnv.h"
     37 #include "unicode/ures.h"
     38 #include "unicode/uscript.h"
     39 #include "unicode/utf16.h"
     40 #include "uparse.h"
     41 #include "putilimp.h"
     42 
     43 
     44 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     45 
     46 #define MAX_TOKEN_LEN 16
     47 
     48 typedef UCollationResult tst_strcoll(void *collator, const int object,
     49                         const UChar *source, const int sLen,
     50                         const UChar *target, const int tLen);
     51 
     52 
     53 
     54 const static char cnt1[][10] = {
     55 
     56   "AA",
     57   "AC",
     58   "AZ",
     59   "AQ",
     60   "AB",
     61   "ABZ",
     62   "ABQ",
     63   "Z",
     64   "ABC",
     65   "Q",
     66   "B"
     67 };
     68 
     69 const static char cnt2[][10] = {
     70   "DA",
     71   "DAD",
     72   "DAZ",
     73   "MAR",
     74   "Z",
     75   "DAVIS",
     76   "MARK",
     77   "DAV",
     78   "DAVI"
     79 };
     80 
     81 static void IncompleteCntTest(void)
     82 {
     83   UErrorCode status = U_ZERO_ERROR;
     84   UChar temp[90];
     85   UChar t1[90];
     86   UChar t2[90];
     87 
     88   UCollator *coll =  NULL;
     89   uint32_t i = 0, j = 0;
     90   uint32_t size = 0;
     91 
     92   u_uastrcpy(temp, " & Z < ABC < Q < B");
     93 
     94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     95 
     96   if(U_SUCCESS(status)) {
     97     size = sizeof(cnt1)/sizeof(cnt1[0]);
     98     for(i = 0; i < size-1; i++) {
     99       for(j = i+1; j < size; j++) {
    100         UCollationElements *iter;
    101         u_uastrcpy(t1, cnt1[i]);
    102         u_uastrcpy(t2, cnt1[j]);
    103         doTest(coll, t1, t2, UCOL_LESS);
    104         /* synwee : added collation element iterator test */
    105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    106         if (U_FAILURE(status)) {
    107           log_err("Creation of iterator failed\n");
    108           break;
    109         }
    110         backAndForth(iter);
    111         ucol_closeElements(iter);
    112       }
    113     }
    114   }
    115 
    116   ucol_close(coll);
    117 
    118 
    119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    121 
    122   if(U_SUCCESS(status)) {
    123     size = sizeof(cnt2)/sizeof(cnt2[0]);
    124     for(i = 0; i < size-1; i++) {
    125       for(j = i+1; j < size; j++) {
    126         UCollationElements *iter;
    127         u_uastrcpy(t1, cnt2[i]);
    128         u_uastrcpy(t2, cnt2[j]);
    129         doTest(coll, t1, t2, UCOL_LESS);
    130 
    131         /* synwee : added collation element iterator test */
    132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    133         if (U_FAILURE(status)) {
    134           log_err("Creation of iterator failed\n");
    135           break;
    136         }
    137         backAndForth(iter);
    138         ucol_closeElements(iter);
    139       }
    140     }
    141   }
    142 
    143   ucol_close(coll);
    144 
    145 
    146 }
    147 
    148 const static char shifted[][20] = {
    149   "black bird",
    150   "black-bird",
    151   "blackbird",
    152   "black Bird",
    153   "black-Bird",
    154   "blackBird",
    155   "black birds",
    156   "black-birds",
    157   "blackbirds"
    158 };
    159 
    160 const static UCollationResult shiftedTert[] = {
    161   UCOL_EQUAL,
    162   UCOL_EQUAL,
    163   UCOL_EQUAL,
    164   UCOL_LESS,
    165   UCOL_EQUAL,
    166   UCOL_EQUAL,
    167   UCOL_LESS,
    168   UCOL_EQUAL,
    169   UCOL_EQUAL
    170 };
    171 
    172 const static char nonignorable[][20] = {
    173   "black bird",
    174   "black Bird",
    175   "black birds",
    176   "black-bird",
    177   "black-Bird",
    178   "black-birds",
    179   "blackbird",
    180   "blackBird",
    181   "blackbirds"
    182 };
    183 
    184 static void BlackBirdTest(void) {
    185   UErrorCode status = U_ZERO_ERROR;
    186   UChar t1[90];
    187   UChar t2[90];
    188 
    189   uint32_t i = 0, j = 0;
    190   uint32_t size = 0;
    191   UCollator *coll = ucol_open("en_US", &status);
    192 
    193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    195 
    196   if(U_SUCCESS(status)) {
    197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    198     for(i = 0; i < size-1; i++) {
    199       for(j = i+1; j < size; j++) {
    200         u_uastrcpy(t1, nonignorable[i]);
    201         u_uastrcpy(t2, nonignorable[j]);
    202         doTest(coll, t1, t2, UCOL_LESS);
    203       }
    204     }
    205   }
    206 
    207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    209 
    210   if(U_SUCCESS(status)) {
    211     size = sizeof(shifted)/sizeof(shifted[0]);
    212     for(i = 0; i < size-1; i++) {
    213       for(j = i+1; j < size; j++) {
    214         u_uastrcpy(t1, shifted[i]);
    215         u_uastrcpy(t2, shifted[j]);
    216         doTest(coll, t1, t2, UCOL_LESS);
    217       }
    218     }
    219   }
    220 
    221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    222   if(U_SUCCESS(status)) {
    223     size = sizeof(shifted)/sizeof(shifted[0]);
    224     for(i = 1; i < size; i++) {
    225       u_uastrcpy(t1, shifted[i-1]);
    226       u_uastrcpy(t2, shifted[i]);
    227       doTest(coll, t1, t2, shiftedTert[i]);
    228     }
    229   }
    230 
    231   ucol_close(coll);
    232 }
    233 
    234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    237     {0x0041/*'A'*/, 0x0300, 0x0000},
    238     {0x00C0, 0x0301, 0x0000},
    239     /* this would work with forced normalization */
    240     {0x00C0, 0x0316, 0x0000}
    241 };
    242 
    243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    246     {0x00C0, 0},
    247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    248     /* this would work with forced normalization */
    249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    250 };
    251 
    252 const static UCollationResult results[] = {
    253     UCOL_GREATER,
    254     UCOL_EQUAL,
    255     UCOL_EQUAL,
    256     UCOL_GREATER,
    257     UCOL_EQUAL
    258 };
    259 
    260 static void FunkyATest(void)
    261 {
    262 
    263     int32_t i;
    264     UErrorCode status = U_ZERO_ERROR;
    265     UCollator  *myCollation;
    266     myCollation = ucol_open("en_US", &status);
    267     if(U_FAILURE(status)){
    268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    269         return;
    270     }
    271     log_verbose("Testing some A letters, for some reason\n");
    272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    273     ucol_setStrength(myCollation, UCOL_TERTIARY);
    274     for (i = 0; i < 4 ; i++)
    275     {
    276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    277     }
    278     ucol_close(myCollation);
    279 }
    280 
    281 UColAttributeValue caseFirst[] = {
    282     UCOL_OFF,
    283     UCOL_LOWER_FIRST,
    284     UCOL_UPPER_FIRST
    285 };
    286 
    287 
    288 UColAttributeValue alternateHandling[] = {
    289     UCOL_NON_IGNORABLE,
    290     UCOL_SHIFTED
    291 };
    292 
    293 UColAttributeValue caseLevel[] = {
    294     UCOL_OFF,
    295     UCOL_ON
    296 };
    297 
    298 UColAttributeValue strengths[] = {
    299     UCOL_PRIMARY,
    300     UCOL_SECONDARY,
    301     UCOL_TERTIARY,
    302     UCOL_QUATERNARY,
    303     UCOL_IDENTICAL
    304 };
    305 
    306 #if 0
    307 static const char * strengthsC[] = {
    308     "UCOL_PRIMARY",
    309     "UCOL_SECONDARY",
    310     "UCOL_TERTIARY",
    311     "UCOL_QUATERNARY",
    312     "UCOL_IDENTICAL"
    313 };
    314 
    315 static const char * caseFirstC[] = {
    316     "UCOL_OFF",
    317     "UCOL_LOWER_FIRST",
    318     "UCOL_UPPER_FIRST"
    319 };
    320 
    321 
    322 static const char * alternateHandlingC[] = {
    323     "UCOL_NON_IGNORABLE",
    324     "UCOL_SHIFTED"
    325 };
    326 
    327 static const char * caseLevelC[] = {
    328     "UCOL_OFF",
    329     "UCOL_ON"
    330 };
    331 
    332 /* not used currently - does not test only prints */
    333 static void PrintMarkDavis(void)
    334 {
    335   UErrorCode status = U_ZERO_ERROR;
    336   UChar m[256];
    337   uint8_t sortkey[256];
    338   UCollator *coll = ucol_open("en_US", &status);
    339   uint32_t h,i,j,k, sortkeysize;
    340   uint32_t sizem = 0;
    341   char buffer[512];
    342   uint32_t len = 512;
    343 
    344   log_verbose("PrintMarkDavis");
    345 
    346   u_uastrcpy(m, "Mark Davis");
    347   sizem = u_strlen(m);
    348 
    349 
    350   m[1] = 0xe4;
    351 
    352   for(i = 0; i<sizem; i++) {
    353     fprintf(stderr, "\\u%04X ", m[i]);
    354   }
    355   fprintf(stderr, "\n");
    356 
    357   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    358     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    359     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    360 
    361     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    362       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    363       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    364 
    365       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    366         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    367         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    368 
    369         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    370           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    371           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    372           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    373           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    374         }
    375 
    376       }
    377 
    378     }
    379 
    380   }
    381 }
    382 #endif
    383 
    384 static void BillFairmanTest(void) {
    385 /*
    386 ** check for actual locale via ICU resource bundles
    387 **
    388 ** lp points to the original locale ("fr_FR_....")
    389 */
    390 
    391     UResourceBundle *lr,*cr;
    392     UErrorCode              lec = U_ZERO_ERROR;
    393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    394 
    395     log_verbose("BillFairmanTest\n");
    396 
    397     lr = ures_open(NULL,lp,&lec);
    398     if (lr) {
    399         cr = ures_getByKey(lr,"collations",0,&lec);
    400         if (cr) {
    401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    402             if (lp) {
    403                 if (U_SUCCESS(lec)) {
    404                     if(strcmp(lp, "fr") != 0) {
    405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    406                     }
    407                 }
    408             }
    409             ures_close(cr);
    410         }
    411         ures_close(lr);
    412     }
    413 }
    414 
    415 const static char chTest[][20] = {
    416   "c",
    417   "C",
    418   "ca", "cb", "cx", "cy", "CZ",
    419   "c\\u030C", "C\\u030C",
    420   "h",
    421   "H",
    422   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    423   "ch", "cH", "Ch", "CH",
    424   "cha", "charly", "che", "chh", "chch", "chr",
    425   "i", "I", "iarly",
    426   "r", "R",
    427   "r\\u030C", "R\\u030C",
    428   "s",
    429   "S",
    430   "s\\u030C", "S\\u030C",
    431   "z", "Z",
    432   "z\\u030C", "Z\\u030C"
    433 };
    434 
    435 static void TestChMove(void) {
    436     UChar t1[256] = {0};
    437     UChar t2[256] = {0};
    438 
    439     uint32_t i = 0, j = 0;
    440     uint32_t size = 0;
    441     UErrorCode status = U_ZERO_ERROR;
    442 
    443     UCollator *coll = ucol_open("cs", &status);
    444 
    445     if(U_SUCCESS(status)) {
    446         size = sizeof(chTest)/sizeof(chTest[0]);
    447         for(i = 0; i < size-1; i++) {
    448             for(j = i+1; j < size; j++) {
    449                 u_unescape(chTest[i], t1, 256);
    450                 u_unescape(chTest[j], t2, 256);
    451                 doTest(coll, t1, t2, UCOL_LESS);
    452             }
    453         }
    454     }
    455     else {
    456         log_data_err("Can't open collator");
    457     }
    458     ucol_close(coll);
    459 }
    460 
    461 
    462 
    463 
    464 /*
    465 const static char impTest[][20] = {
    466   "\\u4e00",
    467     "a",
    468     "A",
    469     "b",
    470     "B",
    471     "\\u4e01"
    472 };
    473 */
    474 
    475 
    476 static void TestImplicitTailoring(void) {
    477   static const struct {
    478     const char *rules;
    479     const char *data[10];
    480     const uint32_t len;
    481   } tests[] = {
    482       {
    483         /* Tailor b and c before U+4E00. */
    484         "&[before 1]\\u4e00 < b < c "
    485         /* Now, before U+4E00 is c; put d and e after that. */
    486         "&[before 1]\\u4e00 < d < e",
    487         { "b", "c", "d", "e", "\\u4e00"}, 5 },
    488       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
    489       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
    490       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
    491   };
    492 
    493   int32_t i = 0;
    494 
    495   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
    496       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
    497   }
    498 
    499 /*
    500   UChar t1[256] = {0};
    501   UChar t2[256] = {0};
    502 
    503   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
    504 
    505   uint32_t i = 0, j = 0;
    506   uint32_t size = 0;
    507   uint32_t ruleLen = 0;
    508   UErrorCode status = U_ZERO_ERROR;
    509   UCollator *coll = NULL;
    510   ruleLen = u_unescape(rule, t1, 256);
    511 
    512   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    513 
    514   if(U_SUCCESS(status)) {
    515     size = sizeof(impTest)/sizeof(impTest[0]);
    516     for(i = 0; i < size-1; i++) {
    517       for(j = i+1; j < size; j++) {
    518         u_unescape(impTest[i], t1, 256);
    519         u_unescape(impTest[j], t2, 256);
    520         doTest(coll, t1, t2, UCOL_LESS);
    521       }
    522     }
    523   }
    524   else {
    525     log_err("Can't open collator");
    526   }
    527   ucol_close(coll);
    528   */
    529 }
    530 
    531 static void TestFCDProblem(void) {
    532   UChar t1[256] = {0};
    533   UChar t2[256] = {0};
    534 
    535   const char *s1 = "\\u0430\\u0306\\u0325";
    536   const char *s2 = "\\u04D1\\u0325";
    537 
    538   UErrorCode status = U_ZERO_ERROR;
    539   UCollator *coll = ucol_open("", &status);
    540   u_unescape(s1, t1, 256);
    541   u_unescape(s2, t2, 256);
    542 
    543   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    544   doTest(coll, t1, t2, UCOL_EQUAL);
    545 
    546   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    547   doTest(coll, t1, t2, UCOL_EQUAL);
    548 
    549   ucol_close(coll);
    550 }
    551 
    552 /*
    553 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
    554 We're only using NFC/NFD in this test.
    555 */
    556 #define NORM_BUFFER_TEST_LEN 18
    557 typedef struct {
    558   UChar32 u;
    559   UChar NFC[NORM_BUFFER_TEST_LEN];
    560   UChar NFD[NORM_BUFFER_TEST_LEN];
    561 } tester;
    562 
    563 static void TestComposeDecompose(void) {
    564     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
    565     static const UChar UNICODESET_STR[] = {
    566         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
    567         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
    568         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
    569     };
    570     int32_t noOfLoc;
    571     int32_t i = 0, j = 0;
    572 
    573     UErrorCode status = U_ZERO_ERROR;
    574     const char *locName = NULL;
    575     uint32_t nfcSize;
    576     uint32_t nfdSize;
    577     tester **t;
    578     uint32_t noCases = 0;
    579     UCollator *coll = NULL;
    580     UChar32 u = 0;
    581     UChar comp[NORM_BUFFER_TEST_LEN];
    582     uint32_t len = 0;
    583     UCollationElements *iter;
    584     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
    585     int32_t charsToTestSize;
    586 
    587     noOfLoc = uloc_countAvailable();
    588 
    589     coll = ucol_open("", &status);
    590     if (U_FAILURE(status)) {
    591         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
    592         return;
    593     }
    594     charsToTestSize = uset_size(charsToTest);
    595     if (charsToTestSize <= 0) {
    596         log_err("Set was zero. Missing data?\n");
    597         return;
    598     }
    599     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
    600     t[0] = (tester *)malloc(sizeof(tester));
    601     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
    602 
    603     for(u = 0; u < charsToTestSize; u++) {
    604         UChar32 ch = uset_charAt(charsToTest, u);
    605         len = 0;
    606         U16_APPEND_UNSAFE(comp, len, ch);
    607         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    608         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    609 
    610         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
    611           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
    612             t[noCases]->u = ch;
    613             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
    614                 u_strncpy(t[noCases]->NFC, comp, len);
    615                 t[noCases]->NFC[len] = 0;
    616             }
    617             noCases++;
    618             t[noCases] = (tester *)malloc(sizeof(tester));
    619             uprv_memset(t[noCases], 0, sizeof(tester));
    620         }
    621     }
    622     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
    623     uset_close(charsToTest);
    624     charsToTest = NULL;
    625 
    626     for(u=0; u<(UChar32)noCases; u++) {
    627         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    628             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
    629             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    630         }
    631     }
    632     /*
    633     for(u = 0; u < charsToTestSize; u++) {
    634       if(!(u&0xFFFF)) {
    635         log_verbose("%08X ", u);
    636       }
    637       uprv_memset(t[noCases], 0, sizeof(tester));
    638       t[noCases]->u = u;
    639       len = 0;
    640       U16_APPEND_UNSAFE(comp, len, u);
    641       comp[len] = 0;
    642       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    643       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    644       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
    645       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
    646     }
    647     */
    648 
    649     ucol_close(coll);
    650 
    651     log_verbose("Testing locales, number of cases = %i\n", noCases);
    652     for(i = 0; i<noOfLoc; i++) {
    653         status = U_ZERO_ERROR;
    654         locName = uloc_getAvailable(i);
    655         if(hasCollationElements(locName)) {
    656             char cName[256];
    657             UChar name[256];
    658             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
    659 
    660             for(j = 0; j<nameSize; j++) {
    661                 cName[j] = (char)name[j];
    662             }
    663             cName[nameSize] = 0;
    664             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
    665 
    666             coll = ucol_open(locName, &status);
    667             ucol_setStrength(coll, UCOL_IDENTICAL);
    668             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    669 
    670             for(u=0; u<(UChar32)noCases; u++) {
    671                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    672                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
    673                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    674                     log_verbose("Testing NFC\n");
    675                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
    676                     backAndForth(iter);
    677                     log_verbose("Testing NFD\n");
    678                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    679                     backAndForth(iter);
    680                 }
    681             }
    682             ucol_closeElements(iter);
    683             ucol_close(coll);
    684         }
    685     }
    686     for(u = 0; u <= (UChar32)noCases; u++) {
    687         free(t[u]);
    688     }
    689     free(t);
    690 }
    691 
    692 static void TestEmptyRule(void) {
    693   UErrorCode status = U_ZERO_ERROR;
    694   UChar rulez[] = { 0 };
    695   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    696 
    697   ucol_close(coll);
    698 }
    699 
    700 static void TestUCARules(void) {
    701   UErrorCode status = U_ZERO_ERROR;
    702   UChar b[256];
    703   UChar *rules = b;
    704   uint32_t ruleLen = 0;
    705   UCollator *UCAfromRules = NULL;
    706   UCollator *coll = ucol_open("", &status);
    707   if(status == U_FILE_ACCESS_ERROR) {
    708     log_data_err("Is your data around?\n");
    709     return;
    710   } else if(U_FAILURE(status)) {
    711     log_err("Error opening collator\n");
    712     return;
    713   }
    714   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
    715 
    716   log_verbose("TestUCARules\n");
    717   if(ruleLen > 256) {
    718     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
    719     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
    720   }
    721   log_verbose("Rules length is %d\n", ruleLen);
    722   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
    723   if(U_SUCCESS(status)) {
    724     ucol_close(UCAfromRules);
    725   } else {
    726     log_verbose("Unable to create a collator from UCARules!\n");
    727   }
    728 /*
    729   u_unescape(blah, b, 256);
    730   ucol_getSortKey(coll, b, 1, res, 256);
    731 */
    732   ucol_close(coll);
    733   if(rules != b) {
    734     free(rules);
    735   }
    736 }
    737 
    738 
    739 /* Pinyin tonal order */
    740 /*
    741     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
    742           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
    743     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
    744     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
    745     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
    746     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
    747       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
    748 .. (\u00fc)
    749 
    750 However, in testing we got the following order:
    751     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
    752           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
    753     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
    754 .. (\u0113)
    755     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
    756     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
    757     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
    758 .. (\u01d8)
    759       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
    760 */
    761 
    762 static void TestBefore(void) {
    763   const static char *data[] = {
    764       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
    765       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
    766       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
    767       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
    768       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
    769       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    770   };
    771   genericRulesStarter(
    772     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    773     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    774     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    775     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    776     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    777     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
    778     data, sizeof(data)/sizeof(data[0]));
    779 }
    780 
    781 #if 0
    782 /* superceded by TestBeforePinyin */
    783 static void TestJ784(void) {
    784   const static char *data[] = {
    785       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
    786       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
    787       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
    788       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
    789       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
    790       "\\u00fc",
    791            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    792   };
    793   genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
    794 }
    795 #endif
    796 
    797 #if 0
    798 /* superceded by the changes to the lv locale */
    799 static void TestJ831(void) {
    800   const static char *data[] = {
    801     "I",
    802       "i",
    803       "Y",
    804       "y"
    805   };
    806   genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
    807 }
    808 #endif
    809 
    810 static void TestJ815(void) {
    811   const static char *data[] = {
    812     "aa",
    813       "Aa",
    814       "ab",
    815       "Ab",
    816       "ad",
    817       "Ad",
    818       "ae",
    819       "Ae",
    820       "\\u00e6",
    821       "\\u00c6",
    822       "af",
    823       "Af",
    824       "b",
    825       "B"
    826   };
    827   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
    828   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
    829 }
    830 
    831 
    832 static void TestCase(void)
    833 {
    834     const static UChar gRules[MAX_TOKEN_LEN] =
    835     /*" & 0 < 1,\u2461<a,A"*/
    836     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
    837 
    838     const static UChar testCase[][MAX_TOKEN_LEN] =
    839     {
    840         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
    841         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
    842         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
    843         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
    844     };
    845 
    846     const static UCollationResult caseTestResults[][9] =
    847     {
    848         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    849         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
    850         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    851         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
    852     };
    853 
    854     const static UColAttributeValue caseTestAttributes[][2] =
    855     {
    856         { UCOL_LOWER_FIRST, UCOL_OFF},
    857         { UCOL_UPPER_FIRST, UCOL_OFF},
    858         { UCOL_LOWER_FIRST, UCOL_ON},
    859         { UCOL_UPPER_FIRST, UCOL_ON}
    860     };
    861     int32_t i,j,k;
    862     UErrorCode status = U_ZERO_ERROR;
    863     UCollationElements *iter;
    864     UCollator  *myCollation;
    865     myCollation = ucol_open("en_US", &status);
    866 
    867     if(U_FAILURE(status)){
    868         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    869         return;
    870     }
    871     log_verbose("Testing different case settings\n");
    872     ucol_setStrength(myCollation, UCOL_TERTIARY);
    873 
    874     for(k = 0; k<4; k++) {
    875       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    876       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    877       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
    878       for (i = 0; i < 3 ; i++) {
    879         for(j = i+1; j<4; j++) {
    880           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    881         }
    882       }
    883     }
    884     ucol_close(myCollation);
    885 
    886     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    887     if(U_FAILURE(status)){
    888         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    889         return;
    890     }
    891     log_verbose("Testing different case settings with custom rules\n");
    892     ucol_setStrength(myCollation, UCOL_TERTIARY);
    893 
    894     for(k = 0; k<4; k++) {
    895       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    896       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    897       for (i = 0; i < 3 ; i++) {
    898         for(j = i+1; j<4; j++) {
    899           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
    900           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    901           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
    902           backAndForth(iter);
    903           ucol_closeElements(iter);
    904           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
    905           backAndForth(iter);
    906           ucol_closeElements(iter);
    907         }
    908       }
    909     }
    910     ucol_close(myCollation);
    911     {
    912       const static char *lowerFirst[] = {
    913         "h",
    914         "H",
    915         "ch",
    916         "Ch",
    917         "CH",
    918         "cha",
    919         "chA",
    920         "Cha",
    921         "ChA",
    922         "CHa",
    923         "CHA",
    924         "i",
    925         "I"
    926       };
    927 
    928       const static char *upperFirst[] = {
    929         "H",
    930         "h",
    931         "CH",
    932         "Ch",
    933         "ch",
    934         "CHA",
    935         "CHa",
    936         "ChA",
    937         "Cha",
    938         "chA",
    939         "cha",
    940         "I",
    941         "i"
    942       };
    943       log_verbose("mixed case test\n");
    944       log_verbose("lower first, case level off\n");
    945       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
    946       log_verbose("upper first, case level off\n");
    947       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
    948       log_verbose("lower first, case level on\n");
    949       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
    950       log_verbose("upper first, case level on\n");
    951       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
    952     }
    953 
    954 }
    955 
    956 static void TestIncrementalNormalize(void) {
    957 
    958     /*UChar baseA     =0x61;*/
    959     UChar baseA     =0x41;
    960 /*    UChar baseB     = 0x42;*/
    961     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
    962     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
    963     /*
    964         0x316 is combining grave accent below, cc=220
    965         0x321 is combining palatalized hook below, cc=202
    966         0x300 is combining grave accent, cc=230
    967     */
    968 
    969 #define MAXSLEN 2000
    970     /*int          maxSLen   = 64000;*/
    971     int          sLen;
    972     int          i;
    973 
    974     UCollator        *coll;
    975     UErrorCode       status = U_ZERO_ERROR;
    976     UCollationResult result;
    977 
    978     int32_t myQ = getTestOption(QUICK_OPTION);
    979 
    980     if(getTestOption(QUICK_OPTION) < 0) {
    981         setTestOption(QUICK_OPTION, 1);
    982     }
    983 
    984     {
    985         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
    986         /*          most buffers along the way.*/
    987         UChar            strA[MAXSLEN+1];
    988         UChar            strB[MAXSLEN+1];
    989 
    990         coll = ucol_open("en_US", &status);
    991         if(status == U_FILE_ACCESS_ERROR) {
    992           log_data_err("Is your data around?\n");
    993           return;
    994         } else if(U_FAILURE(status)) {
    995           log_err("Error opening collator\n");
    996           return;
    997         }
    998         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    999 
   1000         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   1001         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   1002         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   1003         for (sLen = 500; sLen<501; sLen++) {
   1004         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   1005             strA[0] = baseA;
   1006             strB[0] = baseA;
   1007             for (i=1; i<=sLen-1; i++) {
   1008                 strA[i] = ccMix[i % 3];
   1009                 strB[sLen-i] = ccMix[i % 3];
   1010             }
   1011             strA[sLen]   = 0;
   1012             strB[sLen]   = 0;
   1013 
   1014             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   1015             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   1016             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   1017             doTest(coll, strA, strB, UCOL_EQUAL);
   1018         }
   1019     }
   1020 
   1021     setTestOption(QUICK_OPTION, myQ);
   1022 
   1023 
   1024     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   1025     /*         of the string.  Checks a couple of edge cases.*/
   1026 
   1027     {
   1028         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   1029         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   1030         ucol_setStrength(coll, UCOL_TERTIARY);
   1031         doTest(coll, strA, strB, UCOL_EQUAL);
   1032     }
   1033 
   1034     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   1035 
   1036     {
   1037       /* New UCA  3.1.1.
   1038        * test below used a code point from Desseret, which sorts differently
   1039        * than d800 dc00
   1040        */
   1041         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   1042         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   1043         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   1044         ucol_setStrength(coll, UCOL_TERTIARY);
   1045         doTest(coll, strA, strB, UCOL_GREATER);
   1046     }
   1047 
   1048     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   1049 
   1050     {
   1051         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   1052         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   1053         char  sortKeyA[50];
   1054         char  sortKeyAz[50];
   1055         char  sortKeyB[50];
   1056         char  sortKeyBz[50];
   1057         int   r;
   1058 
   1059         /* there used to be -3 here. Hmmmm.... */
   1060         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   1061         result = ucol_strcoll(coll, strA, 3, strB, 3);
   1062         if (result != UCOL_GREATER) {
   1063             log_err("ERROR 1 in test 4\n");
   1064         }
   1065         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1066         if (result != UCOL_EQUAL) {
   1067             log_err("ERROR 2 in test 4\n");
   1068         }
   1069 
   1070         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1071         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1072         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1073         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1074 
   1075         r = strcmp(sortKeyA, sortKeyAz);
   1076         if (r <= 0) {
   1077             log_err("Error 3 in test 4\n");
   1078         }
   1079         r = strcmp(sortKeyA, sortKeyB);
   1080         if (r <= 0) {
   1081             log_err("Error 4 in test 4\n");
   1082         }
   1083         r = strcmp(sortKeyAz, sortKeyBz);
   1084         if (r != 0) {
   1085             log_err("Error 5 in test 4\n");
   1086         }
   1087 
   1088         ucol_setStrength(coll, UCOL_IDENTICAL);
   1089         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1090         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1091         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1092         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1093 
   1094         r = strcmp(sortKeyA, sortKeyAz);
   1095         if (r <= 0) {
   1096             log_err("Error 6 in test 4\n");
   1097         }
   1098         r = strcmp(sortKeyA, sortKeyB);
   1099         if (r <= 0) {
   1100             log_err("Error 7 in test 4\n");
   1101         }
   1102         r = strcmp(sortKeyAz, sortKeyBz);
   1103         if (r != 0) {
   1104             log_err("Error 8 in test 4\n");
   1105         }
   1106         ucol_setStrength(coll, UCOL_TERTIARY);
   1107     }
   1108 
   1109 
   1110     /*  Test 5:  Null characters in non-normal source strings.*/
   1111 
   1112     {
   1113         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   1114         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   1115         char  sortKeyA[50];
   1116         char  sortKeyAz[50];
   1117         char  sortKeyB[50];
   1118         char  sortKeyBz[50];
   1119         int   r;
   1120 
   1121         result = ucol_strcoll(coll, strA, 6, strB, 6);
   1122         if (result != UCOL_GREATER) {
   1123             log_err("ERROR 1 in test 5\n");
   1124         }
   1125         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1126         if (result != UCOL_EQUAL) {
   1127             log_err("ERROR 2 in test 5\n");
   1128         }
   1129 
   1130         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1131         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1132         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1133         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1134 
   1135         r = strcmp(sortKeyA, sortKeyAz);
   1136         if (r <= 0) {
   1137             log_err("Error 3 in test 5\n");
   1138         }
   1139         r = strcmp(sortKeyA, sortKeyB);
   1140         if (r <= 0) {
   1141             log_err("Error 4 in test 5\n");
   1142         }
   1143         r = strcmp(sortKeyAz, sortKeyBz);
   1144         if (r != 0) {
   1145             log_err("Error 5 in test 5\n");
   1146         }
   1147 
   1148         ucol_setStrength(coll, UCOL_IDENTICAL);
   1149         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1150         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1151         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1152         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1153 
   1154         r = strcmp(sortKeyA, sortKeyAz);
   1155         if (r <= 0) {
   1156             log_err("Error 6 in test 5\n");
   1157         }
   1158         r = strcmp(sortKeyA, sortKeyB);
   1159         if (r <= 0) {
   1160             log_err("Error 7 in test 5\n");
   1161         }
   1162         r = strcmp(sortKeyAz, sortKeyBz);
   1163         if (r != 0) {
   1164             log_err("Error 8 in test 5\n");
   1165         }
   1166         ucol_setStrength(coll, UCOL_TERTIARY);
   1167     }
   1168 
   1169 
   1170     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   1171 
   1172     {
   1173         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   1174         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   1175 
   1176         result = ucol_strcoll(coll, strA, 5, strB, 5);
   1177         if (result != UCOL_LESS) {
   1178             log_err("Error 1 in test 6\n");
   1179         }
   1180         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1181         if (result != UCOL_EQUAL) {
   1182             log_err("Error 2 in test 6\n");
   1183         }
   1184     }
   1185 
   1186     ucol_close(coll);
   1187 }
   1188 
   1189 
   1190 
   1191 #if 0
   1192 static void TestGetCaseBit(void) {
   1193   static const char *caseBitData[] = {
   1194     "a", "A", "ch", "Ch", "CH",
   1195       "\\uFF9E", "\\u0009"
   1196   };
   1197 
   1198   static const uint8_t results[] = {
   1199     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   1200       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   1201   };
   1202 
   1203   uint32_t i, blen = 0;
   1204   UChar b[256] = {0};
   1205   UErrorCode status = U_ZERO_ERROR;
   1206   UCollator *UCA = ucol_open("", &status);
   1207   uint8_t res = 0;
   1208 
   1209   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   1210     blen = u_unescape(caseBitData[i], b, 256);
   1211     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   1212     if(results[i] != res) {
   1213       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   1214     }
   1215   }
   1216 }
   1217 #endif
   1218 
   1219 static void TestHangulTailoring(void) {
   1220     static const char *koreanData[] = {
   1221         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   1222             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   1223             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   1224             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   1225             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   1226             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   1227     };
   1228 
   1229     const char *rules =
   1230         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   1231         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   1232         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   1233         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   1234         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   1235         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   1236 
   1237 
   1238   UErrorCode status = U_ZERO_ERROR;
   1239   UChar rlz[2048] = { 0 };
   1240   uint32_t rlen = u_unescape(rules, rlz, 2048);
   1241 
   1242   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   1243   if(status == U_FILE_ACCESS_ERROR) {
   1244     log_data_err("Is your data around?\n");
   1245     return;
   1246   } else if(U_FAILURE(status)) {
   1247     log_err("Error opening collator\n");
   1248     return;
   1249   }
   1250 
   1251   log_verbose("Using start of korean rules\n");
   1252 
   1253   if(U_SUCCESS(status)) {
   1254     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   1255   } else {
   1256     log_err("Unable to open collator with rules %s\n", rules);
   1257   }
   1258 
   1259   ucol_close(coll);
   1260 
   1261   log_verbose("Using ko__LOTUS locale\n");
   1262   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   1263 }
   1264 
   1265 /*
   1266  * The secondary/tertiary compression middle byte
   1267  * as used by the current implementation.
   1268  * Subject to change as the sort key compression changes.
   1269  * See class CollationKeys.
   1270  */
   1271 enum {
   1272     SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
   1273     TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
   1274 };
   1275 
   1276 static void TestCompressOverlap(void) {
   1277     UChar       secstr[150];
   1278     UChar       tertstr[150];
   1279     UErrorCode  status = U_ZERO_ERROR;
   1280     UCollator  *coll;
   1281     uint8_t     result[500];
   1282     uint32_t    resultlen;
   1283     int         count = 0;
   1284     uint8_t    *tempptr;
   1285 
   1286     coll = ucol_open("", &status);
   1287 
   1288     if (U_FAILURE(status)) {
   1289         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   1290         return;
   1291     }
   1292     while (count < 149) {
   1293         secstr[count] = 0x0020; /* [06, 05, 05] */
   1294         tertstr[count] = 0x0020;
   1295         count ++;
   1296     }
   1297 
   1298     /* top down compression ----------------------------------- */
   1299     secstr[count] = 0x0332; /* [, 87, 05] */
   1300     tertstr[count] = 0x3000; /* [06, 05, 07] */
   1301 
   1302     /* no compression secstr should have 150 secondary bytes, tertstr should
   1303     have 150 tertiary bytes.
   1304     with correct compression, secstr should have 6 secondary
   1305     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
   1306     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
   1307     (void)resultlen;    /* Suppress set but not used warning. */
   1308     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1309     while (*(tempptr + 1) != 1) {
   1310         /* the last secondary collation element is not checked since it is not
   1311         part of the compression */
   1312         if (*tempptr < SEC_COMMON_MIDDLE) {
   1313             log_err("Secondary top down compression overlapped\n");
   1314         }
   1315         tempptr ++;
   1316     }
   1317 
   1318     /* tertiary top/bottom/common for en_US is similar to the secondary
   1319     top/bottom/common */
   1320     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
   1321     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1322     while (*(tempptr + 1) != 0) {
   1323         /* the last secondary collation element is not checked since it is not
   1324         part of the compression */
   1325         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
   1326             log_err("Tertiary top down compression overlapped\n");
   1327         }
   1328         tempptr ++;
   1329     }
   1330 
   1331     /* bottom up compression ------------------------------------- */
   1332     secstr[count] = 0;
   1333     tertstr[count] = 0;
   1334     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
   1335     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1336     while (*(tempptr + 1) != 1) {
   1337         /* the last secondary collation element is not checked since it is not
   1338         part of the compression */
   1339         if (*tempptr > SEC_COMMON_MIDDLE) {
   1340             log_err("Secondary bottom up compression overlapped\n");
   1341         }
   1342         tempptr ++;
   1343     }
   1344 
   1345     /* tertiary top/bottom/common for en_US is similar to the secondary
   1346     top/bottom/common */
   1347     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
   1348     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1349     while (*(tempptr + 1) != 0) {
   1350         /* the last secondary collation element is not checked since it is not
   1351         part of the compression */
   1352         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
   1353             log_err("Tertiary bottom up compression overlapped\n");
   1354         }
   1355         tempptr ++;
   1356     }
   1357 
   1358     ucol_close(coll);
   1359 }
   1360 
   1361 static void TestCyrillicTailoring(void) {
   1362   static const char *test[] = {
   1363     "\\u0410b",
   1364       "\\u0410\\u0306a",
   1365       "\\u04d0A"
   1366   };
   1367 
   1368     /* Russian overrides contractions, so this test is not valid anymore */
   1369     /*genericLocaleStarter("ru", test, 3);*/
   1370 
   1371     genericLocaleStarter("root", test, 3);
   1372     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   1373     genericRulesStarter("&Z < \\u0410", test, 3);
   1374     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   1375     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   1376     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   1377     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   1378 }
   1379 
   1380 static void TestSuppressContractions(void) {
   1381 
   1382   static const char *testNoCont2[] = {
   1383       "\\u0410\\u0302a",
   1384       "\\u0410\\u0306b",
   1385       "\\u0410c"
   1386   };
   1387   static const char *testNoCont[] = {
   1388       "a\\u0410",
   1389       "A\\u0410\\u0306",
   1390       "\\uFF21\\u0410\\u0302"
   1391   };
   1392 
   1393   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   1394   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   1395 }
   1396 
   1397 static void TestContraction(void) {
   1398     const static char *testrules[] = {
   1399         "&A = AB / B",
   1400         "&A = A\\u0306/\\u0306",
   1401         "&c = ch / h"
   1402     };
   1403     const static UChar testdata[][2] = {
   1404         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   1405         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   1406         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   1407     };
   1408     const static UChar testdata2[][2] = {
   1409         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   1410         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   1411         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   1412     };
   1413 #if 0
   1414     /*
   1415      * These pairs of rule strings are not guaranteed to yield the very same mappings.
   1416      * In fact, LDML 24 recommends an improved way of creating mappings
   1417      * which always yields different mappings for such pairs. See
   1418      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
   1419      */
   1420     const static char *testrules3[] = {
   1421         "&z < xyz &xyzw << B",
   1422         "&z < xyz &xyz << B / w",
   1423         "&z < ch &achm << B",
   1424         "&z < ch &a << B / chm",
   1425         "&\\ud800\\udc00w << B",
   1426         "&\\ud800\\udc00 << B / w",
   1427         "&a\\ud800\\udc00m << B",
   1428         "&a << B / \\ud800\\udc00m",
   1429     };
   1430 #endif
   1431 
   1432     UErrorCode  status   = U_ZERO_ERROR;
   1433     UCollator  *coll;
   1434     UChar       rule[256] = {0};
   1435     uint32_t    rlen     = 0;
   1436     int         i;
   1437 
   1438     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   1439         UCollationElements *iter1;
   1440         int j = 0;
   1441         log_verbose("Rule %s for testing\n", testrules[i]);
   1442         rlen = u_unescape(testrules[i], rule, 32);
   1443         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1444         if (U_FAILURE(status)) {
   1445             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1446             return;
   1447         }
   1448         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   1449         if (U_FAILURE(status)) {
   1450             log_err("Collation iterator creation failed\n");
   1451             return;
   1452         }
   1453         while (j < 2) {
   1454             UCollationElements *iter2 = ucol_openElements(coll,
   1455                                                          &(testdata[i][j]),
   1456                                                          1, &status);
   1457             uint32_t ce;
   1458             if (U_FAILURE(status)) {
   1459                 log_err("Collation iterator creation failed\n");
   1460                 return;
   1461             }
   1462             ce = ucol_next(iter2, &status);
   1463             while (ce != UCOL_NULLORDER) {
   1464                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   1465                     log_err("Collation elements in contraction split does not match\n");
   1466                     return;
   1467                 }
   1468                 ce = ucol_next(iter2, &status);
   1469             }
   1470             j ++;
   1471             ucol_closeElements(iter2);
   1472         }
   1473         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   1474             log_err("Collation elements not exhausted\n");
   1475             return;
   1476         }
   1477         ucol_closeElements(iter1);
   1478         ucol_close(coll);
   1479     }
   1480 
   1481     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   1482     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1483     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   1484         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1485                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   1486                 testdata2[1][1]);
   1487         return;
   1488     }
   1489     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   1490         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1491                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   1492                 testdata2[2][1]);
   1493         return;
   1494     }
   1495     ucol_close(coll);
   1496 #if 0  /* see above */
   1497     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   1498         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
   1499         UCollator          *coll1,
   1500                            *coll2;
   1501         UCollationElements *iter1,
   1502                            *iter2;
   1503         UChar               ch = 0x0042 /* 'B' */;
   1504         uint32_t            ce;
   1505         rlen = u_unescape(testrules3[i], rule, 32);
   1506         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1507         rlen = u_unescape(testrules3[i + 1], rule, 32);
   1508         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1509         if (U_FAILURE(status)) {
   1510             log_err("Collator creation failed %s\n", testrules[i]);
   1511             return;
   1512         }
   1513         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   1514         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   1515         if (U_FAILURE(status)) {
   1516             log_err("Collation iterator creation failed\n");
   1517             return;
   1518         }
   1519         ce = ucol_next(iter1, &status);
   1520         if (U_FAILURE(status)) {
   1521             log_err("Retrieving ces failed\n");
   1522             return;
   1523         }
   1524         while (ce != UCOL_NULLORDER) {
   1525             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
   1526             if (ce == ce2) {
   1527                 log_verbose("CEs match: %08x\n", ce);
   1528             } else {
   1529                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
   1530                 return;
   1531             }
   1532             ce = ucol_next(iter1, &status);
   1533             if (U_FAILURE(status)) {
   1534                 log_err("Retrieving ces failed\n");
   1535                 return;
   1536             }
   1537         }
   1538         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   1539             log_err("CEs not exhausted\n");
   1540             return;
   1541         }
   1542         ucol_closeElements(iter1);
   1543         ucol_closeElements(iter2);
   1544         ucol_close(coll1);
   1545         ucol_close(coll2);
   1546     }
   1547 #endif
   1548 }
   1549 
   1550 static void TestExpansion(void) {
   1551     const static char *testrules[] = {
   1552 #if 0
   1553         /*
   1554          * This seems to have tested that M was not mapped to an expansion.
   1555          * I believe the old builder just did that because it computed the extension CEs
   1556          * at the very end, which was a bug.
   1557          * Among other problems, it violated the core tailoring principle
   1558          * by making an earlier rule depend on a later one.
   1559          * And, of course, if M did not get an expansion, then it was primary different from K,
   1560          * unlike what the rule &K<<M says.
   1561          */
   1562         "&J << K / B & K << M",
   1563 #endif
   1564         "&J << K / B << M"
   1565     };
   1566     const static UChar testdata[][3] = {
   1567         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   1568         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   1569         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   1570         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   1571         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   1572         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   1573     };
   1574 
   1575     UErrorCode  status   = U_ZERO_ERROR;
   1576     UCollator  *coll;
   1577     UChar       rule[256] = {0};
   1578     uint32_t    rlen     = 0;
   1579     int         i;
   1580 
   1581     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   1582         int j = 0;
   1583         log_verbose("Rule %s for testing\n", testrules[i]);
   1584         rlen = u_unescape(testrules[i], rule, 32);
   1585         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1586         if (U_FAILURE(status)) {
   1587             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1588             return;
   1589         }
   1590 
   1591         for (j = 0; j < 5; j ++) {
   1592             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   1593         }
   1594         ucol_close(coll);
   1595     }
   1596 }
   1597 
   1598 #if 0
   1599 /* this test tests the current limitations of the engine */
   1600 /* it always fail, so it is disabled by default */
   1601 static void TestLimitations(void) {
   1602   /* recursive expansions */
   1603   {
   1604     static const char *rule = "&a=b/c&d=c/e";
   1605     static const char *tlimit01[] = {"add","b","adf"};
   1606     static const char *tlimit02[] = {"aa","b","af"};
   1607     log_verbose("recursive expansions\n");
   1608     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   1609     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   1610   }
   1611   /* contractions spanning expansions */
   1612   {
   1613     static const char *rule = "&a<<<c/e&g<<<eh";
   1614     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   1615     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   1616     log_verbose("contractions spanning expansions\n");
   1617     genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
   1618     genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
   1619   }
   1620   /* normalization: nulls in contractions */
   1621   {
   1622     static const char *rule = "&a<<<\\u0000\\u0302";
   1623     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   1624     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   1625     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   1626     static const UColAttributeValue valOn[] = { UCOL_ON };
   1627     static const UColAttributeValue valOff[] = { UCOL_OFF };
   1628 
   1629     log_verbose("NULL in contractions\n");
   1630     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   1631     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   1632     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   1633     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   1634 
   1635   }
   1636   /* normalization: contractions spanning normalization */
   1637   {
   1638     static const char *rule = "&a<<<\\u0000\\u0302";
   1639     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   1640     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   1641     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   1642     static const UColAttributeValue valOn[] = { UCOL_ON };
   1643     static const UColAttributeValue valOff[] = { UCOL_OFF };
   1644 
   1645     log_verbose("contractions spanning normalization\n");
   1646     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   1647     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   1648     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   1649     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   1650 
   1651   }
   1652   /* variable top:  */
   1653   {
   1654     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   1655     static const char *rule = "&\\u2010<x<[variable top]=z";
   1656     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   1657     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   1658     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   1659     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   1660     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   1661     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   1662     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   1663 
   1664     log_verbose("variable top\n");
   1665     genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   1666     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   1667     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   1668     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   1669     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
   1670 
   1671   }
   1672   /* case level */
   1673   {
   1674     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   1675     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   1676     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   1677     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   1678     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   1679     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   1680     log_verbose("case level\n");
   1681     genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   1682     genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
   1683     /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   1684     /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
   1685   }
   1686 
   1687 }
   1688 #endif
   1689 
   1690 static void TestBocsuCoverage(void) {
   1691   UErrorCode status = U_ZERO_ERROR;
   1692   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   1693   UChar       test[256] = {0};
   1694   uint32_t    tlen     = u_unescape(testString, test, 32);
   1695   uint8_t key[256]     = {0};
   1696   uint32_t klen         = 0;
   1697 
   1698   UCollator *coll = ucol_open("", &status);
   1699   if(U_SUCCESS(status)) {
   1700   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   1701 
   1702   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   1703   (void)klen;    /* Suppress set but not used warning. */
   1704 
   1705   ucol_close(coll);
   1706   } else {
   1707     log_data_err("Couldn't open UCA\n");
   1708   }
   1709 }
   1710 
   1711 static void TestVariableTopSetting(void) {
   1712   UErrorCode status = U_ZERO_ERROR;
   1713   uint32_t varTopOriginal = 0, varTop1, varTop2;
   1714   UCollator *coll = ucol_open("", &status);
   1715   if(U_SUCCESS(status)) {
   1716 
   1717   static const UChar nul = 0;
   1718   static const UChar space = 0x20;
   1719   static const UChar dot = 0x2e;  /* punctuation */
   1720   static const UChar degree = 0xb0;  /* symbol */
   1721   static const UChar dollar = 0x24;  /* currency symbol */
   1722   static const UChar zero = 0x30;  /* digit */
   1723 
   1724   varTopOriginal = ucol_getVariableTop(coll, &status);
   1725   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
   1726   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1727 
   1728   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1729   varTop2 = ucol_getVariableTop(coll, &status);
   1730   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
   1731   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1732       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1733       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1734       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1735       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1736       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1737       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1738     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
   1739   }
   1740 
   1741   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
   1742   varTop2 = ucol_getVariableTop(coll, &status);
   1743   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
   1744   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1745       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1746       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1747       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1748       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1749       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1750       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1751     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
   1752   }
   1753 
   1754   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
   1755   varTop2 = ucol_getVariableTop(coll, &status);
   1756   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
   1757   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1758       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1759       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1760       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1761       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1762       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1763       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1764     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
   1765   }
   1766 
   1767   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
   1768   varTop2 = ucol_getVariableTop(coll, &status);
   1769   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
   1770   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1771       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1772       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1773       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1774       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1775       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1776       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1777     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
   1778   }
   1779 
   1780   log_verbose("Testing setting variable top to contractions\n");
   1781   {
   1782     UChar first[4] = { 0 };
   1783     first[0] = 0x0040;
   1784     first[1] = 0x0050;
   1785     first[2] = 0x0000;
   1786 
   1787     status = U_ZERO_ERROR;
   1788     ucol_setVariableTop(coll, first, -1, &status);
   1789 
   1790     if(U_SUCCESS(status)) {
   1791       log_err("Invalid contraction succeded in setting variable top!\n");
   1792     }
   1793 
   1794   }
   1795 
   1796   log_verbose("Test restoring variable top\n");
   1797 
   1798   status = U_ZERO_ERROR;
   1799   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   1800   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   1801     log_err("Couldn't restore old variable top\n");
   1802   }
   1803 
   1804   log_verbose("Testing calling with error set\n");
   1805 
   1806   status = U_INTERNAL_PROGRAM_ERROR;
   1807   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1808   varTop2 = ucol_getVariableTop(coll, &status);
   1809   ucol_restoreVariableTop(coll, varTop2, &status);
   1810   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
   1811   varTop2 = ucol_getVariableTop(NULL, &status);
   1812   ucol_restoreVariableTop(NULL, varTop2, &status);
   1813   if(status != U_INTERNAL_PROGRAM_ERROR) {
   1814     log_err("Bad reaction to passed error!\n");
   1815   }
   1816   ucol_close(coll);
   1817   } else {
   1818     log_data_err("Couldn't open UCA collator\n");
   1819   }
   1820 }
   1821 
   1822 static void TestMaxVariable() {
   1823   UErrorCode status = U_ZERO_ERROR;
   1824   UColReorderCode oldMax, max;
   1825   UCollator *coll;
   1826 
   1827   static const UChar nul = 0;
   1828   static const UChar space = 0x20;
   1829   static const UChar dot = 0x2e;  /* punctuation */
   1830   static const UChar degree = 0xb0;  /* symbol */
   1831   static const UChar dollar = 0x24;  /* currency symbol */
   1832   static const UChar zero = 0x30;  /* digit */
   1833 
   1834   coll = ucol_open("", &status);
   1835   if(U_FAILURE(status)) {
   1836     log_data_err("Couldn't open root collator\n");
   1837     return;
   1838   }
   1839 
   1840   oldMax = ucol_getMaxVariable(coll);
   1841   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
   1842   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1843 
   1844   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1845   max = ucol_getMaxVariable(coll);
   1846   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
   1847   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
   1848       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1849       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1850       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1851       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1852       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1853       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1854     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
   1855   }
   1856 
   1857   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
   1858   max = ucol_getMaxVariable(coll);
   1859   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
   1860   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
   1861       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1862       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1863       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1864       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1865       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1866       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1867     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
   1868   }
   1869 
   1870   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
   1871   max = ucol_getMaxVariable(coll);
   1872   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
   1873   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
   1874       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1875       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1876       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1877       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1878       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1879       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1880     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
   1881   }
   1882 
   1883   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
   1884   max = ucol_getMaxVariable(coll);
   1885   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
   1886   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
   1887       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1888       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1889       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1890       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1891       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1892       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1893     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
   1894   }
   1895 
   1896   log_verbose("Test restoring maxVariable\n");
   1897   status = U_ZERO_ERROR;
   1898   ucol_setMaxVariable(coll, oldMax, &status);
   1899   if(oldMax != ucol_getMaxVariable(coll)) {
   1900     log_err("Couldn't restore old maxVariable\n");
   1901   }
   1902 
   1903   log_verbose("Testing calling with error set\n");
   1904   status = U_INTERNAL_PROGRAM_ERROR;
   1905   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1906   max = ucol_getMaxVariable(coll);
   1907   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
   1908     log_err("Bad reaction to passed error!\n");
   1909   }
   1910   ucol_close(coll);
   1911 }
   1912 
   1913 static void TestNonChars(void) {
   1914   static const char *test[] = {
   1915       "\\u0000",  /* ignorable */
   1916       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   1917       "\\uFDD0", "\\uFDEF",
   1918       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   1919       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   1920       "\\U0003FFFE", "\\U0003FFFF",
   1921       "\\U0004FFFE", "\\U0004FFFF",
   1922       "\\U0005FFFE", "\\U0005FFFF",
   1923       "\\U0006FFFE", "\\U0006FFFF",
   1924       "\\U0007FFFE", "\\U0007FFFF",
   1925       "\\U0008FFFE", "\\U0008FFFF",
   1926       "\\U0009FFFE", "\\U0009FFFF",
   1927       "\\U000AFFFE", "\\U000AFFFF",
   1928       "\\U000BFFFE", "\\U000BFFFF",
   1929       "\\U000CFFFE", "\\U000CFFFF",
   1930       "\\U000DFFFE", "\\U000DFFFF",
   1931       "\\U000EFFFE", "\\U000EFFFF",
   1932       "\\U000FFFFE", "\\U000FFFFF",
   1933       "\\U0010FFFE", "\\U0010FFFF",
   1934       "\\uFFFF"  /* special character with maximum primary weight */
   1935   };
   1936   UErrorCode status = U_ZERO_ERROR;
   1937   UCollator *coll = ucol_open("en_US", &status);
   1938 
   1939   log_verbose("Test non characters\n");
   1940 
   1941   if(U_SUCCESS(status)) {
   1942     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   1943   } else {
   1944     log_err_status(status, "Unable to open collator\n");
   1945   }
   1946 
   1947   ucol_close(coll);
   1948 }
   1949 
   1950 static void TestExtremeCompression(void) {
   1951   static char *test[4];
   1952   int32_t j = 0, i = 0;
   1953 
   1954   for(i = 0; i<4; i++) {
   1955     test[i] = (char *)malloc(2048*sizeof(char));
   1956   }
   1957 
   1958   for(j = 20; j < 500; j++) {
   1959     for(i = 0; i<4; i++) {
   1960       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   1961       test[i][j-1] = (char)('a'+i);
   1962       test[i][j] = 0;
   1963     }
   1964     genericLocaleStarter("en_US", (const char **)test, 4);
   1965   }
   1966 
   1967 
   1968   for(i = 0; i<4; i++) {
   1969     free(test[i]);
   1970   }
   1971 }
   1972 
   1973 #if 0
   1974 static void TestExtremeCompression(void) {
   1975   static char *test[4];
   1976   int32_t j = 0, i = 0;
   1977   UErrorCode status = U_ZERO_ERROR;
   1978   UCollator *coll = ucol_open("en_US", status);
   1979   for(i = 0; i<4; i++) {
   1980     test[i] = (char *)malloc(2048*sizeof(char));
   1981   }
   1982   for(j = 10; j < 2048; j++) {
   1983     for(i = 0; i<4; i++) {
   1984       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
   1985       test[i][j-1] = (char)('a'+i);
   1986       test[i][j] = 0;
   1987     }
   1988   }
   1989   genericLocaleStarter("en_US", (const char **)test, 4);
   1990 
   1991   for(j = 10; j < 2048; j++) {
   1992     for(i = 0; i<1; i++) {
   1993       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   1994       test[i][j] = 0;
   1995     }
   1996   }
   1997   for(i = 0; i<4; i++) {
   1998     free(test[i]);
   1999   }
   2000 }
   2001 #endif
   2002 
   2003 static void TestSurrogates(void) {
   2004   static const char *test[] = {
   2005     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   2006        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   2007        "\\ud800\\udc00f",  "\\ud800\\udc00",
   2008        "\\ud800\\udc00c", "\\ud800\\udc00b",
   2009        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   2010        "\\ud800\\udc00a",
   2011        "c", "b"
   2012   };
   2013 
   2014   static const char *rule =
   2015     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   2016        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   2017        "< \\ud800\\udc00f  << \\ud800\\udc00"
   2018        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   2019        "< \\ud800\\udc00a  < c < b" ;
   2020 
   2021   genericRulesStarter(rule, test, 14);
   2022 }
   2023 
   2024 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   2025 static void TestPrefix(void) {
   2026   uint32_t i;
   2027 
   2028   static const struct {
   2029     const char *rules;
   2030     const char *data[50];
   2031     const uint32_t len;
   2032   } tests[] = {
   2033     { "&z <<< z|a",
   2034       {"zz", "za"}, 2 },
   2035 
   2036     { "&z <<< z|   a",
   2037       {"zz", "za"}, 2 },
   2038     { "[strength I]"
   2039       "&a=\\ud900\\udc25"
   2040       "&z<<<\\ud900\\udc25|a",
   2041       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   2042   };
   2043 
   2044 
   2045   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2046     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2047   }
   2048 }
   2049 
   2050 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   2051 /* JIS X 4061 collation order implementation                                   */
   2052 static void TestNewJapanese(void) {
   2053 
   2054   static const char * const test1[] = {
   2055       "\\u30b7\\u30e3\\u30fc\\u30ec",
   2056       "\\u30b7\\u30e3\\u30a4",
   2057       "\\u30b7\\u30e4\\u30a3",
   2058       "\\u30b7\\u30e3\\u30ec",
   2059       "\\u3061\\u3087\\u3053",
   2060       "\\u3061\\u3088\\u3053",
   2061       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   2062       "\\u3066\\u30fc\\u305f",
   2063       "\\u30c6\\u30fc\\u30bf",
   2064       "\\u30c6\\u30a7\\u30bf",
   2065       "\\u3066\\u3048\\u305f",
   2066       "\\u3067\\u30fc\\u305f",
   2067       "\\u30c7\\u30fc\\u30bf",
   2068       "\\u30c7\\u30a7\\u30bf",
   2069       "\\u3067\\u3048\\u305f",
   2070       "\\u3066\\u30fc\\u305f\\u30fc",
   2071       "\\u30c6\\u30fc\\u30bf\\u30a1",
   2072       "\\u30c6\\u30a7\\u30bf\\u30fc",
   2073       "\\u3066\\u3047\\u305f\\u3041",
   2074       "\\u3066\\u3048\\u305f\\u30fc",
   2075       "\\u3067\\u30fc\\u305f\\u30fc",
   2076       "\\u30c7\\u30fc\\u30bf\\u30a1",
   2077       "\\u3067\\u30a7\\u305f\\u30a1",
   2078       "\\u30c7\\u3047\\u30bf\\u3041",
   2079       "\\u30c7\\u30a8\\u30bf\\u30a2",
   2080       "\\u3072\\u3086",
   2081       "\\u3073\\u3085\\u3042",
   2082       "\\u3074\\u3085\\u3042",
   2083       "\\u3073\\u3085\\u3042\\u30fc",
   2084       "\\u30d3\\u30e5\\u30a2\\u30fc",
   2085       "\\u3074\\u3085\\u3042\\u30fc",
   2086       "\\u30d4\\u30e5\\u30a2\\u30fc",
   2087       "\\u30d2\\u30e5\\u30a6",
   2088       "\\u30d2\\u30e6\\u30a6",
   2089       "\\u30d4\\u30e5\\u30a6\\u30a2",
   2090       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   2091       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   2092       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   2093       "\\u3072\\u3085\\u3093",
   2094       "\\u3074\\u3085\\u3093",
   2095       "\\u3075\\u30fc\\u308a",
   2096       "\\u30d5\\u30fc\\u30ea",
   2097       "\\u3075\\u3045\\u308a",
   2098       "\\u3075\\u30a5\\u308a",
   2099       "\\u3075\\u30a5\\u30ea",
   2100       "\\u30d5\\u30a6\\u30ea",
   2101       "\\u3076\\u30fc\\u308a",
   2102       "\\u30d6\\u30fc\\u30ea",
   2103       "\\u3076\\u3045\\u308a",
   2104       "\\u30d6\\u30a5\\u308a",
   2105       "\\u3077\\u3046\\u308a",
   2106       "\\u30d7\\u30a6\\u30ea",
   2107       "\\u3075\\u30fc\\u308a\\u30fc",
   2108       "\\u30d5\\u30a5\\u30ea\\u30fc",
   2109       "\\u3075\\u30a5\\u308a\\u30a3",
   2110       "\\u30d5\\u3045\\u308a\\u3043",
   2111       "\\u30d5\\u30a6\\u30ea\\u30fc",
   2112       "\\u3075\\u3046\\u308a\\u3043",
   2113       "\\u30d6\\u30a6\\u30ea\\u30a4",
   2114       "\\u3077\\u30fc\\u308a\\u30fc",
   2115       "\\u3077\\u30a5\\u308a\\u30a4",
   2116       "\\u3077\\u3046\\u308a\\u30fc",
   2117       "\\u30d7\\u30a6\\u30ea\\u30a4",
   2118       "\\u30d5\\u30fd",
   2119       "\\u3075\\u309e",
   2120       "\\u3076\\u309d",
   2121       "\\u3076\\u3075",
   2122       "\\u3076\\u30d5",
   2123       "\\u30d6\\u3075",
   2124       "\\u30d6\\u30d5",
   2125       "\\u3076\\u309e",
   2126       "\\u3076\\u3077",
   2127       "\\u30d6\\u3077",
   2128       "\\u3077\\u309d",
   2129       "\\u30d7\\u30fd",
   2130       "\\u3077\\u3075",
   2131 };
   2132 
   2133   static const char *test2[] = {
   2134     "\\u306f\\u309d", /* H\\u309d */
   2135     "\\u30cf\\u30fd", /* K\\u30fd */
   2136     "\\u306f\\u306f", /* HH */
   2137     "\\u306f\\u30cf", /* HK */
   2138     "\\u30cf\\u30cf", /* KK */
   2139     "\\u306f\\u309e", /* H\\u309e */
   2140     "\\u30cf\\u30fe", /* K\\u30fe */
   2141     "\\u306f\\u3070", /* HH\\u309b */
   2142     "\\u30cf\\u30d0", /* KK\\u309b */
   2143     "\\u306f\\u3071", /* HH\\u309c */
   2144     "\\u30cf\\u3071", /* KH\\u309c */
   2145     "\\u30cf\\u30d1", /* KK\\u309c */
   2146     "\\u3070\\u309d", /* H\\u309b\\u309d */
   2147     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   2148     "\\u3070\\u306f", /* H\\u309bH */
   2149     "\\u30d0\\u30cf", /* K\\u309bK */
   2150     "\\u3070\\u309e", /* H\\u309b\\u309e */
   2151     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   2152     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   2153     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   2154     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   2155     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   2156     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   2157     "\\u3071\\u309d", /* H\\u309c\\u309d */
   2158     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   2159     "\\u3071\\u306f", /* H\\u309cH */
   2160     "\\u30d1\\u30cf", /* K\\u309cK */
   2161     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   2162     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   2163     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   2164     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   2165     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   2166   };
   2167   /*
   2168   static const char *test3[] = {
   2169     "\\u221er\\u221e",
   2170     "\\u221eR#",
   2171     "\\u221et\\u221e",
   2172     "#r\\u221e",
   2173     "#R#",
   2174     "#t%",
   2175     "#T%",
   2176     "8t\\u221e",
   2177     "8T\\u221e",
   2178     "8t#",
   2179     "8T#",
   2180     "8t%",
   2181     "8T%",
   2182     "8t8",
   2183     "8T8",
   2184     "\\u03c9r\\u221e",
   2185     "\\u03a9R%",
   2186     "rr\\u221e",
   2187     "rR\\u221e",
   2188     "Rr\\u221e",
   2189     "RR\\u221e",
   2190     "RT%",
   2191     "rt8",
   2192     "tr\\u221e",
   2193     "tr8",
   2194     "TR8",
   2195     "tt8",
   2196     "\\u30b7\\u30e3\\u30fc\\u30ec",
   2197   };
   2198   */
   2199   static const UColAttribute att[] = { UCOL_STRENGTH };
   2200   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   2201 
   2202   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   2203   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   2204 
   2205   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   2206   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   2207   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   2208   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   2209   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   2210 }
   2211 
   2212 static void TestStrCollIdenticalPrefix(void) {
   2213   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   2214   const char* test[] = {
   2215     "ab\\ud9b0\\udc70",
   2216     "ab\\ud9b0\\udc71"
   2217   };
   2218   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   2219 }
   2220 /* Contractions should have all their canonically equivalent */
   2221 /* strings included */
   2222 static void TestContractionClosure(void) {
   2223   static const struct {
   2224     const char *rules;
   2225     const char *data[10];
   2226     const uint32_t len;
   2227   } tests[] = {
   2228     {   "&b=\\u00e4\\u00e4",
   2229       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   2230     {   "&b=\\u00C5",
   2231       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   2232   };
   2233   uint32_t i;
   2234 
   2235 
   2236   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2237     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   2238   }
   2239 }
   2240 
   2241 /* This tests also fails*/
   2242 static void TestBeforePrefixFailure(void) {
   2243   static const struct {
   2244     const char *rules;
   2245     const char *data[10];
   2246     const uint32_t len;
   2247   } tests[] = {
   2248     { "&g <<< a"
   2249       "&[before 3]\\uff41 <<< x",
   2250       {"x", "\\uff41"}, 2 },
   2251     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2252         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   2253         "&[before 3]\\u30a7<<<\\u30a9",
   2254       {"\\u30a9", "\\u30a7"}, 2 },
   2255     {   "&[before 3]\\u30a7<<<\\u30a9"
   2256         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2257         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   2258       {"\\u30a9", "\\u30a7"}, 2 },
   2259   };
   2260   uint32_t i;
   2261 
   2262 
   2263   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2264     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2265   }
   2266 
   2267 #if 0
   2268   const char* rule1 =
   2269         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2270         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   2271         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   2272   const char* rule2 =
   2273         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   2274         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2275         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   2276   const char* test[] = {
   2277       "\\u30c6\\u30fc\\u30bf",
   2278       "\\u30c6\\u30a7\\u30bf",
   2279   };
   2280   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   2281   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   2282 /* this piece of code should be in some sort of verbose mode     */
   2283 /* it gets the collation elements for elements and prints them   */
   2284 /* This is useful when trying to see whether the problem is      */
   2285   {
   2286     UErrorCode status = U_ZERO_ERROR;
   2287     uint32_t i = 0;
   2288     UCollationElements *it = NULL;
   2289     uint32_t CE;
   2290     UChar string[256];
   2291     uint32_t uStringLen;
   2292     UCollator *coll = NULL;
   2293 
   2294     uStringLen = u_unescape(rule1, string, 256);
   2295 
   2296     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2297 
   2298     /*coll = ucol_open("ja_JP_JIS", &status);*/
   2299     it = ucol_openElements(coll, string, 0, &status);
   2300 
   2301     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   2302       log_verbose("%s\n", test[i]);
   2303       uStringLen = u_unescape(test[i], string, 256);
   2304       ucol_setText(it, string, uStringLen, &status);
   2305 
   2306       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   2307         log_verbose("%08X\n", CE);
   2308       }
   2309       log_verbose("\n");
   2310 
   2311     }
   2312 
   2313     ucol_closeElements(it);
   2314     ucol_close(coll);
   2315   }
   2316 #endif
   2317 }
   2318 
   2319 static void TestPrefixCompose(void) {
   2320   const char* rule1 =
   2321         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   2322   /*
   2323   const char* test[] = {
   2324       "\\u30c6\\u30fc\\u30bf",
   2325       "\\u30c6\\u30a7\\u30bf",
   2326   };
   2327   */
   2328   {
   2329     UErrorCode status = U_ZERO_ERROR;
   2330     /*uint32_t i = 0;*/
   2331     /*UCollationElements *it = NULL;*/
   2332 /*    uint32_t CE;*/
   2333     UChar string[256];
   2334     uint32_t uStringLen;
   2335     UCollator *coll = NULL;
   2336 
   2337     uStringLen = u_unescape(rule1, string, 256);
   2338 
   2339     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2340     ucol_close(coll);
   2341   }
   2342 
   2343 
   2344 }
   2345 
   2346 /*
   2347 [last variable] last variable value
   2348 [last primary ignorable] largest CE for primary ignorable
   2349 [last secondary ignorable] largest CE for secondary ignorable
   2350 [last tertiary ignorable] largest CE for tertiary ignorable
   2351 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   2352 */
   2353 
   2354 static void TestRuleOptions(void) {
   2355   /* values here are hardcoded and are correct for the current UCA
   2356    * when the UCA changes, one might be forced to change these
   2357    * values.
   2358    */
   2359 
   2360   /*
   2361    * These strings contain the last character before [variable top]
   2362    * and the first and second characters (by primary weights) after it.
   2363    * See FractionalUCA.txt. For example:
   2364       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   2365       [variable top = 0C FE]
   2366       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   2367      and
   2368       00B4; [0D 0C, 05, 05]
   2369    *
   2370    * Note: Starting with UCA 6.0, the [variable top] collation element
   2371    * is not the weight of any character or string,
   2372    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   2373    */
   2374 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   2375 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   2376 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   2377 
   2378   /*
   2379    * This string has to match the character that has the [last regular] weight
   2380    * which changes with each UCA version.
   2381    * See the bottom of FractionalUCA.txt which says something like
   2382       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   2383    *
   2384    * Note: Starting with UCA 6.0, the [last regular] collation element
   2385    * is not the weight of any character or string,
   2386    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   2387    */
   2388 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   2389 
   2390   static const struct {
   2391     const char *rules;
   2392     const char *data[10];
   2393     const uint32_t len;
   2394   } tests[] = {
   2395 #if 0
   2396     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
   2397     /* - all befores here amount to zero */
   2398     { "&[before 3][first tertiary ignorable]<<<a",
   2399         { "\\u0000", "a"}, 2
   2400     }, /* you cannot go before first tertiary ignorable */
   2401 
   2402     { "&[before 3][last tertiary ignorable]<<<a",
   2403         { "\\u0000", "a"}, 2
   2404     }, /* you cannot go before last tertiary ignorable */
   2405 #endif
   2406     /*
   2407      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
   2408      * and it *is* possible to "go before" that.
   2409      */
   2410     { "&[before 3][first secondary ignorable]<<<a",
   2411         { "\\u0000", "a"}, 2
   2412     },
   2413 
   2414     { "&[before 3][last secondary ignorable]<<<a",
   2415         { "\\u0000", "a"}, 2
   2416     },
   2417 
   2418     /* 'normal' befores */
   2419 
   2420     /*
   2421      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
   2422      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
   2423      * because there is no tailoring space before that boundary.
   2424      * Made the tests work by tailoring to a space instead.
   2425      */
   2426     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
   2427         {  "c", "b", "\\u0332", "a" }, 4
   2428     },
   2429 
   2430     /* we don't have a code point that corresponds to
   2431      * the last primary ignorable
   2432      */
   2433     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
   2434         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   2435     },
   2436 
   2437     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   2438         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   2439     },
   2440 
   2441     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   2442         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   2443     },
   2444 
   2445     { "&[first regular]<a"
   2446       "&[before 1][first regular]<b",
   2447       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   2448     },
   2449 
   2450     { "&[before 1][last regular]<b"
   2451       "&[last regular]<a",
   2452         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   2453     },
   2454 
   2455     { "&[before 1][first implicit]<b"
   2456       "&[first implicit]<a",
   2457         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   2458     },
   2459 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
   2460     { "&[before 1][last implicit]<b"
   2461       "&[last implicit]<a",
   2462         { "b", "\\U0010FFFD", "a" }, 3
   2463     },
   2464 #endif
   2465     { "&[last variable]<z"
   2466       "&' '<x"  /* was &[last primary ignorable]<x, see above */
   2467       "&[last secondary ignorable]<<y"
   2468       "&[last tertiary ignorable]<<<w"
   2469       "&[top]<u",
   2470       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   2471     }
   2472 
   2473   };
   2474   uint32_t i;
   2475 
   2476   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2477     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2478   }
   2479 }
   2480 
   2481 
   2482 static void TestOptimize(void) {
   2483   /* this is not really a test - just trying out
   2484    * whether copying of UCA contents will fail
   2485    * Cannot really test, since the functionality
   2486    * remains the same.
   2487    */
   2488   static const struct {
   2489     const char *rules;
   2490     const char *data[10];
   2491     const uint32_t len;
   2492   } tests[] = {
   2493     /* - all befores here amount to zero */
   2494     { "[optimize [\\uAC00-\\uD7FF]]",
   2495     { "a", "b"}, 2}
   2496   };
   2497   uint32_t i;
   2498 
   2499   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2500     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2501   }
   2502 }
   2503 
   2504 /*
   2505 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   2506 weiv    ucol_strcollIter?
   2507 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   2508 weiv    these are the input strings?
   2509 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   2510 weiv    will check - could be a problem with utf-8 iterator
   2511 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   2512 weiv    hmmm
   2513 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   2514 weiv    that doesn't sound right
   2515 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   2516 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   2517 cycheng (at) ca.ibm.c... yes
   2518 weiv    and then do the comparison
   2519 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   2520 weiv    utf-16 strings look like a little endian ones in the example you sent me
   2521 weiv    It could be a bug - let me try to test it out
   2522 cycheng (at) ca.ibm.c... ok
   2523 cycheng (at) ca.ibm.c... we can wait till the conf. call
   2524 cycheng (at) ca.ibm.c... next weke
   2525 weiv    that would be great
   2526 weiv    hmmm
   2527 weiv    I might be wrong
   2528 weiv    let me play with it some more
   2529 cycheng (at) ca.ibm.c... ok
   2530 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   2531 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   2532 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   2533 weiv    ok
   2534 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   2535 weiv    thanks
   2536 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   2537 */
   2538 #if 0
   2539 static void Alexis(void) {
   2540   UErrorCode status = U_ZERO_ERROR;
   2541   UCollator *coll = ucol_open("", &status);
   2542 
   2543 
   2544   const char utf16be[2][4] = {
   2545     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   2546     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   2547   };
   2548 
   2549   const char utf8[2][4] = {
   2550     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   2551     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   2552   };
   2553 
   2554   UCharIterator iterU161, iterU162;
   2555   UCharIterator iterU81, iterU82;
   2556 
   2557   UCollationResult resU16, resU8;
   2558 
   2559   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   2560   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   2561 
   2562   uiter_setUTF8(&iterU81, utf8[0], 4);
   2563   uiter_setUTF8(&iterU82, utf8[1], 4);
   2564 
   2565   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2566 
   2567   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   2568   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   2569 
   2570 
   2571   if(resU16 != resU8) {
   2572     log_err("different results\n");
   2573   }
   2574 
   2575   ucol_close(coll);
   2576 }
   2577 #endif
   2578 
   2579 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   2580 static void Alexis2(void) {
   2581   UErrorCode status = U_ZERO_ERROR;
   2582   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2583   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2584   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2585   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   2586 
   2587   UConverter *conv = NULL;
   2588 
   2589   UCharIterator U16BEItS, U16BEItT;
   2590   UCharIterator U8ItS, U8ItT;
   2591 
   2592   UCollationResult resU16, resU16BE, resU8;
   2593 
   2594   static const char* const pairs[][2] = {
   2595     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   2596     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   2597     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   2598     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   2599     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   2600     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   2601     { "\\u0020", "\\u0020\\u0000"}
   2602 /*
   2603 5F20 (my result here)
   2604 5F204E008E3F
   2605 5F20 (your result here)
   2606 */
   2607   };
   2608 
   2609   int32_t i = 0;
   2610 
   2611   UCollator *coll = ucol_open("", &status);
   2612   if(status == U_FILE_ACCESS_ERROR) {
   2613     log_data_err("Is your data around?\n");
   2614     return;
   2615   } else if(U_FAILURE(status)) {
   2616     log_err("Error opening collator\n");
   2617     return;
   2618   }
   2619   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2620   conv = ucnv_open("UTF16BE", &status);
   2621   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   2622     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2623     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2624 
   2625     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   2626 
   2627     log_verbose("Result of strcoll is %i\n", resU16);
   2628 
   2629     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   2630     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   2631     (void)U16BELenS;    /* Suppress set but not used warnings. */
   2632     (void)U16BELenT;
   2633 
   2634     /* use the original sizes, as the result from converter is in bytes */
   2635     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   2636     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   2637 
   2638     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   2639 
   2640     log_verbose("Result of U16BE is %i\n", resU16BE);
   2641 
   2642     if(resU16 != resU16BE) {
   2643       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   2644     }
   2645 
   2646     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   2647     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   2648 
   2649     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   2650     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   2651 
   2652     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   2653 
   2654     if(resU16 != resU8) {
   2655       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   2656     }
   2657 
   2658   }
   2659 
   2660   ucol_close(coll);
   2661   ucnv_close(conv);
   2662 }
   2663 
   2664 static void TestHebrewUCA(void) {
   2665   UErrorCode status = U_ZERO_ERROR;
   2666   static const char *first[] = {
   2667     "d790d6b8d79cd795d6bcd7a9",
   2668     "d790d79cd79ed7a7d799d799d7a1",
   2669     "d790d6b4d79ed795d6bcd7a9",
   2670   };
   2671 
   2672   char utf8String[3][256];
   2673   UChar utf16String[3][256];
   2674 
   2675   int32_t i = 0, j = 0;
   2676   int32_t sizeUTF8[3];
   2677   int32_t sizeUTF16[3];
   2678 
   2679   UCollator *coll = ucol_open("", &status);
   2680   if (U_FAILURE(status)) {
   2681       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   2682       return;
   2683   }
   2684   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   2685 
   2686   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   2687     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   2688     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   2689     log_verbose("%i: ");
   2690     for(j = 0; j < sizeUTF16[i]; j++) {
   2691       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   2692       log_verbose("%04X", utf16String[i][j]);
   2693     }
   2694     log_verbose("\n");
   2695   }
   2696   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   2697     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   2698       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   2699     }
   2700   }
   2701 
   2702   ucol_close(coll);
   2703 
   2704 }
   2705 
   2706 static void TestPartialSortKeyTermination(void) {
   2707   static const char* cases[] = {
   2708     "\\u1234\\u1234\\udc00",
   2709     "\\udc00\\ud800\\ud800"
   2710   };
   2711 
   2712   int32_t i;
   2713 
   2714   UErrorCode status = U_ZERO_ERROR;
   2715 
   2716   UCollator *coll = ucol_open("", &status);
   2717 
   2718   UCharIterator iter;
   2719 
   2720   UChar currCase[256];
   2721   int32_t length = 0;
   2722   int32_t pKeyLen = 0;
   2723 
   2724   uint8_t key[256];
   2725 
   2726   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   2727     uint32_t state[2] = {0, 0};
   2728     length = u_unescape(cases[i], currCase, 256);
   2729     uiter_setString(&iter, currCase, length);
   2730     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   2731     (void)pKeyLen;   /* Suppress set but not used warning. */
   2732 
   2733     log_verbose("Done\n");
   2734 
   2735   }
   2736   ucol_close(coll);
   2737 }
   2738 
   2739 static void TestSettings(void) {
   2740   static const char* cases[] = {
   2741     "apple",
   2742       "Apple"
   2743   };
   2744 
   2745   static const char* locales[] = {
   2746     "",
   2747       "en"
   2748   };
   2749 
   2750   UErrorCode status = U_ZERO_ERROR;
   2751 
   2752   int32_t i = 0, j = 0;
   2753 
   2754   UChar source[256], target[256];
   2755   int32_t sLen = 0, tLen = 0;
   2756 
   2757   UCollator *collateObject = NULL;
   2758   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   2759     collateObject = ucol_open(locales[i], &status);
   2760     ucol_setStrength(collateObject, UCOL_PRIMARY);
   2761     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   2762     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   2763       sLen = u_unescape(cases[j-1], source, 256);
   2764       source[sLen] = 0;
   2765       tLen = u_unescape(cases[j], target, 256);
   2766       source[tLen] = 0;
   2767       doTest(collateObject, source, target, UCOL_EQUAL);
   2768     }
   2769     ucol_close(collateObject);
   2770   }
   2771 }
   2772 
   2773 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   2774     UErrorCode status = U_ZERO_ERROR;
   2775     int32_t errorNo = 0;
   2776     const UChar *sourceRules = NULL;
   2777     int32_t sourceRulesLen = 0;
   2778     UParseError parseError;
   2779     UColAttributeValue french = UCOL_OFF;
   2780 
   2781     if(!ucol_equals(source, target)) {
   2782         log_err("Same collators, different address not equal\n");
   2783         errorNo++;
   2784     }
   2785     ucol_close(target);
   2786     if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   2787         target = ucol_safeClone(source, NULL, NULL, &status);
   2788         if(U_FAILURE(status)) {
   2789             log_err("Error creating clone\n");
   2790             errorNo++;
   2791             return errorNo;
   2792         }
   2793         if(!ucol_equals(source, target)) {
   2794             log_err("Collator different from it's clone\n");
   2795             errorNo++;
   2796         }
   2797         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   2798         if(french == UCOL_ON) {
   2799             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   2800         } else {
   2801             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   2802         }
   2803         if(U_FAILURE(status)) {
   2804             log_err("Error setting attributes\n");
   2805             errorNo++;
   2806             return errorNo;
   2807         }
   2808         if(ucol_equals(source, target)) {
   2809             log_err("Collators same even when options changed\n");
   2810             errorNo++;
   2811         }
   2812         ucol_close(target);
   2813 
   2814         sourceRules = ucol_getRules(source, &sourceRulesLen);
   2815         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2816         if(U_FAILURE(status)) {
   2817             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
   2818             errorNo++;
   2819             return errorNo;
   2820         }
   2821         /* Note: The tailoring rule string is an optional data item. */
   2822         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
   2823             log_err("Collator different from collator that was created from the same rules\n");
   2824             errorNo++;
   2825         }
   2826         ucol_close(target);
   2827     }
   2828     return errorNo;
   2829 }
   2830 
   2831 
   2832 static void TestEquals(void) {
   2833     /* ucol_equals is not currently a public API. There is a chance that it will become
   2834     * something like this.
   2835     */
   2836     /* test whether the two collators instantiated from the same locale are equal */
   2837     UErrorCode status = U_ZERO_ERROR;
   2838     UParseError parseError;
   2839     int32_t noOfLoc = uloc_countAvailable();
   2840     const char *locName = NULL;
   2841     UCollator *source = NULL, *target = NULL;
   2842     int32_t i = 0;
   2843 
   2844     const char* rules[] = {
   2845         "&l < lj <<< Lj <<< LJ",
   2846         "&n < nj <<< Nj <<< NJ",
   2847         "&ae <<< \\u00e4",
   2848         "&AE <<< \\u00c4"
   2849     };
   2850     /*
   2851     const char* badRules[] = {
   2852     "&l <<< Lj",
   2853     "&n < nj <<< nJ <<< NJ",
   2854     "&a <<< \\u00e4",
   2855     "&AE <<< \\u00c4 <<< x"
   2856     };
   2857     */
   2858 
   2859     UChar sourceRules[1024], targetRules[1024];
   2860     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   2861     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   2862 
   2863     for(i = 0; i < rulesSize; i++) {
   2864         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   2865         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   2866     }
   2867 
   2868     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2869     if(status == U_FILE_ACCESS_ERROR) {
   2870         log_data_err("Is your data around?\n");
   2871         return;
   2872     } else if(U_FAILURE(status)) {
   2873         log_err("Error opening collator\n");
   2874         return;
   2875     }
   2876     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2877     if(!ucol_equals(source, target)) {
   2878         log_err("Equivalent collators not equal!\n");
   2879     }
   2880     ucol_close(source);
   2881     ucol_close(target);
   2882 
   2883     source = ucol_open("root", &status);
   2884     target = ucol_open("root", &status);
   2885     log_verbose("Testing root\n");
   2886     if(!ucol_equals(source, source)) {
   2887         log_err("Same collator not equal\n");
   2888     }
   2889     if(TestEqualsForCollator(locName, source, target)) {
   2890         log_err("Errors for root\n", locName);
   2891     }
   2892     ucol_close(source);
   2893 
   2894     for(i = 0; i<noOfLoc; i++) {
   2895         status = U_ZERO_ERROR;
   2896         locName = uloc_getAvailable(i);
   2897         /*if(hasCollationElements(locName)) {*/
   2898         log_verbose("Testing equality for locale %s\n", locName);
   2899         source = ucol_open(locName, &status);
   2900         target = ucol_open(locName, &status);
   2901         if (U_FAILURE(status)) {
   2902             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   2903             continue;
   2904         }
   2905         if(TestEqualsForCollator(locName, source, target)) {
   2906             log_err("Errors for locale %s\n", locName);
   2907         }
   2908         ucol_close(source);
   2909         /*}*/
   2910     }
   2911 }
   2912 
   2913 static void TestJ2726(void) {
   2914     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   2915     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   2916     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   2917     UErrorCode status = U_ZERO_ERROR;
   2918     UCollator *coll = ucol_open("en", &status);
   2919     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   2920     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   2921     doTest(coll, a, aSpace, UCOL_EQUAL);
   2922     doTest(coll, aSpace, a, UCOL_EQUAL);
   2923     doTest(coll, a, spaceA, UCOL_EQUAL);
   2924     doTest(coll, spaceA, a, UCOL_EQUAL);
   2925     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   2926     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   2927     ucol_close(coll);
   2928 }
   2929 
   2930 static void NullRule(void) {
   2931     UChar r[3] = {0};
   2932     UErrorCode status = U_ZERO_ERROR;
   2933     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2934     if(U_SUCCESS(status)) {
   2935         log_err("This should have been an error!\n");
   2936         ucol_close(coll);
   2937     } else {
   2938         status = U_ZERO_ERROR;
   2939     }
   2940     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2941     if(U_FAILURE(status)) {
   2942         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   2943     } else {
   2944         ucol_close(coll);
   2945     }
   2946 }
   2947 
   2948 /**
   2949  * Test for CollationElementIterator previous and next for the whole set of
   2950  * unicode characters with normalization on.
   2951  */
   2952 static void TestNumericCollation(void)
   2953 {
   2954     UErrorCode status = U_ZERO_ERROR;
   2955 
   2956     const static char *basicTestStrings[]={
   2957     "hello1",
   2958     "hello2",
   2959     "hello2002",
   2960     "hello2003",
   2961     "hello123456",
   2962     "hello1234567",
   2963     "hello10000000",
   2964     "hello100000000",
   2965     "hello1000000000",
   2966     "hello10000000000",
   2967     };
   2968 
   2969     const static char *preZeroTestStrings[]={
   2970     "avery10000",
   2971     "avery010000",
   2972     "avery0010000",
   2973     "avery00010000",
   2974     "avery000010000",
   2975     "avery0000010000",
   2976     "avery00000010000",
   2977     "avery000000010000",
   2978     };
   2979 
   2980     const static char *thirtyTwoBitNumericStrings[]={
   2981     "avery42949672960",
   2982     "avery42949672961",
   2983     "avery42949672962",
   2984     "avery429496729610"
   2985     };
   2986 
   2987      const static char *longNumericStrings[]={
   2988      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   2989         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   2990         are treated as multiple collation elements. */
   2991     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   2992     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   2993     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   2994     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   2995     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   2996     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   2997     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   2998     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   2999     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   3000     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   3001     };
   3002 
   3003     const static char *supplementaryDigits[] = {
   3004       "\\uD835\\uDFCE", /* 0 */
   3005       "\\uD835\\uDFCF", /* 1 */
   3006       "\\uD835\\uDFD0", /* 2 */
   3007       "\\uD835\\uDFD1", /* 3 */
   3008       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   3009       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   3010       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   3011       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   3012       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   3013       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   3014     };
   3015 
   3016     const static char *foreignDigits[] = {
   3017       "\\u0661",
   3018         "\\u0662",
   3019         "\\u0663",
   3020       "\\u0661\\u0660",
   3021       "\\u0661\\u0662",
   3022       "\\u0661\\u0663",
   3023       "\\u0662\\u0660",
   3024       "\\u0662\\u0662",
   3025       "\\u0662\\u0663",
   3026       "\\u0663\\u0660",
   3027       "\\u0663\\u0662",
   3028       "\\u0663\\u0663"
   3029     };
   3030 
   3031     const static char *evenZeroes[] = {
   3032       "2000",
   3033       "2001",
   3034         "2002",
   3035         "2003"
   3036     };
   3037 
   3038     UColAttribute att = UCOL_NUMERIC_COLLATION;
   3039     UColAttributeValue val = UCOL_ON;
   3040 
   3041     /* Open our collator. */
   3042     UCollator* coll = ucol_open("root", &status);
   3043     if (U_FAILURE(status)){
   3044         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   3045               myErrorName(status));
   3046         return;
   3047     }
   3048     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   3049     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   3050     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   3051     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   3052     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   3053     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   3054 
   3055     /* Setting up our collator to do digits. */
   3056     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   3057     if (U_FAILURE(status)){
   3058         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   3059               myErrorName(status));
   3060         return;
   3061     }
   3062 
   3063     /*
   3064        Testing that prepended zeroes still yield the correct collation behavior.
   3065        We expect that every element in our strings array will be equal.
   3066     */
   3067     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   3068 
   3069     ucol_close(coll);
   3070 }
   3071 
   3072 static void TestTibetanConformance(void)
   3073 {
   3074     const char* test[] = {
   3075         "\\u0FB2\\u0591\\u0F71\\u0061",
   3076         "\\u0FB2\\u0F71\\u0061"
   3077     };
   3078 
   3079     UErrorCode status = U_ZERO_ERROR;
   3080     UCollator *coll = ucol_open("", &status);
   3081     UChar source[100];
   3082     UChar target[100];
   3083     int result;
   3084     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3085     if (U_SUCCESS(status)) {
   3086         u_unescape(test[0], source, 100);
   3087         u_unescape(test[1], target, 100);
   3088         doTest(coll, source, target, UCOL_EQUAL);
   3089         result = ucol_strcoll(coll, source, -1,   target, -1);
   3090         log_verbose("result %d\n", result);
   3091         if (UCOL_EQUAL != result) {
   3092             log_err("Tibetan comparison error\n");
   3093         }
   3094     }
   3095     ucol_close(coll);
   3096 
   3097     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   3098 }
   3099 
   3100 static void TestPinyinProblem(void) {
   3101     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   3102     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   3103 }
   3104 
   3105 /**
   3106  * Iterate through the given iterator, checking to see that all the strings
   3107  * in the expected array are present.
   3108  * @param expected array of strings we expect to see, or NULL
   3109  * @param expectedCount number of elements of expected, or 0
   3110  */
   3111 static int32_t checkUEnumeration(const char* msg,
   3112                                  UEnumeration* iter,
   3113                                  const char** expected,
   3114                                  int32_t expectedCount) {
   3115     UErrorCode ec = U_ZERO_ERROR;
   3116     int32_t i = 0, n, j, bit;
   3117     int32_t seenMask = 0;
   3118 
   3119     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   3120     n = uenum_count(iter, &ec);
   3121     if (!assertSuccess("count", &ec)) return -1;
   3122     log_verbose("%s = [", msg);
   3123     for (;; ++i) {
   3124         const char* s = uenum_next(iter, NULL, &ec);
   3125         if (!assertSuccess("snext", &ec) || s == NULL) break;
   3126         if (i != 0) log_verbose(",");
   3127         log_verbose("%s", s);
   3128         /* check expected list */
   3129         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3130             if ((seenMask&bit) == 0 &&
   3131                 uprv_strcmp(s, expected[j]) == 0) {
   3132                 seenMask |= bit;
   3133                 break;
   3134             }
   3135         }
   3136     }
   3137     log_verbose("] (%d)\n", i);
   3138     assertTrue("count verified", i==n);
   3139     /* did we see all expected strings? */
   3140     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3141         if ((seenMask&bit)!=0) {
   3142             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   3143         } else {
   3144             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   3145         }
   3146     }
   3147     return n;
   3148 }
   3149 
   3150 /**
   3151  * Test new API added for separate collation tree.
   3152  */
   3153 static void TestSeparateTrees(void) {
   3154     UErrorCode ec = U_ZERO_ERROR;
   3155     UEnumeration *e = NULL;
   3156     int32_t n = -1;
   3157     UBool isAvailable;
   3158     char loc[256];
   3159 
   3160     static const char* AVAIL[] = { "en", "de" };
   3161 
   3162     static const char* KW[] = { "collation" };
   3163 
   3164     static const char* KWVAL[] = { "phonebook", "stroke" };
   3165 
   3166 #if !UCONFIG_NO_SERVICE
   3167     e = ucol_openAvailableLocales(&ec);
   3168     if (e != NULL) {
   3169         assertSuccess("ucol_openAvailableLocales", &ec);
   3170         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   3171         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   3172         (void)n;    /* Suppress set but not used warnings. */
   3173         /* Don't need to check n because we check list */
   3174         uenum_close(e);
   3175     } else {
   3176         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   3177     }
   3178 #endif
   3179 
   3180     e = ucol_getKeywords(&ec);
   3181     if (e != NULL) {
   3182         assertSuccess("ucol_getKeywords", &ec);
   3183         assertTrue("ucol_getKeywords!=0", e!=0);
   3184         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   3185         /* Don't need to check n because we check list */
   3186         uenum_close(e);
   3187     } else {
   3188         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   3189     }
   3190 
   3191     e = ucol_getKeywordValues(KW[0], &ec);
   3192     if (e != NULL) {
   3193         assertSuccess("ucol_getKeywordValues", &ec);
   3194         assertTrue("ucol_getKeywordValues!=0", e!=0);
   3195         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   3196         /* Don't need to check n because we check list */
   3197         uenum_close(e);
   3198     } else {
   3199         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   3200     }
   3201 
   3202     /* Try setting a warning before calling ucol_getKeywordValues */
   3203     ec = U_USING_FALLBACK_WARNING;
   3204     e = ucol_getKeywordValues(KW[0], &ec);
   3205     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   3206         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   3207         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   3208         /* Don't need to check n because we check list */
   3209         uenum_close(e);
   3210     }
   3211 
   3212     /*
   3213 U_DRAFT int32_t U_EXPORT2
   3214 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   3215                              const char* locale, UBool* isAvailable,
   3216                              UErrorCode* status);
   3217 }
   3218 */
   3219     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   3220                                      &isAvailable, &ec);
   3221     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3222         assertEquals("getFunctionalEquivalent(de)", "root", loc);
   3223         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   3224                    isAvailable == TRUE);
   3225     }
   3226 
   3227     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   3228                                      &isAvailable, &ec);
   3229     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3230         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
   3231         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
   3232                    isAvailable == FALSE);
   3233     }
   3234 }
   3235 
   3236 /* supercedes TestJ784 */
   3237 static void TestBeforePinyin(void) {
   3238     const static char rules[] = {
   3239         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   3240         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   3241         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   3242         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   3243         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   3244         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   3245     };
   3246 
   3247     const static char *test[] = {
   3248         "l\\u0101",
   3249         "la",
   3250         "l\\u0101n",
   3251         "lan ",
   3252         "l\\u0113",
   3253         "le",
   3254         "l\\u0113n",
   3255         "len"
   3256     };
   3257 
   3258     const static char *test2[] = {
   3259         "x\\u0101",
   3260         "x\\u0100",
   3261         "X\\u0101",
   3262         "X\\u0100",
   3263         "x\\u00E1",
   3264         "x\\u00C1",
   3265         "X\\u00E1",
   3266         "X\\u00C1",
   3267         "x\\u01CE",
   3268         "x\\u01CD",
   3269         "X\\u01CE",
   3270         "X\\u01CD",
   3271         "x\\u00E0",
   3272         "x\\u00C0",
   3273         "X\\u00E0",
   3274         "X\\u00C0",
   3275         "xa",
   3276         "xA",
   3277         "Xa",
   3278         "XA",
   3279         "x\\u0101x",
   3280         "x\\u0100x",
   3281         "x\\u00E1x",
   3282         "x\\u00C1x",
   3283         "x\\u01CEx",
   3284         "x\\u01CDx",
   3285         "x\\u00E0x",
   3286         "x\\u00C0x",
   3287         "xax",
   3288         "xAx"
   3289     };
   3290 
   3291     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   3292     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   3293     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   3294     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   3295 }
   3296 
   3297 static void TestBeforeTightening(void) {
   3298     static const struct {
   3299         const char *rules;
   3300         UErrorCode expectedStatus;
   3301     } tests[] = {
   3302         { "&[before 1]a<x", U_ZERO_ERROR },
   3303         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   3304         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   3305         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   3306         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   3307         { "&[before 2]a<<x",U_ZERO_ERROR },
   3308         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   3309         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   3310         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   3311         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   3312         { "&[before 3]a<<<x",U_ZERO_ERROR },
   3313         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   3314         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   3315     };
   3316 
   3317     int32_t i = 0;
   3318 
   3319     UErrorCode status = U_ZERO_ERROR;
   3320     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3321     uint32_t rlen = 0;
   3322 
   3323     UCollator *coll = NULL;
   3324 
   3325 
   3326     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   3327         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   3328         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3329         if(status != tests[i].expectedStatus) {
   3330             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   3331                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   3332         }
   3333         ucol_close(coll);
   3334         status = U_ZERO_ERROR;
   3335     }
   3336 
   3337 }
   3338 
   3339 /*
   3340 &m < a
   3341 &[before 1] a < x <<< X << q <<< Q < z
   3342 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   3343 
   3344 &m < a
   3345 &[before 2] a << x <<< X << q <<< Q < z
   3346 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   3347 
   3348 &m < a
   3349 &[before 3] a <<< x <<< X << q <<< Q < z
   3350 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   3351 
   3352 
   3353 &m << a
   3354 &[before 1] a < x <<< X << q <<< Q < z
   3355 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   3356 
   3357 &m << a
   3358 &[before 2] a << x <<< X << q <<< Q < z
   3359 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   3360 
   3361 &m << a
   3362 &[before 3] a <<< x <<< X << q <<< Q < z
   3363 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   3364 
   3365 
   3366 &m <<< a
   3367 &[before 1] a < x <<< X << q <<< Q < z
   3368 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   3369 
   3370 &m <<< a
   3371 &[before 2] a << x <<< X << q <<< Q < z
   3372 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   3373 
   3374 &m <<< a
   3375 &[before 3] a <<< x <<< X << q <<< Q < z
   3376 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   3377 
   3378 
   3379 &[before 1] s < x <<< X << q <<< Q < z
   3380 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   3381 
   3382 &[before 2] s << x <<< X << q <<< Q < z
   3383 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   3384 
   3385 &[before 3] s <<< x <<< X << q <<< Q < z
   3386 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   3387 
   3388 
   3389 &[before 1] \u24DC < x <<< X << q <<< Q < z
   3390 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   3391 
   3392 &[before 2] \u24DC << x <<< X << q <<< Q < z
   3393 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   3394 
   3395 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   3396 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   3397 */
   3398 
   3399 
   3400 #if 0
   3401 /* requires features not yet supported */
   3402 static void TestMoreBefore(void) {
   3403     static const struct {
   3404         const char* rules;
   3405         const char* order[16];
   3406         int32_t size;
   3407     } tests[] = {
   3408         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   3409         { "m","M","x","X","q","Q","z","a","n" }, 9},
   3410         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   3411         { "m","M","x","X","q","Q","a","z","n" }, 9},
   3412         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   3413         { "m","M","x","X","a","q","Q","z","n" }, 9},
   3414         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   3415         { "x","X","q","Q","z","m","M","a","n" }, 9},
   3416         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   3417         { "m","M","x","X","q","Q","a","z","n" }, 9},
   3418         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   3419         { "m","M","x","X","a","q","Q","z","n" }, 9},
   3420         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   3421         { "x","X","q","Q","z","n","m","a","M" }, 9},
   3422         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   3423         { "x","X","q","Q","m","a","M","z","n" }, 9},
   3424         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   3425         { "m","x","X","a","M","q","Q","z","n" }, 9},
   3426         { "&[before 1] s < x <<< X << q <<< Q < z",
   3427         { "r","R","x","X","q","Q","z","s","n" }, 9},
   3428         { "&[before 2] s << x <<< X << q <<< Q < z",
   3429         { "r","R","x","X","q","Q","s","z","n" }, 9},
   3430         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   3431         { "r","R","x","X","s","q","Q","z","n" }, 9},
   3432         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   3433         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   3434         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   3435         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   3436         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   3437         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   3438     };
   3439 
   3440     int32_t i = 0;
   3441 
   3442     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   3443         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   3444     }
   3445 }
   3446 #endif
   3447 
   3448 static void TestTailorNULL( void ) {
   3449     const static char* rule = "&a <<< '\\u0000'";
   3450     UErrorCode status = U_ZERO_ERROR;
   3451     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3452     uint32_t rlen = 0;
   3453     UChar a = 1, null = 0;
   3454     UCollationResult res = UCOL_EQUAL;
   3455 
   3456     UCollator *coll = NULL;
   3457 
   3458 
   3459     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   3460     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3461 
   3462     if(U_FAILURE(status)) {
   3463         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   3464     } else {
   3465         res = ucol_strcoll(coll, &a, 1, &null, 1);
   3466 
   3467         if(res != UCOL_LESS) {
   3468             log_err("NULL was not tailored properly!\n");
   3469         }
   3470     }
   3471 
   3472     ucol_close(coll);
   3473 }
   3474 
   3475 static void
   3476 TestUpperFirstQuaternary(void)
   3477 {
   3478   const char* tests[] = { "B", "b", "Bb", "bB" };
   3479   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   3480   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   3481   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   3482 }
   3483 
   3484 static void
   3485 TestJ4960(void)
   3486 {
   3487   const char* tests[] = { "\\u00e2T", "aT" };
   3488   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   3489   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   3490   const char* tests2[] = { "a", "A" };
   3491   const char* rule = "&[first tertiary ignorable]=A=a";
   3492   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   3493   UColAttributeValue attVals2[] = { UCOL_ON };
   3494   /* Test whether we correctly ignore primary ignorables on case level when */
   3495   /* we have only primary & case level */
   3496   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   3497   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   3498   /* and case level */
   3499   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   3500   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   3501   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   3502 }
   3503 
   3504 static void
   3505 TestJ5223(void)
   3506 {
   3507   static const char *test = "this is a test string";
   3508   UChar ustr[256];
   3509   int32_t ustr_length = u_unescape(test, ustr, 256);
   3510   unsigned char sortkey[256];
   3511   int32_t sortkey_length;
   3512   UErrorCode status = U_ZERO_ERROR;
   3513   static UCollator *coll = NULL;
   3514   coll = ucol_open("root", &status);
   3515   if(U_FAILURE(status)) {
   3516     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   3517     return;
   3518   }
   3519   ucol_setStrength(coll, UCOL_PRIMARY);
   3520   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   3521   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3522   if (U_FAILURE(status)) {
   3523     log_err("Failed setting atributes\n");
   3524     return;
   3525   }
   3526   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   3527   if (sortkey_length > 256) return;
   3528 
   3529   /* we mark the position where the null byte should be written in advance */
   3530   sortkey[sortkey_length-1] = 0xAA;
   3531 
   3532   /* we set the buffer size one byte higher than needed */
   3533   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   3534     sortkey_length+1);
   3535 
   3536   /* no error occurs (for me) */
   3537   if (sortkey[sortkey_length-1] == 0xAA) {
   3538     log_err("Hit bug at first try\n");
   3539   }
   3540 
   3541   /* we mark the position where the null byte should be written again */
   3542   sortkey[sortkey_length-1] = 0xAA;
   3543 
   3544   /* this time we set the buffer size to the exact amount needed */
   3545   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   3546     sortkey_length);
   3547 
   3548   /* now the trailing null byte is not written */
   3549   if (sortkey[sortkey_length-1] == 0xAA) {
   3550     log_err("Hit bug at second try\n");
   3551   }
   3552 
   3553   ucol_close(coll);
   3554 }
   3555 
   3556 /* Regression test for Thai partial sort key problem */
   3557 static void
   3558 TestJ5232(void)
   3559 {
   3560     const static char *test[] = {
   3561         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   3562         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   3563     };
   3564 
   3565     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   3566 }
   3567 
   3568 static void
   3569 TestJ5367(void)
   3570 {
   3571     const static char *test[] = { "a", "y" };
   3572     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   3573     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   3574 }
   3575 
   3576 static void
   3577 TestVI5913(void)
   3578 {
   3579     UErrorCode status = U_ZERO_ERROR;
   3580     int32_t i, j;
   3581     UCollator *coll =NULL;
   3582     uint8_t  resColl[100], expColl[100];
   3583     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   3584     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
   3585     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   3586     /*
   3587      * Note: Just tailoring &z<ae^ does not work as expected:
   3588      * The UCA spec requires for discontiguous contractions that they
   3589      * extend an *existing match* by one combining mark at a time.
   3590      * Therefore, ae must be a contraction so that the builder finds
   3591      * discontiguous contractions for ae^, for example with an intervening underdot.
   3592      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
   3593      */
   3594     UChar rule3[256]={
   3595         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
   3596         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
   3597         0};
   3598     static const UChar tData[][20]={
   3599         {0x1EAC, 0},
   3600         {0x0041, 0x0323, 0x0302, 0},
   3601         {0x1EA0, 0x0302, 0},
   3602         {0x00C2, 0x0323, 0},
   3603         {0x1ED8, 0},  /* O with dot and circumflex */
   3604         {0x1ECC, 0x0302, 0},
   3605         {0x1EB7, 0},
   3606         {0x1EA1, 0x0306, 0},
   3607     };
   3608     static const UChar tailorData[][20]={
   3609         {0x1FA2, 0},  /* Omega with 3 combining marks */
   3610         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   3611         {0x1FF3, 0x0313, 0x0300, 0},
   3612         {0x1F60, 0x0300, 0x0345, 0},
   3613         {0x1F62, 0x0345, 0},
   3614         {0x1FA0, 0x0300, 0},
   3615     };
   3616     static const UChar tailorData2[][20]={
   3617         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   3618         {0x0073, 0x0323, 0x030C, 0},
   3619         {0x0073, 0x030C, 0x0323, 0},
   3620     };
   3621     static const UChar tailorData3[][20]={
   3622         {0x007a, 0},  /*  z */
   3623         {0x0061, 0x0065, 0},  /*  a + e */
   3624         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   3625         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   3626         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   3627         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   3628         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   3629         {0x00EA, 0},  /* e with circumflex  */
   3630     };
   3631 
   3632     /* Test Vietnamese sort. */
   3633     coll = ucol_open("vi", &status);
   3634     if(U_FAILURE(status)) {
   3635         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   3636         return;
   3637     }
   3638     log_verbose("\n\nVI collation:");
   3639     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   3640         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   3641     }
   3642     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   3643         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   3644     }
   3645     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   3646         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   3647     }
   3648     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   3649         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   3650     }
   3651 
   3652     for (j=0; j<8; j++) {
   3653         tLen = u_strlen(tData[j]);
   3654         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   3655         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   3656         for(i = 0; i<rLen; i++) {
   3657             log_verbose(" %02X", resColl[i]);
   3658         }
   3659     }
   3660 
   3661     ucol_close(coll);
   3662 
   3663     /* Test Romanian sort. */
   3664     coll = ucol_open("ro", &status);
   3665     log_verbose("\n\nRO collation:");
   3666     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   3667         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   3668     }
   3669     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   3670         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   3671     }
   3672     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   3673         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   3674     }
   3675 
   3676     for (j=4; j<8; j++) {
   3677         tLen = u_strlen(tData[j]);
   3678         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   3679         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   3680         for(i = 0; i<rLen; i++) {
   3681             log_verbose(" %02X", resColl[i]);
   3682         }
   3683     }
   3684     ucol_close(coll);
   3685 
   3686     /* Test the precomposed Greek character with 3 combining marks. */
   3687     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   3688     ruleLen = u_strlen(rule);
   3689     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3690     if (U_FAILURE(status)) {
   3691         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   3692         return;
   3693     }
   3694     sLen = u_strlen(tailorData[0]);
   3695     for (j=1; j<6; j++) {
   3696         tLen = u_strlen(tailorData[j]);
   3697         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   3698             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   3699         }
   3700     }
   3701     /* Test getSortKey. */
   3702     tLen = u_strlen(tailorData[0]);
   3703     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   3704     for (j=0; j<6; j++) {
   3705         tLen = u_strlen(tailorData[j]);
   3706         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   3707         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3708             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   3709             for(i = 0; i<rLen; i++) {
   3710                 log_err(" %02X", resColl[i]);
   3711             }
   3712         }
   3713     }
   3714     ucol_close(coll);
   3715 
   3716     log_verbose("\n\nTailoring test for s with caron:");
   3717     ruleLen = u_strlen(rule2);
   3718     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3719     tLen = u_strlen(tailorData2[0]);
   3720     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   3721     for (j=1; j<3; j++) {
   3722         tLen = u_strlen(tailorData2[j]);
   3723         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   3724         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3725             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   3726             for(i = 0; i<rLen; i++) {
   3727                 log_err(" %02X", resColl[i]);
   3728             }
   3729         }
   3730     }
   3731     ucol_close(coll);
   3732 
   3733     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   3734     ruleLen = u_strlen(rule3);
   3735     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3736     tLen = u_strlen(tailorData3[3]);
   3737     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   3738     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
   3739     for(i = 0; i<kLen; i++) {
   3740         log_verbose(" %02X", expColl[i]);
   3741     }
   3742     for (j=4; j<6; j++) {
   3743         tLen = u_strlen(tailorData3[j]);
   3744         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   3745 
   3746         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3747             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
   3748             for(i = 0; i<rLen; i++) {
   3749                 log_err(" %02X", resColl[i]);
   3750             }
   3751         }
   3752 
   3753         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
   3754          for(i = 0; i<rLen; i++) {
   3755              log_verbose(" %02X", resColl[i]);
   3756          }
   3757     }
   3758     ucol_close(coll);
   3759 }
   3760 
   3761 static void
   3762 TestTailor6179(void)
   3763 {
   3764     UErrorCode status = U_ZERO_ERROR;
   3765     int32_t i;
   3766     UCollator *coll =NULL;
   3767     uint8_t  resColl[100];
   3768     int32_t  rLen, tLen, ruleLen;
   3769     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   3770     static const UChar rule1[]={
   3771             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   3772             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   3773             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   3774             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   3775     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   3776     static const UChar rule2[]={
   3777             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   3778             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   3779             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   3780             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   3781             0x3C,0x3C,0x20,0x62,0};
   3782 
   3783     static const UChar tData1[][4]={
   3784         {0x61, 0},
   3785         {0x62, 0},
   3786         { 0xFDD0,0x009E, 0}
   3787     };
   3788     static const UChar tData2[][4]={
   3789         {0x61, 0},
   3790         {0x62, 0},
   3791         { 0xFDD0,0x009E, 0}
   3792      };
   3793 
   3794     /*
   3795      * These values from FractionalUCA.txt will change,
   3796      * and need to be updated here.
   3797      * TODO: Make this not check for particular sort keys.
   3798      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
   3799      */
   3800     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
   3801     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
   3802     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
   3803     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
   3804 
   3805     UParseError parseError;
   3806 
   3807     /* Test [Last Primary ignorable] */
   3808 
   3809     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
   3810     ruleLen = u_strlen(rule1);
   3811     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3812     if (U_FAILURE(status)) {
   3813         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   3814         return;
   3815     }
   3816     tLen = u_strlen(tData1[0]);
   3817     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   3818     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
   3819         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   3820         for(i = 0; i<rLen; i++) {
   3821             log_err(" %02X", resColl[i]);
   3822         }
   3823         log_err("\n");
   3824     }
   3825     tLen = u_strlen(tData1[1]);
   3826     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   3827     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
   3828         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   3829         for(i = 0; i<rLen; i++) {
   3830             log_err(" %02X", resColl[i]);
   3831         }
   3832         log_err("\n");
   3833     }
   3834     ucol_close(coll);
   3835 
   3836 
   3837     /* Test [Last Secondary ignorable] */
   3838     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
   3839     ruleLen = u_strlen(rule2);
   3840     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
   3841     if (U_FAILURE(status)) {
   3842         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
   3843         log_info("  offset=%d  \"%s\" | \"%s\"\n",
   3844                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
   3845         return;
   3846     }
   3847     tLen = u_strlen(tData2[0]);
   3848     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   3849     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
   3850         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   3851         for(i = 0; i<rLen; i++) {
   3852             log_err(" %02X", resColl[i]);
   3853         }
   3854         log_err("\n");
   3855     }
   3856     tLen = u_strlen(tData2[1]);
   3857     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   3858     if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
   3859       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   3860       for(i = 0; i<rLen; i++) {
   3861         log_err(" %02X", resColl[i]);
   3862       }
   3863       log_err("\n");
   3864     }
   3865     ucol_close(coll);
   3866 }
   3867 
   3868 static void
   3869 TestUCAPrecontext(void)
   3870 {
   3871     UErrorCode status = U_ZERO_ERROR;
   3872     int32_t i, j;
   3873     UCollator *coll =NULL;
   3874     uint8_t  resColl[100], prevColl[100];
   3875     int32_t  rLen, tLen, ruleLen;
   3876     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   3877     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   3878     /* & l middle-dot << a  a is an expansion. */
   3879 
   3880     UChar tData1[][20]={
   3881             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   3882             { 0x387, 0}, /* standalone middle dot(0x387) */
   3883             { 0x61, 0},  /* a */
   3884             { 0x6C, 0},  /* l */
   3885             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   3886             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   3887             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   3888             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   3889             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   3890             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   3891             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   3892      };
   3893 
   3894     log_verbose("\n\nEN collation:");
   3895     coll = ucol_open("en", &status);
   3896     if (U_FAILURE(status)) {
   3897         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   3898         return;
   3899     }
   3900     for (j=0; j<11; j++) {
   3901         tLen = u_strlen(tData1[j]);
   3902         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3903         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3904             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3905                     j, tData1[j]);
   3906         }
   3907         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3908         for(i = 0; i<rLen; i++) {
   3909             log_verbose(" %02X", resColl[i]);
   3910         }
   3911         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3912      }
   3913      ucol_close(coll);
   3914 
   3915 
   3916      log_verbose("\n\nJA collation:");
   3917      coll = ucol_open("ja", &status);
   3918      if (U_FAILURE(status)) {
   3919          log_err("Tailoring test: &z <<a|- failed!");
   3920          return;
   3921      }
   3922      for (j=0; j<11; j++) {
   3923          tLen = u_strlen(tData1[j]);
   3924          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3925          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3926              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3927                      j, tData1[j]);
   3928          }
   3929          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3930          for(i = 0; i<rLen; i++) {
   3931              log_verbose(" %02X", resColl[i]);
   3932          }
   3933          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3934       }
   3935       ucol_close(coll);
   3936 
   3937 
   3938       log_verbose("\n\nTailoring test: & middle dot < a ");
   3939       ruleLen = u_strlen(rule1);
   3940       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3941       if (U_FAILURE(status)) {
   3942           log_err("Tailoring test: & middle dot < a failed!");
   3943           return;
   3944       }
   3945       for (j=0; j<11; j++) {
   3946           tLen = u_strlen(tData1[j]);
   3947           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3948           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3949               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3950                       j, tData1[j]);
   3951           }
   3952           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3953           for(i = 0; i<rLen; i++) {
   3954               log_verbose(" %02X", resColl[i]);
   3955           }
   3956           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3957        }
   3958        ucol_close(coll);
   3959 
   3960 
   3961        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   3962        ruleLen = u_strlen(rule2);
   3963        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3964        if (U_FAILURE(status)) {
   3965            log_err("Tailoring test: & l middle-dot << a failed!");
   3966            return;
   3967        }
   3968        for (j=0; j<11; j++) {
   3969            tLen = u_strlen(tData1[j]);
   3970            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3971            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3972                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3973                        j, tData1[j]);
   3974            }
   3975            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   3976                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   3977                        j, tData1[j]);
   3978            }
   3979            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3980            for(i = 0; i<rLen; i++) {
   3981                log_verbose(" %02X", resColl[i]);
   3982            }
   3983            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3984         }
   3985         ucol_close(coll);
   3986 }
   3987 
   3988 static void
   3989 TestOutOfBuffer5468(void)
   3990 {
   3991     static const char *test = "\\u4e00";
   3992     UChar ustr[256];
   3993     int32_t ustr_length = u_unescape(test, ustr, 256);
   3994     unsigned char shortKeyBuf[1];
   3995     int32_t sortkey_length;
   3996     UErrorCode status = U_ZERO_ERROR;
   3997     static UCollator *coll = NULL;
   3998 
   3999     coll = ucol_open("root", &status);
   4000     if(U_FAILURE(status)) {
   4001       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4002       return;
   4003     }
   4004     ucol_setStrength(coll, UCOL_PRIMARY);
   4005     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4006     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4007     if (U_FAILURE(status)) {
   4008       log_err("Failed setting atributes\n");
   4009       return;
   4010     }
   4011 
   4012     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   4013     if (sortkey_length != 4) {
   4014         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   4015     }
   4016     log_verbose("length of sortKey is %d", sortkey_length);
   4017     ucol_close(coll);
   4018 }
   4019 
   4020 #define TSKC_DATA_SIZE 5
   4021 #define TSKC_BUF_SIZE  50
   4022 static void
   4023 TestSortKeyConsistency(void)
   4024 {
   4025     UErrorCode icuRC = U_ZERO_ERROR;
   4026     UCollator* ucol;
   4027     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   4028 
   4029     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   4030     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   4031     int32_t i, j, i2;
   4032 
   4033     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   4034     if (U_FAILURE(icuRC))
   4035     {
   4036         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   4037         return;
   4038     }
   4039 
   4040     for (i = 0; i < TSKC_DATA_SIZE; i++)
   4041     {
   4042         UCharIterator uiter;
   4043         uint32_t state[2] = { 0, 0 };
   4044         int32_t dataLen = i+1;
   4045         for (j=0; j<TSKC_BUF_SIZE; j++)
   4046             bufFull[i][j] = bufPart[i][j] = 0;
   4047 
   4048         /* Full sort key */
   4049         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   4050 
   4051         /* Partial sort key */
   4052         uiter_setString(&uiter, data, dataLen);
   4053         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   4054         if (U_FAILURE(icuRC))
   4055         {
   4056             log_err("ucol_nextSortKeyPart failed\n");
   4057             ucol_close(ucol);
   4058             return;
   4059         }
   4060 
   4061         for (i2=0; i2<i; i2++)
   4062         {
   4063             UBool fullMatch = TRUE;
   4064             UBool partMatch = TRUE;
   4065             for (j=0; j<TSKC_BUF_SIZE; j++)
   4066             {
   4067                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   4068                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   4069             }
   4070             if (fullMatch != partMatch) {
   4071                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   4072                                   : "partial key was consistent, but full key changed\n");
   4073                 ucol_close(ucol);
   4074                 return;
   4075             }
   4076         }
   4077     }
   4078 
   4079     /*=============================================*/
   4080    ucol_close(ucol);
   4081 }
   4082 
   4083 /* ticket: 6101 */
   4084 static void TestCroatianSortKey(void) {
   4085     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   4086     UErrorCode status = U_ZERO_ERROR;
   4087     UCollator *ucol;
   4088     UCharIterator iter;
   4089 
   4090     static const UChar text[] = { 0x0044, 0xD81A };
   4091 
   4092     size_t length = sizeof(text)/sizeof(*text);
   4093 
   4094     uint8_t textSortKey[32];
   4095     size_t lenSortKey = 32;
   4096     size_t actualSortKeyLen;
   4097     uint32_t uStateInfo[2] = { 0, 0 };
   4098 
   4099     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   4100     if (U_FAILURE(status)) {
   4101         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   4102         return;
   4103     }
   4104 
   4105     uiter_setString(&iter, text, length);
   4106 
   4107     actualSortKeyLen = ucol_nextSortKeyPart(
   4108         ucol, &iter, (uint32_t*)uStateInfo,
   4109         textSortKey, lenSortKey, &status
   4110         );
   4111 
   4112     if (actualSortKeyLen == lenSortKey) {
   4113         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   4114     }
   4115 
   4116     ucol_close(ucol);
   4117 }
   4118 
   4119 /* ticket: 6140 */
   4120 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   4121  * they are both Hiragana and Katakana
   4122  */
   4123 #define SORTKEYLEN 50
   4124 static void TestHiragana(void) {
   4125     UErrorCode status = U_ZERO_ERROR;
   4126     UCollator* ucol;
   4127     UCollationResult strcollresult;
   4128     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   4129     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   4130     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   4131     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   4132     int32_t i, j;
   4133     uint8_t sortKey1[SORTKEYLEN];
   4134     uint8_t sortKey2[SORTKEYLEN];
   4135 
   4136     UCharIterator uiter1;
   4137     UCharIterator uiter2;
   4138     uint32_t state1[2] = { 0, 0 };
   4139     uint32_t state2[2] = { 0, 0 };
   4140     int32_t keySize1;
   4141     int32_t keySize2;
   4142 
   4143     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   4144             &status);
   4145     if (U_FAILURE(status)) {
   4146         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   4147         return;
   4148     }
   4149 
   4150     /* Start of full sort keys */
   4151     /* Full sort key1 */
   4152     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   4153     /* Full sort key2 */
   4154     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   4155     if (keySize1 == keySize2) {
   4156         for (i = 0; i < keySize1; i++) {
   4157             if (sortKey1[i] != sortKey2[i]) {
   4158                 log_err("Full sort keys are different. Should be equal.");
   4159             }
   4160         }
   4161     } else {
   4162         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   4163     }
   4164     /* End of full sort keys */
   4165 
   4166     /* Start of partial sort keys */
   4167     /* Partial sort key1 */
   4168     uiter_setString(&uiter1, data1, data1Len);
   4169     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   4170     /* Partial sort key2 */
   4171     uiter_setString(&uiter2, data2, data2Len);
   4172     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   4173     if (U_SUCCESS(status) && keySize1 == keySize2) {
   4174         for (j = 0; j < keySize1; j++) {
   4175             if (sortKey1[j] != sortKey2[j]) {
   4176                 log_err("Partial sort keys are different. Should be equal");
   4177             }
   4178         }
   4179     } else {
   4180         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   4181     }
   4182     /* End of partial sort keys */
   4183 
   4184     /* Start of strcoll */
   4185     /* Use ucol_strcoll() to determine ordering */
   4186     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   4187     if (strcollresult != UCOL_EQUAL) {
   4188         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   4189     }
   4190 
   4191     ucol_close(ucol);
   4192 }
   4193 
/*
 * Convenient struct for running collation tests: one pair of strings plus the
 * expected result of comparing them.  The strings are stored inline, so each
 * is limited to MAX_TOKEN_LEN UTF-16 code units.
 * NOTE(review): the tables in this file rely on zero-filled trailing elements
 * as the terminator, so presumably at most MAX_TOKEN_LEN-1 units are usable --
 * confirm against doTest().
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
   4200 
   4201 /*
   4202  * Utility function to test one collation test case.
   4203  * @param testcases Array of test cases.
   4204  * @param n_testcases Size of the array testcases.
   4205  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   4206  * @param n_rules Size of the array str_rules.
   4207  */
   4208 static void doTestOneTestCase(const OneTestCase testcases[],
   4209                               int n_testcases,
   4210                               const char* str_rules[],
   4211                               int n_rules)
   4212 {
   4213   int rule_no, testcase_no;
   4214   UChar rule[500];
   4215   int32_t length = 0;
   4216   UErrorCode status = U_ZERO_ERROR;
   4217   UParseError parse_error;
   4218   UCollator  *myCollation;
   4219 
   4220   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   4221 
   4222     length = u_unescape(str_rules[rule_no], rule, 500);
   4223     if (length == 0) {
   4224         log_err("ERROR: The rule cannot be unescaped: %s\n");
   4225         return;
   4226     }
   4227     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   4228     if(U_FAILURE(status)){
   4229         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   4230         log_info("  offset=%d  \"%s\" | \"%s\"\n",
   4231                  parse_error.offset,
   4232                  aescstrdup(parse_error.preContext, -1),
   4233                  aescstrdup(parse_error.postContext, -1));
   4234         return;
   4235     }
   4236     log_verbose("Testing the <<* syntax\n");
   4237     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4238     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4239     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   4240       doTest(myCollation,
   4241              testcases[testcase_no].source,
   4242              testcases[testcase_no].target,
   4243              testcases[testcase_no].result
   4244              );
   4245     }
   4246     ucol_close(myCollation);
   4247   }
   4248 }
   4249 
   4250 const static OneTestCase rangeTestcases[] = {
   4251   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
   4252   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
   4253   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
   4254 
   4255   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
   4256   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
   4257   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
   4258   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
   4259   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
   4260 
   4261   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
   4262   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
   4263   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
   4264   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
   4265 
   4266   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
   4267   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
   4268   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
   4269   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
   4270   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
   4271   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
   4272   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
   4273   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
   4274 };
   4275 
   4276 static int nRangeTestcases = LEN(rangeTestcases);
   4277 
   4278 const static OneTestCase rangeTestcasesSupplemental[] = {
   4279   { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
   4280   { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
   4281   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
   4282   { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
   4283   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   4284   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   4285   { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */
   4286 };
   4287 
   4288 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
   4289 
   4290 const static OneTestCase rangeTestcasesQwerty[] = {
   4291   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
   4292   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
   4293 
   4294   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
   4295   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
   4296 
   4297   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
   4298   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
   4299 
   4300   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
   4301   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
   4302 
   4303   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
   4304     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
   4305   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
   4306     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
   4307 };
   4308 
   4309 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
   4310 
   4311 static void TestSameStrengthList(void)
   4312 {
   4313   const char* strRules[] = {
   4314     /* Normal */
   4315     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   4316 
   4317     /* Lists */
   4318     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   4319   };
   4320   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   4321 }
   4322 
   4323 static void TestSameStrengthListQuoted(void)
   4324 {
   4325   const char* strRules[] = {
   4326     /* Lists with quoted characters */
   4327     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   4328     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   4329 
   4330     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   4331     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   4332 
   4333     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   4334     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   4335   };
   4336   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   4337 }
   4338 
   4339 static void TestSameStrengthListSupplemental(void)
   4340 {
   4341   const char* strRules[] = {
   4342     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
   4343     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   4344     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
   4345     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   4346   };
   4347   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   4348 }
   4349 
   4350 static void TestSameStrengthListQwerty(void)
   4351 {
   4352   const char* strRules[] = {
   4353     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   4354     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   4355     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   4356     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   4357     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   4358 
   4359     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   4360     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   4361 
   4362     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   4363     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   4364 
   4365  };
   4366   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   4367 }
   4368 
   4369 static void TestSameStrengthListQuotedQwerty(void)
   4370 {
   4371   const char* strRules[] = {
   4372     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   4373     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   4374     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   4375 
   4376     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   4377     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   4378    };
   4379   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   4380 }
   4381 
   4382 static void TestSameStrengthListRanges(void)
   4383 {
   4384   const char* strRules[] = {
   4385     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   4386   };
   4387   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   4388 }
   4389 
   4390 static void TestSameStrengthListSupplementalRanges(void)
   4391 {
   4392   const char* strRules[] = {
   4393     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
   4394     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
   4395   };
   4396   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   4397 }
   4398 
   4399 static void TestSpecialCharacters(void)
   4400 {
   4401   const char* strRules[] = {
   4402     /* Normal */
   4403     "&';'<'+'<','<'-'<'&'<'*'",
   4404 
   4405     /* List */
   4406     "&';'<*'+,-&*'",
   4407 
   4408     /* Range */
   4409     "&';'<*'+'-'-&*'",
   4410   };
   4411 
   4412   const static OneTestCase specialCharacterStrings[] = {
   4413     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   4414     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   4415     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   4416     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   4417   };
   4418   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
   4419 }
   4420 
   4421 static void TestPrivateUseCharacters(void)
   4422 {
   4423   const char* strRules[] = {
   4424     /* Normal */
   4425     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   4426     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   4427   };
   4428 
   4429   const static OneTestCase privateUseCharacterStrings[] = {
   4430     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4431     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4432     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4433     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4434     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4435     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4436   };
   4437   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   4438 }
   4439 
   4440 static void TestPrivateUseCharactersInList(void)
   4441 {
   4442   const char* strRules[] = {
   4443     /* List */
   4444     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   4445     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   4446     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   4447   };
   4448 
   4449   const static OneTestCase privateUseCharacterStrings[] = {
   4450     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4451     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4452     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4453     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4454     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4455     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4456   };
   4457   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   4458 }
   4459 
   4460 static void TestPrivateUseCharactersInRange(void)
   4461 {
   4462   const char* strRules[] = {
   4463     /* Range */
   4464     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   4465     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   4466     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   4467   };
   4468 
   4469   const static OneTestCase privateUseCharacterStrings[] = {
   4470     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4471     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4472     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4473     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4474     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4475     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4476   };
   4477   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   4478 }
   4479 
   4480 static void TestInvalidListsAndRanges(void)
   4481 {
   4482   const char* invalidRules[] = {
   4483     /* Range not in starred expression */
   4484     "&\\ufffe<\\uffff-\\U00010002",
   4485 
   4486     /* Range without start */
   4487     "&a<*-c",
   4488 
   4489     /* Range without end */
   4490     "&a<*b-",
   4491 
   4492     /* More than one hyphen */
   4493     "&a<*b-g-l",
   4494 
   4495     /* Range in the wrong order */
   4496     "&a<*k-b",
   4497 
   4498   };
   4499 
   4500   UChar rule[500];
   4501   UErrorCode status = U_ZERO_ERROR;
   4502   UParseError parse_error;
   4503   int n_rules = LEN(invalidRules);
   4504   int rule_no;
   4505   int length;
   4506   UCollator  *myCollation;
   4507 
   4508   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   4509 
   4510     length = u_unescape(invalidRules[rule_no], rule, 500);
   4511     if (length == 0) {
   4512         log_err("ERROR: The rule cannot be unescaped: %s\n");
   4513         return;
   4514     }
   4515     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   4516     (void)myCollation;      /* Suppress set but not used warning. */
   4517     if(!U_FAILURE(status)){
   4518       log_err("ERROR: Could not cause a failure as expected: \n");
   4519     }
   4520     status = U_ZERO_ERROR;
   4521   }
   4522 }
   4523 
   4524 /*
   4525  * This test ensures that characters placed before a character in a different script have the same lead byte
   4526  * in their collation key before and after script reordering.
   4527  */
   4528 static void TestBeforeRuleWithScriptReordering(void)
   4529 {
   4530     UParseError error;
   4531     UErrorCode status = U_ZERO_ERROR;
   4532     UCollator  *myCollation;
   4533     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   4534     UChar rules[500];
   4535     uint32_t rulesLength = 0;
   4536     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   4537     UCollationResult collResult;
   4538 
   4539     uint8_t baseKey[256];
   4540     uint32_t baseKeyLength;
   4541     uint8_t beforeKey[256];
   4542     uint32_t beforeKeyLength;
   4543 
   4544     UChar base[] = { 0x03b1 }; /* base */
   4545     int32_t baseLen = sizeof(base)/sizeof(*base);
   4546 
   4547     UChar before[] = { 0x0e01 }; /* ko kai */
   4548     int32_t beforeLen = sizeof(before)/sizeof(*before);
   4549 
   4550     /*UChar *data[] = { before, base };
   4551     genericRulesStarter(srules, data, 2);*/
   4552 
   4553     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   4554 
   4555     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
   4556     (void)baseKeyLength;
   4557 
   4558     /* build collator */
   4559     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   4560 
   4561     rulesLength = u_unescape(srules, rules, LEN(rules));
   4562     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   4563     if(U_FAILURE(status)) {
   4564         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   4565         return;
   4566     }
   4567 
   4568     /* check collation results - before rule applied but not script reordering */
   4569     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   4570     if (collResult != UCOL_GREATER) {
   4571         log_err("Collation result not correct before script reordering = %d\n", collResult);
   4572     }
   4573 
   4574     /* check the lead byte of the collation keys before script reordering */
   4575     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   4576     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   4577     if (baseKey[0] != beforeKey[0]) {
   4578       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   4579    }
   4580 
   4581     /* reorder the scripts */
   4582     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   4583     if(U_FAILURE(status)) {
   4584         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   4585         return;
   4586     }
   4587 
   4588     /* check collation results - before rule applied and after script reordering */
   4589     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   4590     if (collResult != UCOL_GREATER) {
   4591         log_err("Collation result not correct after script reordering = %d\n", collResult);
   4592     }
   4593 
   4594     /* check the lead byte of the collation keys after script reordering */
   4595     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   4596     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   4597     if (baseKey[0] != beforeKey[0]) {
   4598         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   4599     }
   4600 
   4601     ucol_close(myCollation);
   4602 }
   4603 
   4604 /*
   4605  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   4606  */
   4607 static void TestNonLeadBytesDuringCollationReordering(void)
   4608 {
   4609     UErrorCode status = U_ZERO_ERROR;
   4610     UCollator  *myCollation;
   4611     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   4612 
   4613     uint8_t baseKey[256];
   4614     uint32_t baseKeyLength;
   4615     uint8_t reorderKey[256];
   4616     uint32_t reorderKeyLength;
   4617 
   4618     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   4619 
   4620     uint32_t i;
   4621 
   4622 
   4623     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4624 
   4625     /* build collator tertiary */
   4626     myCollation = ucol_open("", &status);
   4627     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4628     if(U_FAILURE(status)) {
   4629         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4630         return;
   4631     }
   4632     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   4633 
   4634     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4635     if(U_FAILURE(status)) {
   4636         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4637         return;
   4638     }
   4639     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   4640 
   4641     if (baseKeyLength != reorderKeyLength) {
   4642         log_err("Key lengths not the same during reordering.\n");
   4643         return;
   4644     }
   4645 
   4646     for (i = 1; i < baseKeyLength; i++) {
   4647         if (baseKey[i] != reorderKey[i]) {
   4648             log_err("Collation key bytes not the same at position %d.\n", i);
   4649             return;
   4650         }
   4651     }
   4652     ucol_close(myCollation);
   4653 
   4654     /* build collator quaternary */
   4655     myCollation = ucol_open("", &status);
   4656     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   4657     if(U_FAILURE(status)) {
   4658         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4659         return;
   4660     }
   4661     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   4662 
   4663     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4664     if(U_FAILURE(status)) {
   4665         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4666         return;
   4667     }
   4668     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   4669 
   4670     if (baseKeyLength != reorderKeyLength) {
   4671         log_err("Key lengths not the same during reordering.\n");
   4672         return;
   4673     }
   4674 
   4675     for (i = 1; i < baseKeyLength; i++) {
   4676         if (baseKey[i] != reorderKey[i]) {
   4677             log_err("Collation key bytes not the same at position %d.\n", i);
   4678             return;
   4679         }
   4680     }
   4681     ucol_close(myCollation);
   4682 }
   4683 
   4684 /*
   4685  * Test reordering API.
   4686  */
   4687 static void TestReorderingAPI(void)
   4688 {
   4689     UErrorCode status = U_ZERO_ERROR;
   4690     UCollator  *myCollation;
   4691     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4692     int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
   4693     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4694     UCollationResult collResult;
   4695     int32_t retrievedReorderCodesLength;
   4696     int32_t retrievedReorderCodes[10];
   4697     UChar greekString[] = { 0x03b1 };
   4698     UChar punctuationString[] = { 0x203e };
   4699     int loopIndex;
   4700 
   4701     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4702 
   4703     /* build collator tertiary */
   4704     myCollation = ucol_open("", &status);
   4705     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4706     if(U_FAILURE(status)) {
   4707         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4708         return;
   4709     }
   4710 
   4711     /* set the reorderding */
   4712     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4713     if (U_FAILURE(status)) {
   4714         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4715         return;
   4716     }
   4717 
   4718     /* get the reordering */
   4719     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4720     if (status != U_BUFFER_OVERFLOW_ERROR) {
   4721         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   4722         return;
   4723     }
   4724     status = U_ZERO_ERROR;
   4725     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4726         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4727         return;
   4728     }
   4729     /* now let's really get it */
   4730     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4731     if (U_FAILURE(status)) {
   4732         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4733         return;
   4734     }
   4735     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4736         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4737         return;
   4738     }
   4739     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4740         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   4741             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4742             return;
   4743         }
   4744     }
   4745     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4746     if (collResult != UCOL_LESS) {
   4747         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   4748         return;
   4749     }
   4750 
   4751     /* clear the reordering */
   4752     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   4753     if (U_FAILURE(status)) {
   4754         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   4755         return;
   4756     }
   4757 
   4758     /* get the reordering again */
   4759     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4760     if (retrievedReorderCodesLength != 0) {
   4761         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   4762         return;
   4763     }
   4764 
   4765     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4766     if (collResult != UCOL_GREATER) {
   4767         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4768         return;
   4769     }
   4770 
   4771     /* test for error condition on duplicate reorder codes */
   4772     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
   4773     if (!U_FAILURE(status)) {
   4774         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
   4775         return;
   4776     }
   4777 
   4778     status = U_ZERO_ERROR;
   4779     /* test for reorder codes after a reset code */
   4780     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
   4781     if (!U_FAILURE(status)) {
   4782         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
   4783         return;
   4784     }
   4785 
   4786     ucol_close(myCollation);
   4787 }
   4788 
   4789 /*
   4790  * Test reordering API.
   4791  */
   4792 static void TestReorderingAPIWithRuleCreatedCollator(void)
   4793 {
   4794     UErrorCode status = U_ZERO_ERROR;
   4795     UCollator  *myCollation;
   4796     UChar rules[90];
   4797     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
   4798     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4799     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
   4800     UCollationResult collResult;
   4801     int32_t retrievedReorderCodesLength;
   4802     int32_t retrievedReorderCodes[10];
   4803     static const UChar greekString[] = { 0x03b1 };
   4804     static const UChar punctuationString[] = { 0x203e };
   4805     static const UChar hanString[] = { 0x65E5, 0x672C };
   4806     int loopIndex;
   4807 
   4808     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4809 
   4810     /* build collator from rules */
   4811     u_uastrcpy(rules, "[reorder Hani Grek]");
   4812     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
   4813     if(U_FAILURE(status)) {
   4814         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4815         return;
   4816     }
   4817 
   4818     /* get the reordering */
   4819     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4820     if (U_FAILURE(status)) {
   4821         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4822         return;
   4823     }
   4824     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   4825         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   4826         return;
   4827     }
   4828     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4829         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   4830             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4831             return;
   4832         }
   4833     }
   4834     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
   4835     if (collResult != UCOL_GREATER) {
   4836         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4837         return;
   4838     }
   4839 
   4840     /* set the reordering */
   4841     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4842     if (U_FAILURE(status)) {
   4843         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4844         return;
   4845     }
   4846 
   4847     /* get the reordering */
   4848     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4849     if (status != U_BUFFER_OVERFLOW_ERROR) {
   4850         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   4851         return;
   4852     }
   4853     status = U_ZERO_ERROR;
   4854     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4855         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4856         return;
   4857     }
   4858     /* now let's really get it */
   4859     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4860     if (U_FAILURE(status)) {
   4861         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4862         return;
   4863     }
   4864     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4865         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4866         return;
   4867     }
   4868     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4869         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   4870             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4871             return;
   4872         }
   4873     }
   4874     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4875     if (collResult != UCOL_LESS) {
   4876         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   4877         return;
   4878     }
   4879 
   4880     /* clear the reordering */
   4881     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   4882     if (U_FAILURE(status)) {
   4883         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   4884         return;
   4885     }
   4886 
   4887     /* get the reordering again */
   4888     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4889     if (retrievedReorderCodesLength != 0) {
   4890         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   4891         return;
   4892     }
   4893 
   4894     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4895     if (collResult != UCOL_GREATER) {
   4896         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4897         return;
   4898     }
   4899 
   4900     /* reset the reordering */
   4901     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
   4902     if (U_FAILURE(status)) {
   4903         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
   4904         return;
   4905     }
   4906     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4907     if (U_FAILURE(status)) {
   4908         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4909         return;
   4910     }
   4911     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   4912         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   4913         return;
   4914     }
   4915     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4916         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   4917             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4918             return;
   4919         }
   4920     }
   4921 
   4922     ucol_close(myCollation);
   4923 }
   4924 
   4925 static int compareUScriptCodes(const void * a, const void * b)
   4926 {
   4927   return ( *(int32_t*)a - *(int32_t*)b );
   4928 }
   4929 
   4930 static void TestEquivalentReorderingScripts(void) {
   4931     UErrorCode status = U_ZERO_ERROR;
   4932     int32_t equivalentScripts[50];
   4933     int32_t equivalentScriptsLength;
   4934     int loopIndex;
   4935     int32_t equivalentScriptsResult[] = {
   4936         USCRIPT_BOPOMOFO,
   4937         USCRIPT_LISU,
   4938         USCRIPT_LYCIAN,
   4939         USCRIPT_CARIAN,
   4940         USCRIPT_LYDIAN,
   4941         USCRIPT_YI,
   4942         USCRIPT_OLD_ITALIC,
   4943         USCRIPT_GOTHIC,
   4944         USCRIPT_DESERET,
   4945         USCRIPT_SHAVIAN,
   4946         USCRIPT_OSMANYA,
   4947         USCRIPT_LINEAR_B,
   4948         USCRIPT_CYPRIOT,
   4949         USCRIPT_OLD_SOUTH_ARABIAN,
   4950         USCRIPT_AVESTAN,
   4951         USCRIPT_IMPERIAL_ARAMAIC,
   4952         USCRIPT_INSCRIPTIONAL_PARTHIAN,
   4953         USCRIPT_INSCRIPTIONAL_PAHLAVI,
   4954         USCRIPT_UGARITIC,
   4955         USCRIPT_OLD_PERSIAN,
   4956         USCRIPT_CUNEIFORM,
   4957         USCRIPT_EGYPTIAN_HIEROGLYPHS,
   4958         USCRIPT_PHONETIC_POLLARD,
   4959         USCRIPT_SORA_SOMPENG,
   4960         USCRIPT_MEROITIC_CURSIVE,
   4961         USCRIPT_MEROITIC_HIEROGLYPHS
   4962     };
   4963 
   4964     qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
   4965 
   4966     /* UScript.GOTHIC */
   4967     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
   4968     if (U_FAILURE(status)) {
   4969         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   4970         return;
   4971     }
   4972     /*
   4973     fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
   4974     fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
   4975     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   4976         fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
   4977     }
   4978     */
   4979     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   4980         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   4981         return;
   4982     }
   4983     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   4984         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   4985             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   4986             return;
   4987         }
   4988     }
   4989 
   4990     /* UScript.SHAVIAN */
   4991     equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
   4992     if (U_FAILURE(status)) {
   4993         log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   4994         return;
   4995     }
   4996     if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
   4997         log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
   4998         return;
   4999     }
   5000     for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
   5001         if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
   5002             log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
   5003             return;
   5004         }
   5005     }
   5006 }
   5007 
   5008 static void TestReorderingAcrossCloning(void)
   5009 {
   5010     UErrorCode status = U_ZERO_ERROR;
   5011     UCollator  *myCollation;
   5012     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   5013     UCollator *clonedCollation;
   5014     int32_t retrievedReorderCodesLength;
   5015     int32_t retrievedReorderCodes[10];
   5016     int loopIndex;
   5017 
   5018     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5019 
   5020     /* build collator tertiary */
   5021     myCollation = ucol_open("", &status);
   5022     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5023     if(U_FAILURE(status)) {
   5024         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5025         return;
   5026     }
   5027 
   5028     /* set the reorderding */
   5029     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5030     if (U_FAILURE(status)) {
   5031         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5032         return;
   5033     }
   5034 
   5035     /* clone the collator */
   5036     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
   5037     if (U_FAILURE(status)) {
   5038         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
   5039         return;
   5040     }
   5041 
   5042     /* get the reordering */
   5043     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   5044     if (U_FAILURE(status)) {
   5045         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   5046         return;
   5047     }
   5048     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   5049         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   5050         return;
   5051     }
   5052     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   5053         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   5054             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   5055             return;
   5056         }
   5057     }
   5058 
   5059     /*uprv_free(buffer);*/
   5060     ucol_close(myCollation);
   5061     ucol_close(clonedCollation);
   5062 }
   5063 
   5064 /*
   5065  * Utility function to test one collation reordering test case set.
   5066  * @param testcases Array of test cases.
   5067  * @param n_testcases Size of the array testcases.
   5068  * @param reorderTokens Array of reordering codes.
   5069  * @param reorderTokensLen Size of the array reorderTokens.
   5070  */
   5071 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   5072 {
   5073     uint32_t testCaseNum;
   5074     UErrorCode status = U_ZERO_ERROR;
   5075     UCollator  *myCollation;
   5076 
   5077     myCollation = ucol_open("", &status);
   5078     if (U_FAILURE(status)) {
   5079         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5080         return;
   5081     }
   5082     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   5083     if(U_FAILURE(status)) {
   5084         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5085         return;
   5086     }
   5087 
   5088     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   5089         doTest(myCollation,
   5090             testCases[testCaseNum].source,
   5091             testCases[testCaseNum].target,
   5092             testCases[testCaseNum].result
   5093         );
   5094     }
   5095     ucol_close(myCollation);
   5096 }
   5097 
   5098 static void TestGreekFirstReorder(void)
   5099 {
   5100     const char* strRules[] = {
   5101         "[reorder Grek]"
   5102     };
   5103 
   5104     const int32_t apiRules[] = {
   5105         USCRIPT_GREEK
   5106     };
   5107 
   5108     const static OneTestCase privateUseCharacterStrings[] = {
   5109         { {0x0391}, {0x0391}, UCOL_EQUAL },
   5110         { {0x0041}, {0x0391}, UCOL_GREATER },
   5111         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   5112         { {0x0060}, {0x0391}, UCOL_LESS },
   5113         { {0x0391}, {0xe2dc}, UCOL_LESS },
   5114         { {0x0391}, {0x0060}, UCOL_GREATER },
   5115     };
   5116 
   5117     /* Test rules creation */
   5118     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5119 
   5120     /* Test collation reordering API */
   5121     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5122 }
   5123 
   5124 static void TestGreekLastReorder(void)
   5125 {
   5126     const char* strRules[] = {
   5127         "[reorder Zzzz Grek]"
   5128     };
   5129 
   5130     const int32_t apiRules[] = {
   5131         USCRIPT_UNKNOWN, USCRIPT_GREEK
   5132     };
   5133 
   5134     const static OneTestCase privateUseCharacterStrings[] = {
   5135         { {0x0391}, {0x0391}, UCOL_EQUAL },
   5136         { {0x0041}, {0x0391}, UCOL_LESS },
   5137         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   5138         { {0x0060}, {0x0391}, UCOL_LESS },
   5139         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   5140     };
   5141 
   5142     /* Test rules creation */
   5143     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5144 
   5145     /* Test collation reordering API */
   5146     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5147 }
   5148 
   5149 static void TestNonScriptReorder(void)
   5150 {
   5151     const char* strRules[] = {
   5152         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   5153     };
   5154 
   5155     const int32_t apiRules[] = {
   5156         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   5157         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   5158         UCOL_REORDER_CODE_CURRENCY
   5159     };
   5160 
   5161     const static OneTestCase privateUseCharacterStrings[] = {
   5162         { {0x0391}, {0x0041}, UCOL_LESS },
   5163         { {0x0041}, {0x0391}, UCOL_GREATER },
   5164         { {0x0060}, {0x0041}, UCOL_LESS },
   5165         { {0x0060}, {0x0391}, UCOL_GREATER },
   5166         { {0x0024}, {0x0041}, UCOL_GREATER },
   5167     };
   5168 
   5169     /* Test rules creation */
   5170     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5171 
   5172     /* Test collation reordering API */
   5173     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5174 }
   5175 
   5176 static void TestHaniReorder(void)
   5177 {
   5178     const char* strRules[] = {
   5179         "[reorder Hani]"
   5180     };
   5181     const int32_t apiRules[] = {
   5182         USCRIPT_HAN
   5183     };
   5184 
   5185     const static OneTestCase privateUseCharacterStrings[] = {
   5186         { {0x4e00}, {0x0041}, UCOL_LESS },
   5187         { {0x4e00}, {0x0060}, UCOL_GREATER },
   5188         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   5189         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   5190         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   5191         { {0xfa27}, {0x0041}, UCOL_LESS },
   5192         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   5193     };
   5194 
   5195     /* Test rules creation */
   5196     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5197 
   5198     /* Test collation reordering API */
   5199     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5200 }
   5201 
   5202 static void TestHaniReorderWithOtherRules(void)
   5203 {
   5204     const char* strRules[] = {
   5205         "[reorder Hani] &b<a"
   5206     };
   5207     /*const int32_t apiRules[] = {
   5208         USCRIPT_HAN
   5209     };*/
   5210 
   5211     const static OneTestCase privateUseCharacterStrings[] = {
   5212         { {0x4e00}, {0x0041}, UCOL_LESS },
   5213         { {0x4e00}, {0x0060}, UCOL_GREATER },
   5214         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   5215         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   5216         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   5217         { {0xfa27}, {0x0041}, UCOL_LESS },
   5218         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   5219         { {0x0062}, {0x0061}, UCOL_LESS },
   5220     };
   5221 
   5222     /* Test rules creation */
   5223     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5224 }
   5225 
   5226 static void TestMultipleReorder(void)
   5227 {
   5228     const char* strRules[] = {
   5229         "[reorder Grek Zzzz DIGIT Latn Hani]"
   5230     };
   5231 
   5232     const int32_t apiRules[] = {
   5233         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
   5234     };
   5235 
   5236     const static OneTestCase collationTestCases[] = {
   5237         { {0x0391}, {0x0041}, UCOL_LESS},
   5238         { {0x0031}, {0x0041}, UCOL_LESS},
   5239         { {0x0041}, {0x4e00}, UCOL_LESS},
   5240     };
   5241 
   5242     /* Test rules creation */
   5243     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
   5244 
   5245     /* Test collation reordering API */
   5246     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
   5247 }
   5248 
   5249 /*
   5250  * Test that covers issue reported in ticket 8814
   5251  */
   5252 static void TestReorderWithNumericCollation(void)
   5253 {
   5254     UErrorCode status = U_ZERO_ERROR;
   5255     UCollator  *myCollation;
   5256     UCollator  *myReorderCollation;
   5257     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
   5258     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
   5259     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
   5260     UChar fortyS[] = { 0x0053 };
   5261     UChar fortyThreeP[] = { 0x0050 };
   5262     uint8_t fortyS_sortKey[128];
   5263     int32_t fortyS_sortKey_Length;
   5264     uint8_t fortyThreeP_sortKey[128];
   5265     int32_t fortyThreeP_sortKey_Length;
   5266     uint8_t fortyS_sortKey_reorder[128];
   5267     int32_t fortyS_sortKey_reorder_Length;
   5268     uint8_t fortyThreeP_sortKey_reorder[128];
   5269     int32_t fortyThreeP_sortKey_reorder_Length;
   5270     UCollationResult collResult;
   5271     UCollationResult collResultReorder;
   5272 
   5273     log_verbose("Testing reordering with and without numeric collation\n");
   5274 
   5275     /* build collator tertiary with numeric */
   5276     myCollation = ucol_open("", &status);
   5277     /*
   5278     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5279     */
   5280     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   5281     if(U_FAILURE(status)) {
   5282         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5283         return;
   5284     }
   5285 
   5286     /* build collator tertiary with numeric and reordering */
   5287     myReorderCollation = ucol_open("", &status);
   5288     /*
   5289     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
   5290     */
   5291     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   5292     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
   5293     if(U_FAILURE(status)) {
   5294         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5295         return;
   5296     }
   5297 
   5298     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
   5299     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
   5300     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
   5301     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
   5302 
   5303     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
   5304         log_err_status(status, "ERROR: couldn't generate sort keys\n");
   5305         return;
   5306     }
   5307     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   5308     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   5309     /*
   5310     fprintf(stderr, "\tcollResult = %x\n", collResult);
   5311     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
   5312     fprintf(stderr, "\nfortyS\n");
   5313     for (i = 0; i < fortyS_sortKey_Length; i++) {
   5314         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
   5315     }
   5316     fprintf(stderr, "\nfortyThreeP\n");
   5317     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
   5318         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
   5319     }
   5320     */
   5321     if (collResult != collResultReorder) {
   5322         log_err_status(status, "ERROR: collation results should have been the same.\n");
   5323         return;
   5324     }
   5325 
   5326     ucol_close(myCollation);
   5327     ucol_close(myReorderCollation);
   5328 }
   5329 
   5330 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   5331 {
   5332   for (; *a == *b; ++a, ++b) {
   5333     if (*a == 0) {
   5334       return 0;
   5335     }
   5336   }
   5337   return (*a < *b ? -1 : 1);
   5338 }
   5339 
   5340 static void TestImportRulesDeWithPhonebook(void)
   5341 {
   5342   const char* normalRules[] = {
   5343     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
   5344     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
   5345     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
   5346   };
   5347   const OneTestCase normalTests[] = {
   5348     { {0x00e6}, {0x00c6}, UCOL_LESS},
   5349     { {0x00fc}, {0x00dc}, UCOL_GREATER},
   5350   };
   5351 
   5352   const char* importRules[] = {
   5353     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
   5354     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   5355     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   5356   };
   5357   const OneTestCase importTests[] = {
   5358     { {0x00e6}, {0x00c6}, UCOL_LESS},
   5359     { {0x00fc}, {0x00dc}, UCOL_LESS},
   5360   };
   5361 
   5362   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
   5363   doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
   5364 }
   5365 
#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Compares the default ordering, the imported "root-u-co-eor" rules, the imported
 * "fi-u-co-standard" rules, and the combination of both imports, each against
 * its own expectation table. See the TODO near the end of the function for the
 * part that is tied to ICU ticket #8962.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Expectations for the default ordering: U+00A3 and U+00A5 are distinguished. */
  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRules[] = {
    "[import root-u-co-eor]",
  };

  /* With the EOR import, the two currency cases are expected to compare equal. */
  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard tailoring, imported on its own. */
  const char* fiStdRules[] = {
    "[import fi-u-co-standard]",
  };

  /* With the Finnish import, U+0110 is expected to sort after U+00F0. */
  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  /* Only referenced by the commented-out call at the end of this function. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expectations for the combination: Finnish letter order plus ignored currency characters. */
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
  /* The combined-import rules are checked against the fiEorTests table. */
  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */

}
#endif
   5435 
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Default expectation: U+3402 sorts after U+4E1E. */
  const OneTestCase defaultTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Unihan ordering, imported from the ko-u-co-unihan tailoring. */
  const char* unihanRules[] = {
    "[import ko-u-co-unihan]",
  };

  /* With the import, the same pair is expected in the opposite order. */
  const OneTestCase unihanTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));

}
#endif
   5467 
   5468 static void TestImport(void)
   5469 {
   5470     UCollator* vicoll;
   5471     UCollator* escoll;
   5472     UCollator* viescoll;
   5473     UCollator* importviescoll;
   5474     UParseError error;
   5475     UErrorCode status = U_ZERO_ERROR;
   5476     UChar* virules;
   5477     int32_t viruleslength;
   5478     UChar* esrules;
   5479     int32_t esruleslength;
   5480     UChar* viesrules;
   5481     int32_t viesruleslength;
   5482     char srules[500] = "[import vi][import es]";
   5483     UChar rules[500];
   5484     uint32_t length = 0;
   5485     int32_t itemCount;
   5486     int32_t i, k;
   5487     UChar32 start;
   5488     UChar32 end;
   5489     UChar str[500];
   5490     int32_t strLength;
   5491 
   5492     uint8_t sk1[500];
   5493     uint8_t sk2[500];
   5494 
   5495     UBool b;
   5496     USet* tailoredSet;
   5497     USet* importTailoredSet;
   5498 
   5499 
   5500     vicoll = ucol_open("vi", &status);
   5501     if(U_FAILURE(status)){
   5502         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   5503         return;
   5504     }
   5505 
   5506     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   5507     if(viruleslength == 0) {
   5508         log_data_err("missing vi tailoring rule string\n");
   5509         ucol_close(vicoll);
   5510         return;
   5511     }
   5512     escoll = ucol_open("es", &status);
   5513     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   5514     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   5515     viesrules[0] = 0;
   5516     u_strcat(viesrules, virules);
   5517     u_strcat(viesrules, esrules);
   5518     viesruleslength = viruleslength + esruleslength;
   5519     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5520 
   5521     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   5522     length = u_unescape(srules, rules, 500);
   5523     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5524     if(U_FAILURE(status)){
   5525         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5526         return;
   5527     }
   5528 
   5529     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   5530     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   5531 
   5532     if(!uset_equals(tailoredSet, importTailoredSet)){
   5533         log_err("Tailored sets not equal");
   5534     }
   5535 
   5536     uset_close(importTailoredSet);
   5537 
   5538     itemCount = uset_getItemCount(tailoredSet);
   5539 
   5540     for( i = 0; i < itemCount; i++){
   5541         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   5542         if(strLength < 2){
   5543             for (; start <= end; start++){
   5544                 k = 0;
   5545                 U16_APPEND(str, k, 500, start, b);
   5546                 (void)b;    /* Suppress set but not used warning. */
   5547                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   5548                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   5549                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5550                     log_err("Sort key for %s not equal\n", str);
   5551                     break;
   5552                 }
   5553             }
   5554         }else{
   5555             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   5556             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   5557             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5558                 log_err("ZZSort key for %s not equal\n", str);
   5559                 break;
   5560             }
   5561 
   5562         }
   5563     }
   5564 
   5565     uset_close(tailoredSet);
   5566 
   5567     uprv_free(viesrules);
   5568 
   5569     ucol_close(vicoll);
   5570     ucol_close(escoll);
   5571     ucol_close(viescoll);
   5572     ucol_close(importviescoll);
   5573 }
   5574 
   5575 static void TestImportWithType(void)
   5576 {
   5577     UCollator* vicoll;
   5578     UCollator* decoll;
   5579     UCollator* videcoll;
   5580     UCollator* importvidecoll;
   5581     UParseError error;
   5582     UErrorCode status = U_ZERO_ERROR;
   5583     const UChar* virules;
   5584     int32_t viruleslength;
   5585     const UChar* derules;
   5586     int32_t deruleslength;
   5587     UChar* viderules;
   5588     int32_t videruleslength;
   5589     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   5590     UChar rules[500];
   5591     uint32_t length = 0;
   5592     int32_t itemCount;
   5593     int32_t i, k;
   5594     UChar32 start;
   5595     UChar32 end;
   5596     UChar str[500];
   5597     int32_t strLength;
   5598 
   5599     uint8_t sk1[500];
   5600     uint8_t sk2[500];
   5601 
   5602     USet* tailoredSet;
   5603     USet* importTailoredSet;
   5604 
   5605     vicoll = ucol_open("vi", &status);
   5606     if(U_FAILURE(status)){
   5607         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5608         return;
   5609     }
   5610     virules = ucol_getRules(vicoll, &viruleslength);
   5611     if(viruleslength == 0) {
   5612         log_data_err("missing vi tailoring rule string\n");
   5613         ucol_close(vicoll);
   5614         return;
   5615     }
   5616     /* decoll = ucol_open("de@collation=phonebook", &status); */
   5617     decoll = ucol_open("de-u-co-phonebk", &status);
   5618     if(U_FAILURE(status)){
   5619         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5620         return;
   5621     }
   5622 
   5623 
   5624     derules = ucol_getRules(decoll, &deruleslength);
   5625     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   5626     viderules[0] = 0;
   5627     u_strcat(viderules, virules);
   5628     u_strcat(viderules, derules);
   5629     videruleslength = viruleslength + deruleslength;
   5630     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5631 
   5632     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   5633     length = u_unescape(srules, rules, 500);
   5634     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5635     if(U_FAILURE(status)){
   5636         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5637         return;
   5638     }
   5639 
   5640     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   5641     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   5642 
   5643     if(!uset_equals(tailoredSet, importTailoredSet)){
   5644         log_err("Tailored sets not equal");
   5645     }
   5646 
   5647     uset_close(importTailoredSet);
   5648 
   5649     itemCount = uset_getItemCount(tailoredSet);
   5650 
   5651     for( i = 0; i < itemCount; i++){
   5652         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   5653         if(strLength < 2){
   5654             for (; start <= end; start++){
   5655                 k = 0;
   5656                 U16_APPEND_UNSAFE(str, k, start);
   5657                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   5658                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   5659                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5660                     log_err("Sort key for %s not equal\n", str);
   5661                     break;
   5662                 }
   5663             }
   5664         }else{
   5665             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   5666             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   5667             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5668                 log_err("Sort key for %s not equal\n", str);
   5669                 break;
   5670             }
   5671 
   5672         }
   5673     }
   5674 
   5675     uset_close(tailoredSet);
   5676 
   5677     uprv_free(viderules);
   5678 
   5679     ucol_close(videcoll);
   5680     ucol_close(importvidecoll);
   5681     ucol_close(vicoll);
   5682     ucol_close(decoll);
   5683 }
   5684 
/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
/* Not NUL-terminated: the array holds exactly the characters above, so its length must be taken from the array size. */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
   5698 
/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
/* Each row below is one 25-character repetition; the array is not NUL-terminated. */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
   5707 
/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
/* Each row below is one 27-character repetition; the array is not NUL-terminated. */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
   5723 
   5724 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
   5725 
/* One entry of the longUpperStrItems table: a pointer to a UChar string plus its length. */
typedef struct {
    const UChar * longUpperStrPtr;  /* start of the string; NULL in the entry that ends the table */
    int32_t       longUpperStrLen;  /* number of UChars at longUpperStrPtr */
} LongUpperStrItem;
   5730 
/* String pointers must be in reverse collation order of the corresponding strings */
/* (TestCaseLevelBufferOverflow requires each sort key to compare less than the previous one). */
/* The table is terminated by an entry whose pointer is NULL. */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }
};
   5738 
/* Capacity, in bytes, of the sort key buffers used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
   5740 
   5741 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
   5742 static void TestCaseLevelBufferOverflow(void)
   5743 {
   5744     UErrorCode status = U_ZERO_ERROR;
   5745     UCollator * ucol = ucol_open("root", &status);
   5746     if ( U_SUCCESS(status) ) {
   5747         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
   5748         if ( U_SUCCESS(status) ) {
   5749             const LongUpperStrItem * itemPtr;
   5750             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
   5751             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
   5752                 int32_t sortKeyLen;
   5753                 if (itemPtr > longUpperStrItems) {
   5754                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
   5755                 }
   5756                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
   5757                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
   5758                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
   5759                     break;
   5760                 }
   5761                 if ( itemPtr > longUpperStrItems ) {
   5762                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
   5763                     if (compareResult >= 0) {
   5764                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
   5765                     }
   5766                 }
   5767             }
   5768         } else {
   5769             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
   5770         }
   5771         ucol_close(ucol);
   5772     } else {
   5773         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
   5774     }
   5775 }
   5776 
   5777 /* Test for #10595 */
   5778 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
   5779 #define KEY_PART_SIZE 16
   5780 
   5781 static void TestNextSortKeyPartJaIdentical(void)
   5782 {
   5783     UErrorCode status = U_ZERO_ERROR;
   5784     UCollator *coll;
   5785     uint8_t keyPart[KEY_PART_SIZE];
   5786     UCharIterator iter;
   5787     uint32_t state[2] = {0, 0};
   5788     int32_t keyPartLen;
   5789 
   5790     coll = ucol_open("ja", &status);
   5791     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   5792     if (U_FAILURE(status)) {
   5793         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
   5794         return;
   5795     }
   5796 
   5797     uiter_setString(&iter, testJapaneseName, 5);
   5798     keyPartLen = KEY_PART_SIZE;
   5799     while (keyPartLen == KEY_PART_SIZE) {
   5800         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
   5801         if (U_FAILURE(status)) {
   5802             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
   5803             break;
   5804         }
   5805     }
   5806 
   5807     ucol_close(coll);
   5808 }
   5809 
/*
 * Registers test function x through addTest() under the path
 * "tscoll/cmsccoll/<x>" (# x turns the function name into a string).
 * Expands to a use of `root`, so it can only be used where a variable of
 * that name is in scope.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
   5811 
   5812 void addMiscCollTest(TestNode** root)
   5813 {
   5814     TEST(TestRuleOptions);
   5815     TEST(TestBeforePrefixFailure);
   5816     TEST(TestContractionClosure);
   5817     TEST(TestPrefixCompose);
   5818     TEST(TestStrCollIdenticalPrefix);
   5819     TEST(TestPrefix);
   5820     TEST(TestNewJapanese);
   5821     /*TEST(TestLimitations);*/
   5822     TEST(TestNonChars);
   5823     TEST(TestExtremeCompression);
   5824     TEST(TestSurrogates);
   5825     TEST(TestVariableTopSetting);
   5826     TEST(TestMaxVariable);
   5827     TEST(TestBocsuCoverage);
   5828     TEST(TestCyrillicTailoring);
   5829     TEST(TestCase);
   5830     TEST(IncompleteCntTest);
   5831     TEST(BlackBirdTest);
   5832     TEST(FunkyATest);
   5833     TEST(BillFairmanTest);
   5834     TEST(TestChMove);
   5835     TEST(TestImplicitTailoring);
   5836     TEST(TestFCDProblem);
   5837     TEST(TestEmptyRule);
   5838     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
   5839     TEST(TestJ815);
   5840     /*TEST(TestJ831);*/ /* we changed lv locale */
   5841     TEST(TestBefore);
   5842     TEST(TestHangulTailoring);
   5843     TEST(TestUCARules);
   5844     TEST(TestIncrementalNormalize);
   5845     TEST(TestComposeDecompose);
   5846     TEST(TestCompressOverlap);
   5847     TEST(TestContraction);
   5848     TEST(TestExpansion);
   5849     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
   5850     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
   5851     TEST(TestOptimize);
   5852     TEST(TestSuppressContractions);
   5853     TEST(Alexis2);
   5854     TEST(TestHebrewUCA);
   5855     TEST(TestPartialSortKeyTermination);
   5856     TEST(TestSettings);
   5857     TEST(TestEquals);
   5858     TEST(TestJ2726);
   5859     TEST(NullRule);
   5860     TEST(TestNumericCollation);
   5861     TEST(TestTibetanConformance);
   5862     TEST(TestPinyinProblem);
   5863     TEST(TestSeparateTrees);
   5864     TEST(TestBeforePinyin);
   5865     TEST(TestBeforeTightening);
   5866     /*TEST(TestMoreBefore);*/
   5867     TEST(TestTailorNULL);
   5868     TEST(TestUpperFirstQuaternary);
   5869     TEST(TestJ4960);
   5870     TEST(TestJ5223);
   5871     TEST(TestJ5232);
   5872     TEST(TestJ5367);
   5873     TEST(TestHiragana);
   5874     TEST(TestSortKeyConsistency);
   5875     TEST(TestVI5913);  /* VI, RO tailored rules */
   5876     TEST(TestCroatianSortKey);
   5877     TEST(TestTailor6179);
   5878     TEST(TestUCAPrecontext);
   5879     TEST(TestOutOfBuffer5468);
   5880     TEST(TestSameStrengthList);
   5881 
   5882     TEST(TestSameStrengthListQuoted);
   5883     TEST(TestSameStrengthListSupplemental);
   5884     TEST(TestSameStrengthListQwerty);
   5885     TEST(TestSameStrengthListQuotedQwerty);
   5886     TEST(TestSameStrengthListRanges);
   5887     TEST(TestSameStrengthListSupplementalRanges);
   5888     TEST(TestSpecialCharacters);
   5889     TEST(TestPrivateUseCharacters);
   5890     TEST(TestPrivateUseCharactersInList);
   5891     TEST(TestPrivateUseCharactersInRange);
   5892     TEST(TestInvalidListsAndRanges);
   5893     TEST(TestImportRulesDeWithPhonebook);
   5894     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
   5895     /* TEST(TestImportRulesCJKWithUnihan); */
   5896     TEST(TestImport);
   5897     TEST(TestImportWithType);
   5898 
   5899     TEST(TestBeforeRuleWithScriptReordering);
   5900     TEST(TestNonLeadBytesDuringCollationReordering);
   5901     TEST(TestReorderingAPI);
   5902     TEST(TestReorderingAPIWithRuleCreatedCollator);
   5903     TEST(TestEquivalentReorderingScripts);
   5904     TEST(TestGreekFirstReorder);
   5905     TEST(TestGreekLastReorder);
   5906     TEST(TestNonScriptReorder);
   5907     TEST(TestHaniReorder);
   5908     TEST(TestHaniReorderWithOtherRules);
   5909     TEST(TestMultipleReorder);
   5910     TEST(TestReorderingAcrossCloning);
   5911     TEST(TestReorderWithNumericCollation);
   5912 
   5913     TEST(TestCaseLevelBufferOverflow);
   5914     TEST(TestNextSortKeyPartJaIdentical);
   5915 }
   5916 
   5917 #endif /* #if !UCONFIG_NO_COLLATION */
   5918