Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2015, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "cmemory.h"
     33 #include "cstring.h"
     34 #include "uassert.h"
     35 #include "unicode/parseerr.h"
     36 #include "unicode/ucnv.h"
     37 #include "unicode/ures.h"
     38 #include "unicode/uscript.h"
     39 #include "unicode/utf16.h"
     40 #include "uparse.h"
     41 #include "putilimp.h"
     42 
     43 
     44 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     45 
     46 #define MAX_TOKEN_LEN 16
     47 
     48 typedef UCollationResult tst_strcoll(void *collator, const int object,
     49                         const UChar *source, const int sLen,
     50                         const UChar *target, const int tLen);
     51 
     52 
     53 
     54 const static char cnt1[][10] = {
     55 
     56   "AA",
     57   "AC",
     58   "AZ",
     59   "AQ",
     60   "AB",
     61   "ABZ",
     62   "ABQ",
     63   "Z",
     64   "ABC",
     65   "Q",
     66   "B"
     67 };
     68 
     69 const static char cnt2[][10] = {
     70   "DA",
     71   "DAD",
     72   "DAZ",
     73   "MAR",
     74   "Z",
     75   "DAVIS",
     76   "MARK",
     77   "DAV",
     78   "DAVI"
     79 };
     80 
     81 static void IncompleteCntTest(void)
     82 {
     83   UErrorCode status = U_ZERO_ERROR;
     84   UChar temp[90];
     85   UChar t1[90];
     86   UChar t2[90];
     87 
     88   UCollator *coll =  NULL;
     89   uint32_t i = 0, j = 0;
     90   uint32_t size = 0;
     91 
     92   u_uastrcpy(temp, " & Z < ABC < Q < B");
     93 
     94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     95 
     96   if(U_SUCCESS(status)) {
     97     size = sizeof(cnt1)/sizeof(cnt1[0]);
     98     for(i = 0; i < size-1; i++) {
     99       for(j = i+1; j < size; j++) {
    100         UCollationElements *iter;
    101         u_uastrcpy(t1, cnt1[i]);
    102         u_uastrcpy(t2, cnt1[j]);
    103         doTest(coll, t1, t2, UCOL_LESS);
    104         /* synwee : added collation element iterator test */
    105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    106         if (U_FAILURE(status)) {
    107           log_err("Creation of iterator failed\n");
    108           break;
    109         }
    110         backAndForth(iter);
    111         ucol_closeElements(iter);
    112       }
    113     }
    114   }
    115 
    116   ucol_close(coll);
    117 
    118 
    119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    121 
    122   if(U_SUCCESS(status)) {
    123     size = sizeof(cnt2)/sizeof(cnt2[0]);
    124     for(i = 0; i < size-1; i++) {
    125       for(j = i+1; j < size; j++) {
    126         UCollationElements *iter;
    127         u_uastrcpy(t1, cnt2[i]);
    128         u_uastrcpy(t2, cnt2[j]);
    129         doTest(coll, t1, t2, UCOL_LESS);
    130 
    131         /* synwee : added collation element iterator test */
    132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    133         if (U_FAILURE(status)) {
    134           log_err("Creation of iterator failed\n");
    135           break;
    136         }
    137         backAndForth(iter);
    138         ucol_closeElements(iter);
    139       }
    140     }
    141   }
    142 
    143   ucol_close(coll);
    144 
    145 
    146 }
    147 
    148 const static char shifted[][20] = {
    149   "black bird",
    150   "black-bird",
    151   "blackbird",
    152   "black Bird",
    153   "black-Bird",
    154   "blackBird",
    155   "black birds",
    156   "black-birds",
    157   "blackbirds"
    158 };
    159 
    160 const static UCollationResult shiftedTert[] = {
    161   UCOL_EQUAL,
    162   UCOL_EQUAL,
    163   UCOL_EQUAL,
    164   UCOL_LESS,
    165   UCOL_EQUAL,
    166   UCOL_EQUAL,
    167   UCOL_LESS,
    168   UCOL_EQUAL,
    169   UCOL_EQUAL
    170 };
    171 
    172 const static char nonignorable[][20] = {
    173   "black bird",
    174   "black Bird",
    175   "black birds",
    176   "black-bird",
    177   "black-Bird",
    178   "black-birds",
    179   "blackbird",
    180   "blackBird",
    181   "blackbirds"
    182 };
    183 
    184 static void BlackBirdTest(void) {
    185   UErrorCode status = U_ZERO_ERROR;
    186   UChar t1[90];
    187   UChar t2[90];
    188 
    189   uint32_t i = 0, j = 0;
    190   uint32_t size = 0;
    191   UCollator *coll = ucol_open("en_US", &status);
    192 
    193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    195 
    196   if(U_SUCCESS(status)) {
    197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    198     for(i = 0; i < size-1; i++) {
    199       for(j = i+1; j < size; j++) {
    200         u_uastrcpy(t1, nonignorable[i]);
    201         u_uastrcpy(t2, nonignorable[j]);
    202         doTest(coll, t1, t2, UCOL_LESS);
    203       }
    204     }
    205   }
    206 
    207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    209 
    210   if(U_SUCCESS(status)) {
    211     size = sizeof(shifted)/sizeof(shifted[0]);
    212     for(i = 0; i < size-1; i++) {
    213       for(j = i+1; j < size; j++) {
    214         u_uastrcpy(t1, shifted[i]);
    215         u_uastrcpy(t2, shifted[j]);
    216         doTest(coll, t1, t2, UCOL_LESS);
    217       }
    218     }
    219   }
    220 
    221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    222   if(U_SUCCESS(status)) {
    223     size = sizeof(shifted)/sizeof(shifted[0]);
    224     for(i = 1; i < size; i++) {
    225       u_uastrcpy(t1, shifted[i-1]);
    226       u_uastrcpy(t2, shifted[i]);
    227       doTest(coll, t1, t2, shiftedTert[i]);
    228     }
    229   }
    230 
    231   ucol_close(coll);
    232 }
    233 
    234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    237     {0x0041/*'A'*/, 0x0300, 0x0000},
    238     {0x00C0, 0x0301, 0x0000},
    239     /* this would work with forced normalization */
    240     {0x00C0, 0x0316, 0x0000}
    241 };
    242 
    243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    246     {0x00C0, 0},
    247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    248     /* this would work with forced normalization */
    249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    250 };
    251 
    252 const static UCollationResult results[] = {
    253     UCOL_GREATER,
    254     UCOL_EQUAL,
    255     UCOL_EQUAL,
    256     UCOL_GREATER,
    257     UCOL_EQUAL
    258 };
    259 
    260 static void FunkyATest(void)
    261 {
    262 
    263     int32_t i;
    264     UErrorCode status = U_ZERO_ERROR;
    265     UCollator  *myCollation;
    266     myCollation = ucol_open("en_US", &status);
    267     if(U_FAILURE(status)){
    268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    269         return;
    270     }
    271     log_verbose("Testing some A letters, for some reason\n");
    272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    273     ucol_setStrength(myCollation, UCOL_TERTIARY);
    274     for (i = 0; i < 4 ; i++)
    275     {
    276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    277     }
    278     ucol_close(myCollation);
    279 }
    280 
    281 UColAttributeValue caseFirst[] = {
    282     UCOL_OFF,
    283     UCOL_LOWER_FIRST,
    284     UCOL_UPPER_FIRST
    285 };
    286 
    287 
    288 UColAttributeValue alternateHandling[] = {
    289     UCOL_NON_IGNORABLE,
    290     UCOL_SHIFTED
    291 };
    292 
    293 UColAttributeValue caseLevel[] = {
    294     UCOL_OFF,
    295     UCOL_ON
    296 };
    297 
    298 UColAttributeValue strengths[] = {
    299     UCOL_PRIMARY,
    300     UCOL_SECONDARY,
    301     UCOL_TERTIARY,
    302     UCOL_QUATERNARY,
    303     UCOL_IDENTICAL
    304 };
    305 
    306 #if 0
    307 static const char * strengthsC[] = {
    308     "UCOL_PRIMARY",
    309     "UCOL_SECONDARY",
    310     "UCOL_TERTIARY",
    311     "UCOL_QUATERNARY",
    312     "UCOL_IDENTICAL"
    313 };
    314 
    315 static const char * caseFirstC[] = {
    316     "UCOL_OFF",
    317     "UCOL_LOWER_FIRST",
    318     "UCOL_UPPER_FIRST"
    319 };
    320 
    321 
    322 static const char * alternateHandlingC[] = {
    323     "UCOL_NON_IGNORABLE",
    324     "UCOL_SHIFTED"
    325 };
    326 
    327 static const char * caseLevelC[] = {
    328     "UCOL_OFF",
    329     "UCOL_ON"
    330 };
    331 
    332 /* not used currently - does not test only prints */
    333 static void PrintMarkDavis(void)
    334 {
    335   UErrorCode status = U_ZERO_ERROR;
    336   UChar m[256];
    337   uint8_t sortkey[256];
    338   UCollator *coll = ucol_open("en_US", &status);
    339   uint32_t h,i,j,k, sortkeysize;
    340   uint32_t sizem = 0;
    341   char buffer[512];
    342   uint32_t len = 512;
    343 
    344   log_verbose("PrintMarkDavis");
    345 
    346   u_uastrcpy(m, "Mark Davis");
    347   sizem = u_strlen(m);
    348 
    349 
    350   m[1] = 0xe4;
    351 
    352   for(i = 0; i<sizem; i++) {
    353     fprintf(stderr, "\\u%04X ", m[i]);
    354   }
    355   fprintf(stderr, "\n");
    356 
    357   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    358     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    359     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    360 
    361     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    362       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    363       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    364 
    365       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    366         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    367         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    368 
    369         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    370           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    371           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    372           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    373           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    374         }
    375 
    376       }
    377 
    378     }
    379 
    380   }
    381 }
    382 #endif
    383 
    384 static void BillFairmanTest(void) {
    385 /*
    386 ** check for actual locale via ICU resource bundles
    387 **
    388 ** lp points to the original locale ("fr_FR_....")
    389 */
    390 
    391     UResourceBundle *lr,*cr;
    392     UErrorCode              lec = U_ZERO_ERROR;
    393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    394 
    395     log_verbose("BillFairmanTest\n");
    396 
    397     lr = ures_open(NULL,lp,&lec);
    398     if (lr) {
    399         cr = ures_getByKey(lr,"collations",0,&lec);
    400         if (cr) {
    401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    402             if (lp) {
    403                 if (U_SUCCESS(lec)) {
    404                     if(strcmp(lp, "fr") != 0) {
    405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    406                     }
    407                 }
    408             }
    409             ures_close(cr);
    410         }
    411         ures_close(lr);
    412     }
    413 }
    414 
    415 const static char chTest[][20] = {
    416   "c",
    417   "C",
    418   "ca", "cb", "cx", "cy", "CZ",
    419   "c\\u030C", "C\\u030C",
    420   "h",
    421   "H",
    422   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    423   "ch", "cH", "Ch", "CH",
    424   "cha", "charly", "che", "chh", "chch", "chr",
    425   "i", "I", "iarly",
    426   "r", "R",
    427   "r\\u030C", "R\\u030C",
    428   "s",
    429   "S",
    430   "s\\u030C", "S\\u030C",
    431   "z", "Z",
    432   "z\\u030C", "Z\\u030C"
    433 };
    434 
    435 static void TestChMove(void) {
    436     UChar t1[256] = {0};
    437     UChar t2[256] = {0};
    438 
    439     uint32_t i = 0, j = 0;
    440     uint32_t size = 0;
    441     UErrorCode status = U_ZERO_ERROR;
    442 
    443     UCollator *coll = ucol_open("cs", &status);
    444 
    445     if(U_SUCCESS(status)) {
    446         size = sizeof(chTest)/sizeof(chTest[0]);
    447         for(i = 0; i < size-1; i++) {
    448             for(j = i+1; j < size; j++) {
    449                 u_unescape(chTest[i], t1, 256);
    450                 u_unescape(chTest[j], t2, 256);
    451                 doTest(coll, t1, t2, UCOL_LESS);
    452             }
    453         }
    454     }
    455     else {
    456         log_data_err("Can't open collator");
    457     }
    458     ucol_close(coll);
    459 }
    460 
    461 
    462 
    463 
    464 /*
    465 const static char impTest[][20] = {
    466   "\\u4e00",
    467     "a",
    468     "A",
    469     "b",
    470     "B",
    471     "\\u4e01"
    472 };
    473 */
    474 
    475 
    476 static void TestImplicitTailoring(void) {
    477   static const struct {
    478     const char *rules;
    479     const char *data[10];
    480     const uint32_t len;
    481   } tests[] = {
    482       {
    483         /* Tailor b and c before U+4E00. */
    484         "&[before 1]\\u4e00 < b < c "
    485         /* Now, before U+4E00 is c; put d and e after that. */
    486         "&[before 1]\\u4e00 < d < e",
    487         { "b", "c", "d", "e", "\\u4e00"}, 5 },
    488       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
    489       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
    490       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
    491   };
    492 
    493   int32_t i = 0;
    494 
    495   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
    496       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
    497   }
    498 
    499 /*
    500   UChar t1[256] = {0};
    501   UChar t2[256] = {0};
    502 
    503   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
    504 
    505   uint32_t i = 0, j = 0;
    506   uint32_t size = 0;
    507   uint32_t ruleLen = 0;
    508   UErrorCode status = U_ZERO_ERROR;
    509   UCollator *coll = NULL;
    510   ruleLen = u_unescape(rule, t1, 256);
    511 
    512   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    513 
    514   if(U_SUCCESS(status)) {
    515     size = sizeof(impTest)/sizeof(impTest[0]);
    516     for(i = 0; i < size-1; i++) {
    517       for(j = i+1; j < size; j++) {
    518         u_unescape(impTest[i], t1, 256);
    519         u_unescape(impTest[j], t2, 256);
    520         doTest(coll, t1, t2, UCOL_LESS);
    521       }
    522     }
    523   }
    524   else {
    525     log_err("Can't open collator");
    526   }
    527   ucol_close(coll);
    528   */
    529 }
    530 
    531 static void TestFCDProblem(void) {
    532   UChar t1[256] = {0};
    533   UChar t2[256] = {0};
    534 
    535   const char *s1 = "\\u0430\\u0306\\u0325";
    536   const char *s2 = "\\u04D1\\u0325";
    537 
    538   UErrorCode status = U_ZERO_ERROR;
    539   UCollator *coll = ucol_open("", &status);
    540   u_unescape(s1, t1, 256);
    541   u_unescape(s2, t2, 256);
    542 
    543   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    544   doTest(coll, t1, t2, UCOL_EQUAL);
    545 
    546   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    547   doTest(coll, t1, t2, UCOL_EQUAL);
    548 
    549   ucol_close(coll);
    550 }
    551 
    552 /*
    553 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
    554 We're only using NFC/NFD in this test.
    555 */
    556 #define NORM_BUFFER_TEST_LEN 18
    557 typedef struct {
    558   UChar32 u;
    559   UChar NFC[NORM_BUFFER_TEST_LEN];
    560   UChar NFD[NORM_BUFFER_TEST_LEN];
    561 } tester;
    562 
    563 static void TestComposeDecompose(void) {
    564     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
    565     static const UChar UNICODESET_STR[] = {
    566         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
    567         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
    568         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
    569     };
    570     int32_t noOfLoc;
    571     int32_t i = 0, j = 0;
    572 
    573     UErrorCode status = U_ZERO_ERROR;
    574     const char *locName = NULL;
    575     uint32_t nfcSize;
    576     uint32_t nfdSize;
    577     tester **t;
    578     uint32_t noCases = 0;
    579     UCollator *coll = NULL;
    580     UChar32 u = 0;
    581     UChar comp[NORM_BUFFER_TEST_LEN];
    582     uint32_t len = 0;
    583     UCollationElements *iter;
    584     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
    585     int32_t charsToTestSize;
    586 
    587     noOfLoc = uloc_countAvailable();
    588 
    589     coll = ucol_open("", &status);
    590     if (U_FAILURE(status)) {
    591         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
    592         return;
    593     }
    594     charsToTestSize = uset_size(charsToTest);
    595     if (charsToTestSize <= 0) {
    596         log_err("Set was zero. Missing data?\n");
    597         return;
    598     }
    599     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
    600     t[0] = (tester *)malloc(sizeof(tester));
    601     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
    602 
    603     for(u = 0; u < charsToTestSize; u++) {
    604         UChar32 ch = uset_charAt(charsToTest, u);
    605         len = 0;
    606         U16_APPEND_UNSAFE(comp, len, ch);
    607         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    608         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    609 
    610         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
    611           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
    612             t[noCases]->u = ch;
    613             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
    614                 u_strncpy(t[noCases]->NFC, comp, len);
    615                 t[noCases]->NFC[len] = 0;
    616             }
    617             noCases++;
    618             t[noCases] = (tester *)malloc(sizeof(tester));
    619             uprv_memset(t[noCases], 0, sizeof(tester));
    620         }
    621     }
    622     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
    623     uset_close(charsToTest);
    624     charsToTest = NULL;
    625 
    626     for(u=0; u<(UChar32)noCases; u++) {
    627         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    628             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
    629             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    630         }
    631     }
    632     /*
    633     for(u = 0; u < charsToTestSize; u++) {
    634       if(!(u&0xFFFF)) {
    635         log_verbose("%08X ", u);
    636       }
    637       uprv_memset(t[noCases], 0, sizeof(tester));
    638       t[noCases]->u = u;
    639       len = 0;
    640       U16_APPEND_UNSAFE(comp, len, u);
    641       comp[len] = 0;
    642       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    643       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    644       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
    645       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
    646     }
    647     */
    648 
    649     ucol_close(coll);
    650 
    651     log_verbose("Testing locales, number of cases = %i\n", noCases);
    652     for(i = 0; i<noOfLoc; i++) {
    653         status = U_ZERO_ERROR;
    654         locName = uloc_getAvailable(i);
    655         if(hasCollationElements(locName)) {
    656             char cName[256];
    657             UChar name[256];
    658             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
    659 
    660             for(j = 0; j<nameSize; j++) {
    661                 cName[j] = (char)name[j];
    662             }
    663             cName[nameSize] = 0;
    664             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
    665 
    666             coll = ucol_open(locName, &status);
    667             ucol_setStrength(coll, UCOL_IDENTICAL);
    668             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    669 
    670             for(u=0; u<(UChar32)noCases; u++) {
    671                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    672                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
    673                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    674                     log_verbose("Testing NFC\n");
    675                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
    676                     backAndForth(iter);
    677                     log_verbose("Testing NFD\n");
    678                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    679                     backAndForth(iter);
    680                 }
    681             }
    682             ucol_closeElements(iter);
    683             ucol_close(coll);
    684         }
    685     }
    686     for(u = 0; u <= (UChar32)noCases; u++) {
    687         free(t[u]);
    688     }
    689     free(t);
    690 }
    691 
    692 static void TestEmptyRule(void) {
    693   UErrorCode status = U_ZERO_ERROR;
    694   UChar rulez[] = { 0 };
    695   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    696 
    697   ucol_close(coll);
    698 }
    699 
    700 static void TestUCARules(void) {
    701   UErrorCode status = U_ZERO_ERROR;
    702   UChar b[256];
    703   UChar *rules = b;
    704   uint32_t ruleLen = 0;
    705   UCollator *UCAfromRules = NULL;
    706   UCollator *coll = ucol_open("", &status);
    707   if(status == U_FILE_ACCESS_ERROR) {
    708     log_data_err("Is your data around?\n");
    709     return;
    710   } else if(U_FAILURE(status)) {
    711     log_err("Error opening collator\n");
    712     return;
    713   }
    714   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
    715 
    716   log_verbose("TestUCARules\n");
    717   if(ruleLen > 256) {
    718     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
    719     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
    720   }
    721   log_verbose("Rules length is %d\n", ruleLen);
    722   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
    723   if(U_SUCCESS(status)) {
    724     ucol_close(UCAfromRules);
    725   } else {
    726     log_verbose("Unable to create a collator from UCARules!\n");
    727   }
    728 /*
    729   u_unescape(blah, b, 256);
    730   ucol_getSortKey(coll, b, 1, res, 256);
    731 */
    732   ucol_close(coll);
    733   if(rules != b) {
    734     free(rules);
    735   }
    736 }
    737 
    738 
    739 /* Pinyin tonal order */
    740 /*
    741     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
    742           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
    743     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
    744     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
    745     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
    746     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
    747       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
    748 .. (\u00fc)
    749 
    750 However, in testing we got the following order:
    751     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
    752           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
    753     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
    754 .. (\u0113)
    755     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
    756     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
    757     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
    758 .. (\u01d8)
    759       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
    760 */
    761 
    762 static void TestBefore(void) {
    763   const static char *data[] = {
    764       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
    765       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
    766       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
    767       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
    768       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
    769       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    770   };
    771   genericRulesStarter(
    772     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    773     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    774     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    775     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    776     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    777     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
    778     data, sizeof(data)/sizeof(data[0]));
    779 }
    780 
    781 #if 0
    782 /* superceded by TestBeforePinyin */
    783 static void TestJ784(void) {
    784   const static char *data[] = {
    785       "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
    786       "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
    787       "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
    788       "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
    789       "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
    790       "\\u00fc",
    791            "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    792   };
    793   genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
    794 }
    795 #endif
    796 
    797 #if 0
    798 /* superceded by the changes to the lv locale */
    799 static void TestJ831(void) {
    800   const static char *data[] = {
    801     "I",
    802       "i",
    803       "Y",
    804       "y"
    805   };
    806   genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
    807 }
    808 #endif
    809 
    810 static void TestJ815(void) {
    811   const static char *data[] = {
    812     "aa",
    813       "Aa",
    814       "ab",
    815       "Ab",
    816       "ad",
    817       "Ad",
    818       "ae",
    819       "Ae",
    820       "\\u00e6",
    821       "\\u00c6",
    822       "af",
    823       "Af",
    824       "b",
    825       "B"
    826   };
    827   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
    828   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
    829 }
    830 
    831 
    832 static void TestCase(void)
    833 {
    834     const static UChar gRules[MAX_TOKEN_LEN] =
    835     /*" & 0 < 1,\u2461<a,A"*/
    836     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
    837 
    838     const static UChar testCase[][MAX_TOKEN_LEN] =
    839     {
    840         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
    841         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
    842         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
    843         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
    844     };
    845 
    846     const static UCollationResult caseTestResults[][9] =
    847     {
    848         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    849         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
    850         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    851         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
    852     };
    853 
    854     const static UColAttributeValue caseTestAttributes[][2] =
    855     {
    856         { UCOL_LOWER_FIRST, UCOL_OFF},
    857         { UCOL_UPPER_FIRST, UCOL_OFF},
    858         { UCOL_LOWER_FIRST, UCOL_ON},
    859         { UCOL_UPPER_FIRST, UCOL_ON}
    860     };
    861     int32_t i,j,k;
    862     UErrorCode status = U_ZERO_ERROR;
    863     UCollationElements *iter;
    864     UCollator  *myCollation;
    865     myCollation = ucol_open("en_US", &status);
    866 
    867     if(U_FAILURE(status)){
    868         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    869         return;
    870     }
    871     log_verbose("Testing different case settings\n");
    872     ucol_setStrength(myCollation, UCOL_TERTIARY);
    873 
    874     for(k = 0; k<4; k++) {
    875       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    876       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    877       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
    878       for (i = 0; i < 3 ; i++) {
    879         for(j = i+1; j<4; j++) {
    880           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    881         }
    882       }
    883     }
    884     ucol_close(myCollation);
    885 
    886     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    887     if(U_FAILURE(status)){
    888         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    889         return;
    890     }
    891     log_verbose("Testing different case settings with custom rules\n");
    892     ucol_setStrength(myCollation, UCOL_TERTIARY);
    893 
    894     for(k = 0; k<4; k++) {
    895       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    896       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    897       for (i = 0; i < 3 ; i++) {
    898         for(j = i+1; j<4; j++) {
    899           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
    900           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    901           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
    902           backAndForth(iter);
    903           ucol_closeElements(iter);
    904           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
    905           backAndForth(iter);
    906           ucol_closeElements(iter);
    907         }
    908       }
    909     }
    910     ucol_close(myCollation);
    911     {
    912       const static char *lowerFirst[] = {
    913         "h",
    914         "H",
    915         "ch",
    916         "Ch",
    917         "CH",
    918         "cha",
    919         "chA",
    920         "Cha",
    921         "ChA",
    922         "CHa",
    923         "CHA",
    924         "i",
    925         "I"
    926       };
    927 
    928       const static char *upperFirst[] = {
    929         "H",
    930         "h",
    931         "CH",
    932         "Ch",
    933         "ch",
    934         "CHA",
    935         "CHa",
    936         "ChA",
    937         "Cha",
    938         "chA",
    939         "cha",
    940         "I",
    941         "i"
    942       };
    943       log_verbose("mixed case test\n");
    944       log_verbose("lower first, case level off\n");
    945       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
    946       log_verbose("upper first, case level off\n");
    947       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
    948       log_verbose("lower first, case level on\n");
    949       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
    950       log_verbose("upper first, case level on\n");
    951       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
    952     }
    953 
    954 }
    955 
    956 static void TestIncrementalNormalize(void) {
    957 
    958     /*UChar baseA     =0x61;*/
    959     UChar baseA     =0x41;
    960 /*    UChar baseB     = 0x42;*/
    961     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
    962     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
    963     /*
    964         0x316 is combining grave accent below, cc=220
    965         0x321 is combining palatalized hook below, cc=202
    966         0x300 is combining grave accent, cc=230
    967     */
    968 
    969 #define MAXSLEN 2000
    970     /*int          maxSLen   = 64000;*/
    971     int          sLen;
    972     int          i;
    973 
    974     UCollator        *coll;
    975     UErrorCode       status = U_ZERO_ERROR;
    976     UCollationResult result;
    977 
    978     int32_t myQ = getTestOption(QUICK_OPTION);
    979 
    980     if(getTestOption(QUICK_OPTION) < 0) {
    981         setTestOption(QUICK_OPTION, 1);
    982     }
    983 
    984     {
    985         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
    986         /*          most buffers along the way.*/
    987         UChar            strA[MAXSLEN+1];
    988         UChar            strB[MAXSLEN+1];
    989 
    990         coll = ucol_open("en_US", &status);
    991         if(status == U_FILE_ACCESS_ERROR) {
    992           log_data_err("Is your data around?\n");
    993           return;
    994         } else if(U_FAILURE(status)) {
    995           log_err("Error opening collator\n");
    996           return;
    997         }
    998         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    999 
   1000         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   1001         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   1002         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   1003         for (sLen = 500; sLen<501; sLen++) {
   1004         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   1005             strA[0] = baseA;
   1006             strB[0] = baseA;
   1007             for (i=1; i<=sLen-1; i++) {
   1008                 strA[i] = ccMix[i % 3];
   1009                 strB[sLen-i] = ccMix[i % 3];
   1010             }
   1011             strA[sLen]   = 0;
   1012             strB[sLen]   = 0;
   1013 
   1014             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   1015             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   1016             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   1017             doTest(coll, strA, strB, UCOL_EQUAL);
   1018         }
   1019     }
   1020 
   1021     setTestOption(QUICK_OPTION, myQ);
   1022 
   1023 
   1024     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   1025     /*         of the string.  Checks a couple of edge cases.*/
   1026 
   1027     {
   1028         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   1029         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   1030         ucol_setStrength(coll, UCOL_TERTIARY);
   1031         doTest(coll, strA, strB, UCOL_EQUAL);
   1032     }
   1033 
   1034     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   1035 
   1036     {
   1037       /* New UCA  3.1.1.
   1038        * test below used a code point from Desseret, which sorts differently
   1039        * than d800 dc00
   1040        */
   1041         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   1042         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   1043         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   1044         ucol_setStrength(coll, UCOL_TERTIARY);
   1045         doTest(coll, strA, strB, UCOL_GREATER);
   1046     }
   1047 
   1048     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   1049 
   1050     {
   1051         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   1052         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   1053         char  sortKeyA[50];
   1054         char  sortKeyAz[50];
   1055         char  sortKeyB[50];
   1056         char  sortKeyBz[50];
   1057         int   r;
   1058 
   1059         /* there used to be -3 here. Hmmmm.... */
   1060         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   1061         result = ucol_strcoll(coll, strA, 3, strB, 3);
   1062         if (result != UCOL_GREATER) {
   1063             log_err("ERROR 1 in test 4\n");
   1064         }
   1065         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1066         if (result != UCOL_EQUAL) {
   1067             log_err("ERROR 2 in test 4\n");
   1068         }
   1069 
   1070         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1071         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1072         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1073         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1074 
   1075         r = strcmp(sortKeyA, sortKeyAz);
   1076         if (r <= 0) {
   1077             log_err("Error 3 in test 4\n");
   1078         }
   1079         r = strcmp(sortKeyA, sortKeyB);
   1080         if (r <= 0) {
   1081             log_err("Error 4 in test 4\n");
   1082         }
   1083         r = strcmp(sortKeyAz, sortKeyBz);
   1084         if (r != 0) {
   1085             log_err("Error 5 in test 4\n");
   1086         }
   1087 
   1088         ucol_setStrength(coll, UCOL_IDENTICAL);
   1089         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1090         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1091         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1092         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1093 
   1094         r = strcmp(sortKeyA, sortKeyAz);
   1095         if (r <= 0) {
   1096             log_err("Error 6 in test 4\n");
   1097         }
   1098         r = strcmp(sortKeyA, sortKeyB);
   1099         if (r <= 0) {
   1100             log_err("Error 7 in test 4\n");
   1101         }
   1102         r = strcmp(sortKeyAz, sortKeyBz);
   1103         if (r != 0) {
   1104             log_err("Error 8 in test 4\n");
   1105         }
   1106         ucol_setStrength(coll, UCOL_TERTIARY);
   1107     }
   1108 
   1109 
   1110     /*  Test 5:  Null characters in non-normal source strings.*/
   1111 
   1112     {
   1113         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   1114         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   1115         char  sortKeyA[50];
   1116         char  sortKeyAz[50];
   1117         char  sortKeyB[50];
   1118         char  sortKeyBz[50];
   1119         int   r;
   1120 
   1121         result = ucol_strcoll(coll, strA, 6, strB, 6);
   1122         if (result != UCOL_GREATER) {
   1123             log_err("ERROR 1 in test 5\n");
   1124         }
   1125         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1126         if (result != UCOL_EQUAL) {
   1127             log_err("ERROR 2 in test 5\n");
   1128         }
   1129 
   1130         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1131         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1132         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1133         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1134 
   1135         r = strcmp(sortKeyA, sortKeyAz);
   1136         if (r <= 0) {
   1137             log_err("Error 3 in test 5\n");
   1138         }
   1139         r = strcmp(sortKeyA, sortKeyB);
   1140         if (r <= 0) {
   1141             log_err("Error 4 in test 5\n");
   1142         }
   1143         r = strcmp(sortKeyAz, sortKeyBz);
   1144         if (r != 0) {
   1145             log_err("Error 5 in test 5\n");
   1146         }
   1147 
   1148         ucol_setStrength(coll, UCOL_IDENTICAL);
   1149         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1150         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1151         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1152         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1153 
   1154         r = strcmp(sortKeyA, sortKeyAz);
   1155         if (r <= 0) {
   1156             log_err("Error 6 in test 5\n");
   1157         }
   1158         r = strcmp(sortKeyA, sortKeyB);
   1159         if (r <= 0) {
   1160             log_err("Error 7 in test 5\n");
   1161         }
   1162         r = strcmp(sortKeyAz, sortKeyBz);
   1163         if (r != 0) {
   1164             log_err("Error 8 in test 5\n");
   1165         }
   1166         ucol_setStrength(coll, UCOL_TERTIARY);
   1167     }
   1168 
   1169 
   1170     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   1171 
   1172     {
   1173         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   1174         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   1175 
   1176         result = ucol_strcoll(coll, strA, 5, strB, 5);
   1177         if (result != UCOL_LESS) {
   1178             log_err("Error 1 in test 6\n");
   1179         }
   1180         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1181         if (result != UCOL_EQUAL) {
   1182             log_err("Error 2 in test 6\n");
   1183         }
   1184     }
   1185 
   1186     ucol_close(coll);
   1187 }
   1188 
   1189 
   1190 
   1191 #if 0
   1192 static void TestGetCaseBit(void) {
   1193   static const char *caseBitData[] = {
   1194     "a", "A", "ch", "Ch", "CH",
   1195       "\\uFF9E", "\\u0009"
   1196   };
   1197 
   1198   static const uint8_t results[] = {
   1199     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   1200       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   1201   };
   1202 
   1203   uint32_t i, blen = 0;
   1204   UChar b[256] = {0};
   1205   UErrorCode status = U_ZERO_ERROR;
   1206   UCollator *UCA = ucol_open("", &status);
   1207   uint8_t res = 0;
   1208 
   1209   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   1210     blen = u_unescape(caseBitData[i], b, 256);
   1211     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   1212     if(results[i] != res) {
   1213       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   1214     }
   1215   }
   1216 }
   1217 #endif
   1218 
   1219 static void TestHangulTailoring(void) {
   1220     static const char *koreanData[] = {
   1221         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   1222             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   1223             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   1224             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   1225             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   1226             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   1227     };
   1228 
   1229     const char *rules =
   1230         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   1231         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   1232         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   1233         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   1234         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   1235         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   1236 
   1237 
   1238   UErrorCode status = U_ZERO_ERROR;
   1239   UChar rlz[2048] = { 0 };
   1240   uint32_t rlen = u_unescape(rules, rlz, 2048);
   1241 
   1242   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   1243   if(status == U_FILE_ACCESS_ERROR) {
   1244     log_data_err("Is your data around?\n");
   1245     return;
   1246   } else if(U_FAILURE(status)) {
   1247     log_err("Error opening collator\n");
   1248     return;
   1249   }
   1250 
   1251   log_verbose("Using start of korean rules\n");
   1252 
   1253   if(U_SUCCESS(status)) {
   1254     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   1255   } else {
   1256     log_err("Unable to open collator with rules %s\n", rules);
   1257   }
   1258 
   1259   ucol_close(coll);
   1260 
   1261   log_verbose("Using ko__LOTUS locale\n");
   1262   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   1263 }
   1264 
   1265 /*
   1266  * The secondary/tertiary compression middle byte
   1267  * as used by the current implementation.
   1268  * Subject to change as the sort key compression changes.
   1269  * See class CollationKeys.
   1270  */
   1271 enum {
   1272     SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
   1273     TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
   1274 };
   1275 
   1276 static void TestCompressOverlap(void) {
   1277     UChar       secstr[150];
   1278     UChar       tertstr[150];
   1279     UErrorCode  status = U_ZERO_ERROR;
   1280     UCollator  *coll;
   1281     uint8_t     result[500];
   1282     uint32_t    resultlen;
   1283     int         count = 0;
   1284     uint8_t    *tempptr;
   1285 
   1286     coll = ucol_open("", &status);
   1287 
   1288     if (U_FAILURE(status)) {
   1289         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   1290         return;
   1291     }
   1292     while (count < 149) {
   1293         secstr[count] = 0x0020; /* [06, 05, 05] */
   1294         tertstr[count] = 0x0020;
   1295         count ++;
   1296     }
   1297 
   1298     /* top down compression ----------------------------------- */
   1299     secstr[count] = 0x0332; /* [, 87, 05] */
   1300     tertstr[count] = 0x3000; /* [06, 05, 07] */
   1301 
   1302     /* no compression secstr should have 150 secondary bytes, tertstr should
   1303     have 150 tertiary bytes.
   1304     with correct compression, secstr should have 6 secondary
   1305     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
   1306     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
   1307     (void)resultlen;    /* Suppress set but not used warning. */
   1308     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1309     while (*(tempptr + 1) != 1) {
   1310         /* the last secondary collation element is not checked since it is not
   1311         part of the compression */
   1312         if (*tempptr < SEC_COMMON_MIDDLE) {
   1313             log_err("Secondary top down compression overlapped\n");
   1314         }
   1315         tempptr ++;
   1316     }
   1317 
   1318     /* tertiary top/bottom/common for en_US is similar to the secondary
   1319     top/bottom/common */
   1320     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
   1321     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1322     while (*(tempptr + 1) != 0) {
   1323         /* the last secondary collation element is not checked since it is not
   1324         part of the compression */
   1325         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
   1326             log_err("Tertiary top down compression overlapped\n");
   1327         }
   1328         tempptr ++;
   1329     }
   1330 
   1331     /* bottom up compression ------------------------------------- */
   1332     secstr[count] = 0;
   1333     tertstr[count] = 0;
   1334     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
   1335     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1336     while (*(tempptr + 1) != 1) {
   1337         /* the last secondary collation element is not checked since it is not
   1338         part of the compression */
   1339         if (*tempptr > SEC_COMMON_MIDDLE) {
   1340             log_err("Secondary bottom up compression overlapped\n");
   1341         }
   1342         tempptr ++;
   1343     }
   1344 
   1345     /* tertiary top/bottom/common for en_US is similar to the secondary
   1346     top/bottom/common */
   1347     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
   1348     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1349     while (*(tempptr + 1) != 0) {
   1350         /* the last secondary collation element is not checked since it is not
   1351         part of the compression */
   1352         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
   1353             log_err("Tertiary bottom up compression overlapped\n");
   1354         }
   1355         tempptr ++;
   1356     }
   1357 
   1358     ucol_close(coll);
   1359 }
   1360 
   1361 static void TestCyrillicTailoring(void) {
   1362   static const char *test[] = {
   1363     "\\u0410b",
   1364       "\\u0410\\u0306a",
   1365       "\\u04d0A"
   1366   };
   1367 
   1368     /* Russian overrides contractions, so this test is not valid anymore */
   1369     /*genericLocaleStarter("ru", test, 3);*/
   1370 
   1371     // Most of the following are commented out because UCA 8.0
   1372     // drops most of the Cyrillic contractions from the default order.
   1373     // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
   1374 
   1375     // genericLocaleStarter("root", test, 3);
   1376     // genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   1377     // genericRulesStarter("&Z < \\u0410", test, 3);
   1378     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   1379     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   1380     // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   1381     // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   1382 }
   1383 
   1384 static void TestSuppressContractions(void) {
   1385 
   1386   static const char *testNoCont2[] = {
   1387       "\\u0410\\u0302a",
   1388       "\\u0410\\u0306b",
   1389       "\\u0410c"
   1390   };
   1391   static const char *testNoCont[] = {
   1392       "a\\u0410",
   1393       "A\\u0410\\u0306",
   1394       "\\uFF21\\u0410\\u0302"
   1395   };
   1396 
   1397   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   1398   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   1399 }
   1400 
   1401 static void TestContraction(void) {
   1402     const static char *testrules[] = {
   1403         "&A = AB / B",
   1404         "&A = A\\u0306/\\u0306",
   1405         "&c = ch / h"
   1406     };
   1407     const static UChar testdata[][2] = {
   1408         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   1409         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   1410         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   1411     };
   1412     const static UChar testdata2[][2] = {
   1413         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   1414         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   1415         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   1416     };
   1417 #if 0
   1418     /*
   1419      * These pairs of rule strings are not guaranteed to yield the very same mappings.
   1420      * In fact, LDML 24 recommends an improved way of creating mappings
   1421      * which always yields different mappings for such pairs. See
   1422      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
   1423      */
   1424     const static char *testrules3[] = {
   1425         "&z < xyz &xyzw << B",
   1426         "&z < xyz &xyz << B / w",
   1427         "&z < ch &achm << B",
   1428         "&z < ch &a << B / chm",
   1429         "&\\ud800\\udc00w << B",
   1430         "&\\ud800\\udc00 << B / w",
   1431         "&a\\ud800\\udc00m << B",
   1432         "&a << B / \\ud800\\udc00m",
   1433     };
   1434 #endif
   1435 
   1436     UErrorCode  status   = U_ZERO_ERROR;
   1437     UCollator  *coll;
   1438     UChar       rule[256] = {0};
   1439     uint32_t    rlen     = 0;
   1440     int         i;
   1441 
   1442     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   1443         UCollationElements *iter1;
   1444         int j = 0;
   1445         log_verbose("Rule %s for testing\n", testrules[i]);
   1446         rlen = u_unescape(testrules[i], rule, 32);
   1447         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1448         if (U_FAILURE(status)) {
   1449             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1450             return;
   1451         }
   1452         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   1453         if (U_FAILURE(status)) {
   1454             log_err("Collation iterator creation failed\n");
   1455             return;
   1456         }
   1457         while (j < 2) {
   1458             UCollationElements *iter2 = ucol_openElements(coll,
   1459                                                          &(testdata[i][j]),
   1460                                                          1, &status);
   1461             uint32_t ce;
   1462             if (U_FAILURE(status)) {
   1463                 log_err("Collation iterator creation failed\n");
   1464                 return;
   1465             }
   1466             ce = ucol_next(iter2, &status);
   1467             while (ce != UCOL_NULLORDER) {
   1468                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   1469                     log_err("Collation elements in contraction split does not match\n");
   1470                     return;
   1471                 }
   1472                 ce = ucol_next(iter2, &status);
   1473             }
   1474             j ++;
   1475             ucol_closeElements(iter2);
   1476         }
   1477         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   1478             log_err("Collation elements not exhausted\n");
   1479             return;
   1480         }
   1481         ucol_closeElements(iter1);
   1482         ucol_close(coll);
   1483     }
   1484 
   1485     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   1486     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1487     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   1488         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1489                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   1490                 testdata2[1][1]);
   1491         return;
   1492     }
   1493     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   1494         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1495                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   1496                 testdata2[2][1]);
   1497         return;
   1498     }
   1499     ucol_close(coll);
   1500 #if 0  /* see above */
   1501     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   1502         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
   1503         UCollator          *coll1,
   1504                            *coll2;
   1505         UCollationElements *iter1,
   1506                            *iter2;
   1507         UChar               ch = 0x0042 /* 'B' */;
   1508         uint32_t            ce;
   1509         rlen = u_unescape(testrules3[i], rule, 32);
   1510         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1511         rlen = u_unescape(testrules3[i + 1], rule, 32);
   1512         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1513         if (U_FAILURE(status)) {
   1514             log_err("Collator creation failed %s\n", testrules[i]);
   1515             return;
   1516         }
   1517         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   1518         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   1519         if (U_FAILURE(status)) {
   1520             log_err("Collation iterator creation failed\n");
   1521             return;
   1522         }
   1523         ce = ucol_next(iter1, &status);
   1524         if (U_FAILURE(status)) {
   1525             log_err("Retrieving ces failed\n");
   1526             return;
   1527         }
   1528         while (ce != UCOL_NULLORDER) {
   1529             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
   1530             if (ce == ce2) {
   1531                 log_verbose("CEs match: %08x\n", ce);
   1532             } else {
   1533                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
   1534                 return;
   1535             }
   1536             ce = ucol_next(iter1, &status);
   1537             if (U_FAILURE(status)) {
   1538                 log_err("Retrieving ces failed\n");
   1539                 return;
   1540             }
   1541         }
   1542         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   1543             log_err("CEs not exhausted\n");
   1544             return;
   1545         }
   1546         ucol_closeElements(iter1);
   1547         ucol_closeElements(iter2);
   1548         ucol_close(coll1);
   1549         ucol_close(coll2);
   1550     }
   1551 #endif
   1552 }
   1553 
   1554 static void TestExpansion(void) {
   1555     const static char *testrules[] = {
   1556 #if 0
   1557         /*
   1558          * This seems to have tested that M was not mapped to an expansion.
   1559          * I believe the old builder just did that because it computed the extension CEs
   1560          * at the very end, which was a bug.
   1561          * Among other problems, it violated the core tailoring principle
   1562          * by making an earlier rule depend on a later one.
   1563          * And, of course, if M did not get an expansion, then it was primary different from K,
   1564          * unlike what the rule &K<<M says.
   1565          */
   1566         "&J << K / B & K << M",
   1567 #endif
   1568         "&J << K / B << M"
   1569     };
   1570     const static UChar testdata[][3] = {
   1571         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   1572         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   1573         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   1574         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   1575         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   1576         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   1577     };
   1578 
   1579     UErrorCode  status   = U_ZERO_ERROR;
   1580     UCollator  *coll;
   1581     UChar       rule[256] = {0};
   1582     uint32_t    rlen     = 0;
   1583     int         i;
   1584 
   1585     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   1586         int j = 0;
   1587         log_verbose("Rule %s for testing\n", testrules[i]);
   1588         rlen = u_unescape(testrules[i], rule, 32);
   1589         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1590         if (U_FAILURE(status)) {
   1591             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1592             return;
   1593         }
   1594 
   1595         for (j = 0; j < 5; j ++) {
   1596             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   1597         }
   1598         ucol_close(coll);
   1599     }
   1600 }
   1601 
#if 0
/*
 * TestLimitations exercises known limitations of the collation engine.
 * It always fails, so it is disabled (compiled out) by default.
 *
 * Each braced section defines a rule string plus one or more expected
 * orderings and hands them to genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    /* Both orderings are tried with decomposition mode on and off. */
    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): the rule and data in this section are identical to the */
  /* "nulls in contractions" section above; only the log message differs. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* att[i] is paired with valOn[i] / valOff[i]. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
   1693 
   1694 static void TestBocsuCoverage(void) {
   1695   UErrorCode status = U_ZERO_ERROR;
   1696   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   1697   UChar       test[256] = {0};
   1698   uint32_t    tlen     = u_unescape(testString, test, 32);
   1699   uint8_t key[256]     = {0};
   1700   uint32_t klen         = 0;
   1701 
   1702   UCollator *coll = ucol_open("", &status);
   1703   if(U_SUCCESS(status)) {
   1704   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   1705 
   1706   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   1707   (void)klen;    /* Suppress set but not used warning. */
   1708 
   1709   ucol_close(coll);
   1710   } else {
   1711     log_data_err("Couldn't open UCA\n");
   1712   }
   1713 }
   1714 
   1715 static void TestVariableTopSetting(void) {
   1716   UErrorCode status = U_ZERO_ERROR;
   1717   uint32_t varTopOriginal = 0, varTop1, varTop2;
   1718   UCollator *coll = ucol_open("", &status);
   1719   if(U_SUCCESS(status)) {
   1720 
   1721   static const UChar nul = 0;
   1722   static const UChar space = 0x20;
   1723   static const UChar dot = 0x2e;  /* punctuation */
   1724   static const UChar degree = 0xb0;  /* symbol */
   1725   static const UChar dollar = 0x24;  /* currency symbol */
   1726   static const UChar zero = 0x30;  /* digit */
   1727 
   1728   varTopOriginal = ucol_getVariableTop(coll, &status);
   1729   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
   1730   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1731 
   1732   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1733   varTop2 = ucol_getVariableTop(coll, &status);
   1734   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
   1735   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1736       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1737       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1738       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1739       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1740       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1741       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1742     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
   1743   }
   1744 
   1745   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
   1746   varTop2 = ucol_getVariableTop(coll, &status);
   1747   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
   1748   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1749       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1750       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1751       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1752       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1753       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1754       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1755     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
   1756   }
   1757 
   1758   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
   1759   varTop2 = ucol_getVariableTop(coll, &status);
   1760   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
   1761   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1762       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1763       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1764       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1765       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1766       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1767       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1768     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
   1769   }
   1770 
   1771   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
   1772   varTop2 = ucol_getVariableTop(coll, &status);
   1773   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
   1774   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1775       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1776       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1777       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1778       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1779       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1780       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1781     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
   1782   }
   1783 
   1784   log_verbose("Testing setting variable top to contractions\n");
   1785   {
   1786     UChar first[4] = { 0 };
   1787     first[0] = 0x0040;
   1788     first[1] = 0x0050;
   1789     first[2] = 0x0000;
   1790 
   1791     status = U_ZERO_ERROR;
   1792     ucol_setVariableTop(coll, first, -1, &status);
   1793 
   1794     if(U_SUCCESS(status)) {
   1795       log_err("Invalid contraction succeded in setting variable top!\n");
   1796     }
   1797 
   1798   }
   1799 
   1800   log_verbose("Test restoring variable top\n");
   1801 
   1802   status = U_ZERO_ERROR;
   1803   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   1804   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   1805     log_err("Couldn't restore old variable top\n");
   1806   }
   1807 
   1808   log_verbose("Testing calling with error set\n");
   1809 
   1810   status = U_INTERNAL_PROGRAM_ERROR;
   1811   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1812   varTop2 = ucol_getVariableTop(coll, &status);
   1813   ucol_restoreVariableTop(coll, varTop2, &status);
   1814   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
   1815   varTop2 = ucol_getVariableTop(NULL, &status);
   1816   ucol_restoreVariableTop(NULL, varTop2, &status);
   1817   if(status != U_INTERNAL_PROGRAM_ERROR) {
   1818     log_err("Bad reaction to passed error!\n");
   1819   }
   1820   ucol_close(coll);
   1821   } else {
   1822     log_data_err("Couldn't open UCA collator\n");
   1823   }
   1824 }
   1825 
   1826 static void TestMaxVariable() {
   1827   UErrorCode status = U_ZERO_ERROR;
   1828   UColReorderCode oldMax, max;
   1829   UCollator *coll;
   1830 
   1831   static const UChar nul = 0;
   1832   static const UChar space = 0x20;
   1833   static const UChar dot = 0x2e;  /* punctuation */
   1834   static const UChar degree = 0xb0;  /* symbol */
   1835   static const UChar dollar = 0x24;  /* currency symbol */
   1836   static const UChar zero = 0x30;  /* digit */
   1837 
   1838   coll = ucol_open("", &status);
   1839   if(U_FAILURE(status)) {
   1840     log_data_err("Couldn't open root collator\n");
   1841     return;
   1842   }
   1843 
   1844   oldMax = ucol_getMaxVariable(coll);
   1845   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
   1846   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1847 
   1848   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1849   max = ucol_getMaxVariable(coll);
   1850   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
   1851   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
   1852       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1853       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1854       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1855       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1856       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1857       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1858     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
   1859   }
   1860 
   1861   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
   1862   max = ucol_getMaxVariable(coll);
   1863   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
   1864   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
   1865       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1866       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1867       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1868       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1869       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1870       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1871     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
   1872   }
   1873 
   1874   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
   1875   max = ucol_getMaxVariable(coll);
   1876   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
   1877   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
   1878       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1879       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1880       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1881       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1882       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1883       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1884     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
   1885   }
   1886 
   1887   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
   1888   max = ucol_getMaxVariable(coll);
   1889   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
   1890   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
   1891       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1892       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1893       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1894       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1895       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1896       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1897     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
   1898   }
   1899 
   1900   log_verbose("Test restoring maxVariable\n");
   1901   status = U_ZERO_ERROR;
   1902   ucol_setMaxVariable(coll, oldMax, &status);
   1903   if(oldMax != ucol_getMaxVariable(coll)) {
   1904     log_err("Couldn't restore old maxVariable\n");
   1905   }
   1906 
   1907   log_verbose("Testing calling with error set\n");
   1908   status = U_INTERNAL_PROGRAM_ERROR;
   1909   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1910   max = ucol_getMaxVariable(coll);
   1911   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
   1912     log_err("Bad reaction to passed error!\n");
   1913   }
   1914   ucol_close(coll);
   1915 }
   1916 
   1917 static void TestNonChars(void) {
   1918   static const char *test[] = {
   1919       "\\u0000",  /* ignorable */
   1920       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   1921       "\\uFDD0", "\\uFDEF",
   1922       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   1923       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   1924       "\\U0003FFFE", "\\U0003FFFF",
   1925       "\\U0004FFFE", "\\U0004FFFF",
   1926       "\\U0005FFFE", "\\U0005FFFF",
   1927       "\\U0006FFFE", "\\U0006FFFF",
   1928       "\\U0007FFFE", "\\U0007FFFF",
   1929       "\\U0008FFFE", "\\U0008FFFF",
   1930       "\\U0009FFFE", "\\U0009FFFF",
   1931       "\\U000AFFFE", "\\U000AFFFF",
   1932       "\\U000BFFFE", "\\U000BFFFF",
   1933       "\\U000CFFFE", "\\U000CFFFF",
   1934       "\\U000DFFFE", "\\U000DFFFF",
   1935       "\\U000EFFFE", "\\U000EFFFF",
   1936       "\\U000FFFFE", "\\U000FFFFF",
   1937       "\\U0010FFFE", "\\U0010FFFF",
   1938       "\\uFFFF"  /* special character with maximum primary weight */
   1939   };
   1940   UErrorCode status = U_ZERO_ERROR;
   1941   UCollator *coll = ucol_open("en_US", &status);
   1942 
   1943   log_verbose("Test non characters\n");
   1944 
   1945   if(U_SUCCESS(status)) {
   1946     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   1947   } else {
   1948     log_err_status(status, "Unable to open collator\n");
   1949   }
   1950 
   1951   ucol_close(coll);
   1952 }
   1953 
   1954 static void TestExtremeCompression(void) {
   1955   static char *test[4];
   1956   int32_t j = 0, i = 0;
   1957 
   1958   for(i = 0; i<4; i++) {
   1959     test[i] = (char *)malloc(2048*sizeof(char));
   1960   }
   1961 
   1962   for(j = 20; j < 500; j++) {
   1963     for(i = 0; i<4; i++) {
   1964       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   1965       test[i][j-1] = (char)('a'+i);
   1966       test[i][j] = 0;
   1967     }
   1968     genericLocaleStarter("en_US", (const char **)test, 4);
   1969   }
   1970 
   1971 
   1972   for(i = 0; i<4; i++) {
   1973     free(test[i]);
   1974   }
   1975 }
   1976 
#if 0
/*
 * Disabled earlier variant of TestExtremeCompression (an enabled function
 * with the same name exists in this file, so the two cannot both be
 * compiled).
 *
 * NOTE(review): this variant cannot simply be re-enabled as written:
 *  - ucol_open() is passed `status` by value, whereas other calls in this
 *    file pass `&status`;
 *  - `coll` is never used or closed;
 *  - the first loop memsets j-2 bytes but writes the distinguishing
 *    character at index j-1.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Only the strings left by the final iteration (j == 2047) reach the starter below. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Rewrites test[0] only; the result is not passed to any check. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
   2006 
   2007 static void TestSurrogates(void) {
   2008   static const char *test[] = {
   2009     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   2010        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   2011        "\\ud800\\udc00f",  "\\ud800\\udc00",
   2012        "\\ud800\\udc00c", "\\ud800\\udc00b",
   2013        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   2014        "\\ud800\\udc00a",
   2015        "c", "b"
   2016   };
   2017 
   2018   static const char *rule =
   2019     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   2020        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   2021        "< \\ud800\\udc00f  << \\ud800\\udc00"
   2022        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   2023        "< \\ud800\\udc00a  < c < b" ;
   2024 
   2025   genericRulesStarter(rule, test, 14);
   2026 }
   2027 
   2028 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   2029 static void TestPrefix(void) {
   2030   uint32_t i;
   2031 
   2032   static const struct {
   2033     const char *rules;
   2034     const char *data[50];
   2035     const uint32_t len;
   2036   } tests[] = {
   2037     { "&z <<< z|a",
   2038       {"zz", "za"}, 2 },
   2039 
   2040     { "&z <<< z|   a",
   2041       {"zz", "za"}, 2 },
   2042     { "[strength I]"
   2043       "&a=\\ud900\\udc25"
   2044       "&z<<<\\ud900\\udc25|a",
   2045       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   2046   };
   2047 
   2048 
   2049   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2050     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2051   }
   2052 }
   2053 
   2054 /* This test uses data suplied by Masashiko Maedera to test the implementation */
   2055 /* JIS X 4061 collation order implementation                                   */
   2056 static void TestNewJapanese(void) {
   2057 
   2058   static const char * const test1[] = {
   2059       "\\u30b7\\u30e3\\u30fc\\u30ec",
   2060       "\\u30b7\\u30e3\\u30a4",
   2061       "\\u30b7\\u30e4\\u30a3",
   2062       "\\u30b7\\u30e3\\u30ec",
   2063       "\\u3061\\u3087\\u3053",
   2064       "\\u3061\\u3088\\u3053",
   2065       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   2066       "\\u3066\\u30fc\\u305f",
   2067       "\\u30c6\\u30fc\\u30bf",
   2068       "\\u30c6\\u30a7\\u30bf",
   2069       "\\u3066\\u3048\\u305f",
   2070       "\\u3067\\u30fc\\u305f",
   2071       "\\u30c7\\u30fc\\u30bf",
   2072       "\\u30c7\\u30a7\\u30bf",
   2073       "\\u3067\\u3048\\u305f",
   2074       "\\u3066\\u30fc\\u305f\\u30fc",
   2075       "\\u30c6\\u30fc\\u30bf\\u30a1",
   2076       "\\u30c6\\u30a7\\u30bf\\u30fc",
   2077       "\\u3066\\u3047\\u305f\\u3041",
   2078       "\\u3066\\u3048\\u305f\\u30fc",
   2079       "\\u3067\\u30fc\\u305f\\u30fc",
   2080       "\\u30c7\\u30fc\\u30bf\\u30a1",
   2081       "\\u3067\\u30a7\\u305f\\u30a1",
   2082       "\\u30c7\\u3047\\u30bf\\u3041",
   2083       "\\u30c7\\u30a8\\u30bf\\u30a2",
   2084       "\\u3072\\u3086",
   2085       "\\u3073\\u3085\\u3042",
   2086       "\\u3074\\u3085\\u3042",
   2087       "\\u3073\\u3085\\u3042\\u30fc",
   2088       "\\u30d3\\u30e5\\u30a2\\u30fc",
   2089       "\\u3074\\u3085\\u3042\\u30fc",
   2090       "\\u30d4\\u30e5\\u30a2\\u30fc",
   2091       "\\u30d2\\u30e5\\u30a6",
   2092       "\\u30d2\\u30e6\\u30a6",
   2093       "\\u30d4\\u30e5\\u30a6\\u30a2",
   2094       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   2095       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   2096       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   2097       "\\u3072\\u3085\\u3093",
   2098       "\\u3074\\u3085\\u3093",
   2099       "\\u3075\\u30fc\\u308a",
   2100       "\\u30d5\\u30fc\\u30ea",
   2101       "\\u3075\\u3045\\u308a",
   2102       "\\u3075\\u30a5\\u308a",
   2103       "\\u3075\\u30a5\\u30ea",
   2104       "\\u30d5\\u30a6\\u30ea",
   2105       "\\u3076\\u30fc\\u308a",
   2106       "\\u30d6\\u30fc\\u30ea",
   2107       "\\u3076\\u3045\\u308a",
   2108       "\\u30d6\\u30a5\\u308a",
   2109       "\\u3077\\u3046\\u308a",
   2110       "\\u30d7\\u30a6\\u30ea",
   2111       "\\u3075\\u30fc\\u308a\\u30fc",
   2112       "\\u30d5\\u30a5\\u30ea\\u30fc",
   2113       "\\u3075\\u30a5\\u308a\\u30a3",
   2114       "\\u30d5\\u3045\\u308a\\u3043",
   2115       "\\u30d5\\u30a6\\u30ea\\u30fc",
   2116       "\\u3075\\u3046\\u308a\\u3043",
   2117       "\\u30d6\\u30a6\\u30ea\\u30a4",
   2118       "\\u3077\\u30fc\\u308a\\u30fc",
   2119       "\\u3077\\u30a5\\u308a\\u30a4",
   2120       "\\u3077\\u3046\\u308a\\u30fc",
   2121       "\\u30d7\\u30a6\\u30ea\\u30a4",
   2122       "\\u30d5\\u30fd",
   2123       "\\u3075\\u309e",
   2124       "\\u3076\\u309d",
   2125       "\\u3076\\u3075",
   2126       "\\u3076\\u30d5",
   2127       "\\u30d6\\u3075",
   2128       "\\u30d6\\u30d5",
   2129       "\\u3076\\u309e",
   2130       "\\u3076\\u3077",
   2131       "\\u30d6\\u3077",
   2132       "\\u3077\\u309d",
   2133       "\\u30d7\\u30fd",
   2134       "\\u3077\\u3075",
   2135 };
   2136 
   2137   static const char *test2[] = {
   2138     "\\u306f\\u309d", /* H\\u309d */
   2139     "\\u30cf\\u30fd", /* K\\u30fd */
   2140     "\\u306f\\u306f", /* HH */
   2141     "\\u306f\\u30cf", /* HK */
   2142     "\\u30cf\\u30cf", /* KK */
   2143     "\\u306f\\u309e", /* H\\u309e */
   2144     "\\u30cf\\u30fe", /* K\\u30fe */
   2145     "\\u306f\\u3070", /* HH\\u309b */
   2146     "\\u30cf\\u30d0", /* KK\\u309b */
   2147     "\\u306f\\u3071", /* HH\\u309c */
   2148     "\\u30cf\\u3071", /* KH\\u309c */
   2149     "\\u30cf\\u30d1", /* KK\\u309c */
   2150     "\\u3070\\u309d", /* H\\u309b\\u309d */
   2151     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   2152     "\\u3070\\u306f", /* H\\u309bH */
   2153     "\\u30d0\\u30cf", /* K\\u309bK */
   2154     "\\u3070\\u309e", /* H\\u309b\\u309e */
   2155     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   2156     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   2157     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   2158     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   2159     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   2160     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   2161     "\\u3071\\u309d", /* H\\u309c\\u309d */
   2162     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   2163     "\\u3071\\u306f", /* H\\u309cH */
   2164     "\\u30d1\\u30cf", /* K\\u309cK */
   2165     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   2166     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   2167     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   2168     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   2169     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   2170   };
   2171   /*
   2172   static const char *test3[] = {
   2173     "\\u221er\\u221e",
   2174     "\\u221eR#",
   2175     "\\u221et\\u221e",
   2176     "#r\\u221e",
   2177     "#R#",
   2178     "#t%",
   2179     "#T%",
   2180     "8t\\u221e",
   2181     "8T\\u221e",
   2182     "8t#",
   2183     "8T#",
   2184     "8t%",
   2185     "8T%",
   2186     "8t8",
   2187     "8T8",
   2188     "\\u03c9r\\u221e",
   2189     "\\u03a9R%",
   2190     "rr\\u221e",
   2191     "rR\\u221e",
   2192     "Rr\\u221e",
   2193     "RR\\u221e",
   2194     "RT%",
   2195     "rt8",
   2196     "tr\\u221e",
   2197     "tr8",
   2198     "TR8",
   2199     "tt8",
   2200     "\\u30b7\\u30e3\\u30fc\\u30ec",
   2201   };
   2202   */
   2203   static const UColAttribute att[] = { UCOL_STRENGTH };
   2204   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   2205 
   2206   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   2207   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   2208 
   2209   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
   2210   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
   2211   /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
   2212   genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
   2213   genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
   2214 }
   2215 
   2216 static void TestStrCollIdenticalPrefix(void) {
   2217   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   2218   const char* test[] = {
   2219     "ab\\ud9b0\\udc70",
   2220     "ab\\ud9b0\\udc71"
   2221   };
   2222   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   2223 }
   2224 /* Contractions should have all their canonically equivalent */
   2225 /* strings included */
   2226 static void TestContractionClosure(void) {
   2227   static const struct {
   2228     const char *rules;
   2229     const char *data[10];
   2230     const uint32_t len;
   2231   } tests[] = {
   2232     {   "&b=\\u00e4\\u00e4",
   2233       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   2234     {   "&b=\\u00C5",
   2235       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   2236   };
   2237   uint32_t i;
   2238 
   2239 
   2240   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2241     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   2242   }
   2243 }
   2244 
   2245 /* This tests also fails*/
   2246 static void TestBeforePrefixFailure(void) {
   2247   static const struct {
   2248     const char *rules;
   2249     const char *data[10];
   2250     const uint32_t len;
   2251   } tests[] = {
   2252     { "&g <<< a"
   2253       "&[before 3]\\uff41 <<< x",
   2254       {"x", "\\uff41"}, 2 },
   2255     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2256         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   2257         "&[before 3]\\u30a7<<<\\u30a9",
   2258       {"\\u30a9", "\\u30a7"}, 2 },
   2259     {   "&[before 3]\\u30a7<<<\\u30a9"
   2260         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2261         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   2262       {"\\u30a9", "\\u30a7"}, 2 },
   2263   };
   2264   uint32_t i;
   2265 
   2266 
   2267   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2268     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2269   }
   2270 
   2271 #if 0
   2272   const char* rule1 =
   2273         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2274         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   2275         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   2276   const char* rule2 =
   2277         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   2278         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2279         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   2280   const char* test[] = {
   2281       "\\u30c6\\u30fc\\u30bf",
   2282       "\\u30c6\\u30a7\\u30bf",
   2283   };
   2284   genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
   2285   genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
   2286 /* this piece of code should be in some sort of verbose mode     */
   2287 /* it gets the collation elements for elements and prints them   */
   2288 /* This is useful when trying to see whether the problem is      */
   2289   {
   2290     UErrorCode status = U_ZERO_ERROR;
   2291     uint32_t i = 0;
   2292     UCollationElements *it = NULL;
   2293     uint32_t CE;
   2294     UChar string[256];
   2295     uint32_t uStringLen;
   2296     UCollator *coll = NULL;
   2297 
   2298     uStringLen = u_unescape(rule1, string, 256);
   2299 
   2300     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2301 
   2302     /*coll = ucol_open("ja_JP_JIS", &status);*/
   2303     it = ucol_openElements(coll, string, 0, &status);
   2304 
   2305     for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
   2306       log_verbose("%s\n", test[i]);
   2307       uStringLen = u_unescape(test[i], string, 256);
   2308       ucol_setText(it, string, uStringLen, &status);
   2309 
   2310       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   2311         log_verbose("%08X\n", CE);
   2312       }
   2313       log_verbose("\n");
   2314 
   2315     }
   2316 
   2317     ucol_closeElements(it);
   2318     ucol_close(coll);
   2319   }
   2320 #endif
   2321 }
   2322 
   2323 static void TestPrefixCompose(void) {
   2324   const char* rule1 =
   2325         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   2326   /*
   2327   const char* test[] = {
   2328       "\\u30c6\\u30fc\\u30bf",
   2329       "\\u30c6\\u30a7\\u30bf",
   2330   };
   2331   */
   2332   {
   2333     UErrorCode status = U_ZERO_ERROR;
   2334     /*uint32_t i = 0;*/
   2335     /*UCollationElements *it = NULL;*/
   2336 /*    uint32_t CE;*/
   2337     UChar string[256];
   2338     uint32_t uStringLen;
   2339     UCollator *coll = NULL;
   2340 
   2341     uStringLen = u_unescape(rule1, string, 256);
   2342 
   2343     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2344     ucol_close(coll);
   2345   }
   2346 
   2347 
   2348 }
   2349 
   2350 /*
   2351 [last variable] last variable value
   2352 [last primary ignorable] largest CE for primary ignorable
   2353 [last secondary ignorable] largest CE for secondary ignorable
   2354 [last tertiary ignorable] largest CE for tertiary ignorable
   2355 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   2356 */
   2357 
   2358 static void TestRuleOptions(void) {
   2359   /* values here are hardcoded and are correct for the current UCA
   2360    * when the UCA changes, one might be forced to change these
   2361    * values.
   2362    */
   2363 
   2364   /*
   2365    * These strings contain the last character before [variable top]
   2366    * and the first and second characters (by primary weights) after it.
   2367    * See FractionalUCA.txt. For example:
   2368       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   2369       [variable top = 0C FE]
   2370       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   2371      and
   2372       00B4; [0D 0C, 05, 05]
   2373    *
   2374    * Note: Starting with UCA 6.0, the [variable top] collation element
   2375    * is not the weight of any character or string,
   2376    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   2377    */
   2378 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   2379 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   2380 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   2381 
   2382   /*
   2383    * This string has to match the character that has the [last regular] weight
   2384    * which changes with each UCA version.
   2385    * See the bottom of FractionalUCA.txt which says something like
   2386       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   2387    *
   2388    * Note: Starting with UCA 6.0, the [last regular] collation element
   2389    * is not the weight of any character or string,
   2390    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   2391    */
   2392 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   2393 
   2394   static const struct {
   2395     const char *rules;
   2396     const char *data[10];
   2397     const uint32_t len;
   2398   } tests[] = {
   2399 #if 0
   2400     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
   2401     /* - all befores here amount to zero */
   2402     { "&[before 3][first tertiary ignorable]<<<a",
   2403         { "\\u0000", "a"}, 2
   2404     }, /* you cannot go before first tertiary ignorable */
   2405 
   2406     { "&[before 3][last tertiary ignorable]<<<a",
   2407         { "\\u0000", "a"}, 2
   2408     }, /* you cannot go before last tertiary ignorable */
   2409 #endif
   2410     /*
   2411      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
   2412      * and it *is* possible to "go before" that.
   2413      */
   2414     { "&[before 3][first secondary ignorable]<<<a",
   2415         { "\\u0000", "a"}, 2
   2416     },
   2417 
   2418     { "&[before 3][last secondary ignorable]<<<a",
   2419         { "\\u0000", "a"}, 2
   2420     },
   2421 
   2422     /* 'normal' befores */
   2423 
   2424     /*
   2425      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
   2426      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
   2427      * because there is no tailoring space before that boundary.
   2428      * Made the tests work by tailoring to a space instead.
   2429      */
   2430     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
   2431         {  "c", "b", "\\u0332", "a" }, 4
   2432     },
   2433 
   2434     /* we don't have a code point that corresponds to
   2435      * the last primary ignorable
   2436      */
   2437     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
   2438         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   2439     },
   2440 
   2441     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   2442         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   2443     },
   2444 
   2445     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   2446         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   2447     },
   2448 
   2449     { "&[first regular]<a"
   2450       "&[before 1][first regular]<b",
   2451       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   2452     },
   2453 
   2454     { "&[before 1][last regular]<b"
   2455       "&[last regular]<a",
   2456         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   2457     },
   2458 
   2459     { "&[before 1][first implicit]<b"
   2460       "&[first implicit]<a",
   2461         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   2462     },
   2463 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
   2464     { "&[before 1][last implicit]<b"
   2465       "&[last implicit]<a",
   2466         { "b", "\\U0010FFFD", "a" }, 3
   2467     },
   2468 #endif
   2469     { "&[last variable]<z"
   2470       "&' '<x"  /* was &[last primary ignorable]<x, see above */
   2471       "&[last secondary ignorable]<<y"
   2472       "&[last tertiary ignorable]<<<w"
   2473       "&[top]<u",
   2474       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   2475     }
   2476 
   2477   };
   2478   uint32_t i;
   2479 
   2480   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2481     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2482   }
   2483 }
   2484 
   2485 
   2486 static void TestOptimize(void) {
   2487   /* this is not really a test - just trying out
   2488    * whether copying of UCA contents will fail
   2489    * Cannot really test, since the functionality
   2490    * remains the same.
   2491    */
   2492   static const struct {
   2493     const char *rules;
   2494     const char *data[10];
   2495     const uint32_t len;
   2496   } tests[] = {
   2497     /* - all befores here amount to zero */
   2498     { "[optimize [\\uAC00-\\uD7FF]]",
   2499     { "a", "b"}, 2}
   2500   };
   2501   uint32_t i;
   2502 
   2503   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2504     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2505   }
   2506 }
   2507 
   2508 /*
   2509 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   2510 weiv    ucol_strcollIter?
   2511 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   2512 weiv    these are the input strings?
   2513 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   2514 weiv    will check - could be a problem with utf-8 iterator
   2515 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   2516 weiv    hmmm
   2517 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   2518 weiv    that doesn't sound right
   2519 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   2520 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   2521 cycheng (at) ca.ibm.c... yes
   2522 weiv    and then do the comparison
   2523 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   2524 weiv    utf-16 strings look like a little endian ones in the example you sent me
   2525 weiv    It could be a bug - let me try to test it out
   2526 cycheng (at) ca.ibm.c... ok
   2527 cycheng (at) ca.ibm.c... we can wait till the conf. call
   2528 cycheng (at) ca.ibm.c... next weke
   2529 weiv    that would be great
   2530 weiv    hmmm
   2531 weiv    I might be wrong
   2532 weiv    let me play with it some more
   2533 cycheng (at) ca.ibm.c... ok
   2534 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   2535 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   2536 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   2537 weiv    ok
   2538 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   2539 weiv    thanks
   2540 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   2541 */
   2542 #if 0
   2543 static void Alexis(void) {
   2544   UErrorCode status = U_ZERO_ERROR;
   2545   UCollator *coll = ucol_open("", &status);
   2546 
   2547 
   2548   const char utf16be[2][4] = {
   2549     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   2550     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   2551   };
   2552 
   2553   const char utf8[2][4] = {
   2554     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   2555     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   2556   };
   2557 
   2558   UCharIterator iterU161, iterU162;
   2559   UCharIterator iterU81, iterU82;
   2560 
   2561   UCollationResult resU16, resU8;
   2562 
   2563   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   2564   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   2565 
   2566   uiter_setUTF8(&iterU81, utf8[0], 4);
   2567   uiter_setUTF8(&iterU82, utf8[1], 4);
   2568 
   2569   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2570 
   2571   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   2572   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   2573 
   2574 
   2575   if(resU16 != resU8) {
   2576     log_err("different results\n");
   2577   }
   2578 
   2579   ucol_close(coll);
   2580 }
   2581 #endif
   2582 
   2583 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   2584 static void Alexis2(void) {
   2585   UErrorCode status = U_ZERO_ERROR;
   2586   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2587   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2588   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2589   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   2590 
   2591   UConverter *conv = NULL;
   2592 
   2593   UCharIterator U16BEItS, U16BEItT;
   2594   UCharIterator U8ItS, U8ItT;
   2595 
   2596   UCollationResult resU16, resU16BE, resU8;
   2597 
   2598   static const char* const pairs[][2] = {
   2599     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   2600     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   2601     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   2602     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   2603     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   2604     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   2605     { "\\u0020", "\\u0020\\u0000"}
   2606 /*
   2607 5F20 (my result here)
   2608 5F204E008E3F
   2609 5F20 (your result here)
   2610 */
   2611   };
   2612 
   2613   int32_t i = 0;
   2614 
   2615   UCollator *coll = ucol_open("", &status);
   2616   if(status == U_FILE_ACCESS_ERROR) {
   2617     log_data_err("Is your data around?\n");
   2618     return;
   2619   } else if(U_FAILURE(status)) {
   2620     log_err("Error opening collator\n");
   2621     return;
   2622   }
   2623   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2624   conv = ucnv_open("UTF16BE", &status);
   2625   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   2626     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2627     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2628 
   2629     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   2630 
   2631     log_verbose("Result of strcoll is %i\n", resU16);
   2632 
   2633     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   2634     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   2635     (void)U16BELenS;    /* Suppress set but not used warnings. */
   2636     (void)U16BELenT;
   2637 
   2638     /* use the original sizes, as the result from converter is in bytes */
   2639     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   2640     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   2641 
   2642     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   2643 
   2644     log_verbose("Result of U16BE is %i\n", resU16BE);
   2645 
   2646     if(resU16 != resU16BE) {
   2647       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   2648     }
   2649 
   2650     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   2651     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   2652 
   2653     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   2654     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   2655 
   2656     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   2657 
   2658     if(resU16 != resU8) {
   2659       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   2660     }
   2661 
   2662   }
   2663 
   2664   ucol_close(coll);
   2665   ucnv_close(conv);
   2666 }
   2667 
   2668 static void TestHebrewUCA(void) {
   2669   UErrorCode status = U_ZERO_ERROR;
   2670   static const char *first[] = {
   2671     "d790d6b8d79cd795d6bcd7a9",
   2672     "d790d79cd79ed7a7d799d799d7a1",
   2673     "d790d6b4d79ed795d6bcd7a9",
   2674   };
   2675 
   2676   char utf8String[3][256];
   2677   UChar utf16String[3][256];
   2678 
   2679   int32_t i = 0, j = 0;
   2680   int32_t sizeUTF8[3];
   2681   int32_t sizeUTF16[3];
   2682 
   2683   UCollator *coll = ucol_open("", &status);
   2684   if (U_FAILURE(status)) {
   2685       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   2686       return;
   2687   }
   2688   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   2689 
   2690   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   2691     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   2692     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   2693     log_verbose("%i: ");
   2694     for(j = 0; j < sizeUTF16[i]; j++) {
   2695       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   2696       log_verbose("%04X", utf16String[i][j]);
   2697     }
   2698     log_verbose("\n");
   2699   }
   2700   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   2701     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   2702       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   2703     }
   2704   }
   2705 
   2706   ucol_close(coll);
   2707 
   2708 }
   2709 
   2710 static void TestPartialSortKeyTermination(void) {
   2711   static const char* cases[] = {
   2712     "\\u1234\\u1234\\udc00",
   2713     "\\udc00\\ud800\\ud800"
   2714   };
   2715 
   2716   int32_t i;
   2717 
   2718   UErrorCode status = U_ZERO_ERROR;
   2719 
   2720   UCollator *coll = ucol_open("", &status);
   2721 
   2722   UCharIterator iter;
   2723 
   2724   UChar currCase[256];
   2725   int32_t length = 0;
   2726   int32_t pKeyLen = 0;
   2727 
   2728   uint8_t key[256];
   2729 
   2730   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   2731     uint32_t state[2] = {0, 0};
   2732     length = u_unescape(cases[i], currCase, 256);
   2733     uiter_setString(&iter, currCase, length);
   2734     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   2735     (void)pKeyLen;   /* Suppress set but not used warning. */
   2736 
   2737     log_verbose("Done\n");
   2738 
   2739   }
   2740   ucol_close(coll);
   2741 }
   2742 
   2743 static void TestSettings(void) {
   2744   static const char* cases[] = {
   2745     "apple",
   2746       "Apple"
   2747   };
   2748 
   2749   static const char* locales[] = {
   2750     "",
   2751       "en"
   2752   };
   2753 
   2754   UErrorCode status = U_ZERO_ERROR;
   2755 
   2756   int32_t i = 0, j = 0;
   2757 
   2758   UChar source[256], target[256];
   2759   int32_t sLen = 0, tLen = 0;
   2760 
   2761   UCollator *collateObject = NULL;
   2762   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   2763     collateObject = ucol_open(locales[i], &status);
   2764     ucol_setStrength(collateObject, UCOL_PRIMARY);
   2765     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   2766     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   2767       sLen = u_unescape(cases[j-1], source, 256);
   2768       source[sLen] = 0;
   2769       tLen = u_unescape(cases[j], target, 256);
   2770       source[tLen] = 0;
   2771       doTest(collateObject, source, target, UCOL_EQUAL);
   2772     }
   2773     ucol_close(collateObject);
   2774   }
   2775 }
   2776 
   2777 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   2778     UErrorCode status = U_ZERO_ERROR;
   2779     int32_t errorNo = 0;
   2780     const UChar *sourceRules = NULL;
   2781     int32_t sourceRulesLen = 0;
   2782     UParseError parseError;
   2783     UColAttributeValue french = UCOL_OFF;
   2784 
   2785     if(!ucol_equals(source, target)) {
   2786         log_err("Same collators, different address not equal\n");
   2787         errorNo++;
   2788     }
   2789     ucol_close(target);
   2790     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   2791         target = ucol_safeClone(source, NULL, NULL, &status);
   2792         if(U_FAILURE(status)) {
   2793             log_err("Error creating clone\n");
   2794             errorNo++;
   2795             return errorNo;
   2796         }
   2797         if(!ucol_equals(source, target)) {
   2798             log_err("Collator different from it's clone\n");
   2799             errorNo++;
   2800         }
   2801         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   2802         if(french == UCOL_ON) {
   2803             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   2804         } else {
   2805             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   2806         }
   2807         if(U_FAILURE(status)) {
   2808             log_err("Error setting attributes\n");
   2809             errorNo++;
   2810             return errorNo;
   2811         }
   2812         if(ucol_equals(source, target)) {
   2813             log_err("Collators same even when options changed\n");
   2814             errorNo++;
   2815         }
   2816         ucol_close(target);
   2817 
   2818         sourceRules = ucol_getRules(source, &sourceRulesLen);
   2819         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2820         if(U_FAILURE(status)) {
   2821             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
   2822             errorNo++;
   2823             return errorNo;
   2824         }
   2825         /* Note: The tailoring rule string is an optional data item. */
   2826         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
   2827             log_err("Collator different from collator that was created from the same rules\n");
   2828             errorNo++;
   2829         }
   2830         ucol_close(target);
   2831     }
   2832     return errorNo;
   2833 }
   2834 
   2835 
   2836 static void TestEquals(void) {
   2837     /* ucol_equals is not currently a public API. There is a chance that it will become
   2838     * something like this.
   2839     */
   2840     /* test whether the two collators instantiated from the same locale are equal */
   2841     UErrorCode status = U_ZERO_ERROR;
   2842     UParseError parseError;
   2843     int32_t noOfLoc = uloc_countAvailable();
   2844     const char *locName = NULL;
   2845     UCollator *source = NULL, *target = NULL;
   2846     int32_t i = 0;
   2847 
   2848     const char* rules[] = {
   2849         "&l < lj <<< Lj <<< LJ",
   2850         "&n < nj <<< Nj <<< NJ",
   2851         "&ae <<< \\u00e4",
   2852         "&AE <<< \\u00c4"
   2853     };
   2854     /*
   2855     const char* badRules[] = {
   2856     "&l <<< Lj",
   2857     "&n < nj <<< nJ <<< NJ",
   2858     "&a <<< \\u00e4",
   2859     "&AE <<< \\u00c4 <<< x"
   2860     };
   2861     */
   2862 
   2863     UChar sourceRules[1024], targetRules[1024];
   2864     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   2865     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   2866 
   2867     for(i = 0; i < rulesSize; i++) {
   2868         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   2869         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   2870     }
   2871 
   2872     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2873     if(status == U_FILE_ACCESS_ERROR) {
   2874         log_data_err("Is your data around?\n");
   2875         return;
   2876     } else if(U_FAILURE(status)) {
   2877         log_err("Error opening collator\n");
   2878         return;
   2879     }
   2880     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2881     if(!ucol_equals(source, target)) {
   2882         log_err("Equivalent collators not equal!\n");
   2883     }
   2884     ucol_close(source);
   2885     ucol_close(target);
   2886 
   2887     source = ucol_open("root", &status);
   2888     target = ucol_open("root", &status);
   2889     log_verbose("Testing root\n");
   2890     if(!ucol_equals(source, source)) {
   2891         log_err("Same collator not equal\n");
   2892     }
   2893     if(TestEqualsForCollator("root", source, target)) {
   2894         log_err("Errors for root\n");
   2895     }
   2896     ucol_close(source);
   2897 
   2898     for(i = 0; i<noOfLoc; i++) {
   2899         status = U_ZERO_ERROR;
   2900         locName = uloc_getAvailable(i);
   2901         /*if(hasCollationElements(locName)) {*/
   2902         log_verbose("Testing equality for locale %s\n", locName);
   2903         source = ucol_open(locName, &status);
   2904         target = ucol_open(locName, &status);
   2905         if (U_FAILURE(status)) {
   2906             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   2907             continue;
   2908         }
   2909         if(TestEqualsForCollator(locName, source, target)) {
   2910             log_err("Errors for locale %s\n", locName);
   2911         }
   2912         ucol_close(source);
   2913         /*}*/
   2914     }
   2915 }
   2916 
   2917 static void TestJ2726(void) {
   2918     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   2919     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   2920     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   2921     UErrorCode status = U_ZERO_ERROR;
   2922     UCollator *coll = ucol_open("en", &status);
   2923     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   2924     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   2925     doTest(coll, a, aSpace, UCOL_EQUAL);
   2926     doTest(coll, aSpace, a, UCOL_EQUAL);
   2927     doTest(coll, a, spaceA, UCOL_EQUAL);
   2928     doTest(coll, spaceA, a, UCOL_EQUAL);
   2929     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   2930     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   2931     ucol_close(coll);
   2932 }
   2933 
   2934 static void NullRule(void) {
   2935     UChar r[3] = {0};
   2936     UErrorCode status = U_ZERO_ERROR;
   2937     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2938     if(U_SUCCESS(status)) {
   2939         log_err("This should have been an error!\n");
   2940         ucol_close(coll);
   2941     } else {
   2942         status = U_ZERO_ERROR;
   2943     }
   2944     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2945     if(U_FAILURE(status)) {
   2946         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   2947     } else {
   2948         ucol_close(coll);
   2949     }
   2950 }
   2951 
   2952 /**
   2953  * Test for CollationElementIterator previous and next for the whole set of
   2954  * unicode characters with normalization on.
   2955  */
   2956 static void TestNumericCollation(void)
   2957 {
   2958     UErrorCode status = U_ZERO_ERROR;
   2959 
   2960     const static char *basicTestStrings[]={
   2961     "hello1",
   2962     "hello2",
   2963     "hello2002",
   2964     "hello2003",
   2965     "hello123456",
   2966     "hello1234567",
   2967     "hello10000000",
   2968     "hello100000000",
   2969     "hello1000000000",
   2970     "hello10000000000",
   2971     };
   2972 
   2973     const static char *preZeroTestStrings[]={
   2974     "avery10000",
   2975     "avery010000",
   2976     "avery0010000",
   2977     "avery00010000",
   2978     "avery000010000",
   2979     "avery0000010000",
   2980     "avery00000010000",
   2981     "avery000000010000",
   2982     };
   2983 
   2984     const static char *thirtyTwoBitNumericStrings[]={
   2985     "avery42949672960",
   2986     "avery42949672961",
   2987     "avery42949672962",
   2988     "avery429496729610"
   2989     };
   2990 
   2991      const static char *longNumericStrings[]={
   2992      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   2993         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   2994         are treated as multiple collation elements. */
   2995     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   2996     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   2997     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   2998     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   2999     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   3000     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   3001     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   3002     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   3003     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   3004     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   3005     };
   3006 
   3007     const static char *supplementaryDigits[] = {
   3008       "\\uD835\\uDFCE", /* 0 */
   3009       "\\uD835\\uDFCF", /* 1 */
   3010       "\\uD835\\uDFD0", /* 2 */
   3011       "\\uD835\\uDFD1", /* 3 */
   3012       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   3013       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   3014       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   3015       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   3016       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   3017       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   3018     };
   3019 
   3020     const static char *foreignDigits[] = {
   3021       "\\u0661",
   3022         "\\u0662",
   3023         "\\u0663",
   3024       "\\u0661\\u0660",
   3025       "\\u0661\\u0662",
   3026       "\\u0661\\u0663",
   3027       "\\u0662\\u0660",
   3028       "\\u0662\\u0662",
   3029       "\\u0662\\u0663",
   3030       "\\u0663\\u0660",
   3031       "\\u0663\\u0662",
   3032       "\\u0663\\u0663"
   3033     };
   3034 
   3035     const static char *evenZeroes[] = {
   3036       "2000",
   3037       "2001",
   3038         "2002",
   3039         "2003"
   3040     };
   3041 
   3042     UColAttribute att = UCOL_NUMERIC_COLLATION;
   3043     UColAttributeValue val = UCOL_ON;
   3044 
   3045     /* Open our collator. */
   3046     UCollator* coll = ucol_open("root", &status);
   3047     if (U_FAILURE(status)){
   3048         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   3049               myErrorName(status));
   3050         return;
   3051     }
   3052     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   3053     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   3054     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   3055     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   3056     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   3057     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   3058 
   3059     /* Setting up our collator to do digits. */
   3060     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   3061     if (U_FAILURE(status)){
   3062         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   3063               myErrorName(status));
   3064         return;
   3065     }
   3066 
   3067     /*
   3068        Testing that prepended zeroes still yield the correct collation behavior.
   3069        We expect that every element in our strings array will be equal.
   3070     */
   3071     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   3072 
   3073     ucol_close(coll);
   3074 }
   3075 
   3076 static void TestTibetanConformance(void)
   3077 {
   3078     const char* test[] = {
   3079         "\\u0FB2\\u0591\\u0F71\\u0061",
   3080         "\\u0FB2\\u0F71\\u0061"
   3081     };
   3082 
   3083     UErrorCode status = U_ZERO_ERROR;
   3084     UCollator *coll = ucol_open("", &status);
   3085     UChar source[100];
   3086     UChar target[100];
   3087     int result;
   3088     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3089     if (U_SUCCESS(status)) {
   3090         u_unescape(test[0], source, 100);
   3091         u_unescape(test[1], target, 100);
   3092         doTest(coll, source, target, UCOL_EQUAL);
   3093         result = ucol_strcoll(coll, source, -1,   target, -1);
   3094         log_verbose("result %d\n", result);
   3095         if (UCOL_EQUAL != result) {
   3096             log_err("Tibetan comparison error\n");
   3097         }
   3098     }
   3099     ucol_close(coll);
   3100 
   3101     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   3102 }
   3103 
   3104 static void TestPinyinProblem(void) {
   3105     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   3106     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   3107 }
   3108 
   3109 /**
   3110  * Iterate through the given iterator, checking to see that all the strings
   3111  * in the expected array are present.
   3112  * @param expected array of strings we expect to see, or NULL
   3113  * @param expectedCount number of elements of expected, or 0
   3114  */
   3115 static int32_t checkUEnumeration(const char* msg,
   3116                                  UEnumeration* iter,
   3117                                  const char** expected,
   3118                                  int32_t expectedCount) {
   3119     UErrorCode ec = U_ZERO_ERROR;
   3120     int32_t i = 0, n, j, bit;
   3121     int32_t seenMask = 0;
   3122 
   3123     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   3124     n = uenum_count(iter, &ec);
   3125     if (!assertSuccess("count", &ec)) return -1;
   3126     log_verbose("%s = [", msg);
   3127     for (;; ++i) {
   3128         const char* s = uenum_next(iter, NULL, &ec);
   3129         if (!assertSuccess("snext", &ec) || s == NULL) break;
   3130         if (i != 0) log_verbose(",");
   3131         log_verbose("%s", s);
   3132         /* check expected list */
   3133         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3134             if ((seenMask&bit) == 0 &&
   3135                 uprv_strcmp(s, expected[j]) == 0) {
   3136                 seenMask |= bit;
   3137                 break;
   3138             }
   3139         }
   3140     }
   3141     log_verbose("] (%d)\n", i);
   3142     assertTrue("count verified", i==n);
   3143     /* did we see all expected strings? */
   3144     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3145         if ((seenMask&bit)!=0) {
   3146             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   3147         } else {
   3148             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   3149         }
   3150     }
   3151     return n;
   3152 }
   3153 
   3154 /**
   3155  * Test new API added for separate collation tree.
   3156  */
   3157 static void TestSeparateTrees(void) {
   3158     UErrorCode ec = U_ZERO_ERROR;
   3159     UEnumeration *e = NULL;
   3160     int32_t n = -1;
   3161     UBool isAvailable;
   3162     char loc[256];
   3163 
   3164     static const char* AVAIL[] = { "en", "de" };
   3165 
   3166     static const char* KW[] = { "collation" };
   3167 
   3168     static const char* KWVAL[] = { "phonebook", "stroke" };
   3169 
   3170 #if !UCONFIG_NO_SERVICE
   3171     e = ucol_openAvailableLocales(&ec);
   3172     if (e != NULL) {
   3173         assertSuccess("ucol_openAvailableLocales", &ec);
   3174         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   3175         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   3176         (void)n;    /* Suppress set but not used warnings. */
   3177         /* Don't need to check n because we check list */
   3178         uenum_close(e);
   3179     } else {
   3180         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   3181     }
   3182 #endif
   3183 
   3184     e = ucol_getKeywords(&ec);
   3185     if (e != NULL) {
   3186         assertSuccess("ucol_getKeywords", &ec);
   3187         assertTrue("ucol_getKeywords!=0", e!=0);
   3188         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   3189         /* Don't need to check n because we check list */
   3190         uenum_close(e);
   3191     } else {
   3192         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   3193     }
   3194 
   3195     e = ucol_getKeywordValues(KW[0], &ec);
   3196     if (e != NULL) {
   3197         assertSuccess("ucol_getKeywordValues", &ec);
   3198         assertTrue("ucol_getKeywordValues!=0", e!=0);
   3199         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   3200         /* Don't need to check n because we check list */
   3201         uenum_close(e);
   3202     } else {
   3203         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   3204     }
   3205 
   3206     /* Try setting a warning before calling ucol_getKeywordValues */
   3207     ec = U_USING_FALLBACK_WARNING;
   3208     e = ucol_getKeywordValues(KW[0], &ec);
   3209     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   3210         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   3211         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   3212         /* Don't need to check n because we check list */
   3213         uenum_close(e);
   3214     }
   3215 
   3216     /*
   3217 U_DRAFT int32_t U_EXPORT2
   3218 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   3219                              const char* locale, UBool* isAvailable,
   3220                              UErrorCode* status);
   3221 }
   3222 */
   3223     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   3224                                      &isAvailable, &ec);
   3225     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3226         assertEquals("getFunctionalEquivalent(de)", "root", loc);
   3227         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   3228                    isAvailable == TRUE);
   3229     }
   3230 
   3231     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   3232                                      &isAvailable, &ec);
   3233     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3234         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
   3235         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
   3236                    isAvailable == FALSE);
   3237     }
   3238 }
   3239 
   3240 /* supercedes TestJ784 */
   3241 static void TestBeforePinyin(void) {
   3242     const static char rules[] = {
   3243         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   3244         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   3245         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   3246         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   3247         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   3248         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   3249     };
   3250 
   3251     const static char *test[] = {
   3252         "l\\u0101",
   3253         "la",
   3254         "l\\u0101n",
   3255         "lan ",
   3256         "l\\u0113",
   3257         "le",
   3258         "l\\u0113n",
   3259         "len"
   3260     };
   3261 
   3262     const static char *test2[] = {
   3263         "x\\u0101",
   3264         "x\\u0100",
   3265         "X\\u0101",
   3266         "X\\u0100",
   3267         "x\\u00E1",
   3268         "x\\u00C1",
   3269         "X\\u00E1",
   3270         "X\\u00C1",
   3271         "x\\u01CE",
   3272         "x\\u01CD",
   3273         "X\\u01CE",
   3274         "X\\u01CD",
   3275         "x\\u00E0",
   3276         "x\\u00C0",
   3277         "X\\u00E0",
   3278         "X\\u00C0",
   3279         "xa",
   3280         "xA",
   3281         "Xa",
   3282         "XA",
   3283         "x\\u0101x",
   3284         "x\\u0100x",
   3285         "x\\u00E1x",
   3286         "x\\u00C1x",
   3287         "x\\u01CEx",
   3288         "x\\u01CDx",
   3289         "x\\u00E0x",
   3290         "x\\u00C0x",
   3291         "xax",
   3292         "xAx"
   3293     };
   3294 
   3295     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   3296     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   3297     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   3298     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   3299 }
   3300 
   3301 static void TestBeforeTightening(void) {
   3302     static const struct {
   3303         const char *rules;
   3304         UErrorCode expectedStatus;
   3305     } tests[] = {
   3306         { "&[before 1]a<x", U_ZERO_ERROR },
   3307         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   3308         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   3309         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   3310         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   3311         { "&[before 2]a<<x",U_ZERO_ERROR },
   3312         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   3313         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   3314         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   3315         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   3316         { "&[before 3]a<<<x",U_ZERO_ERROR },
   3317         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   3318         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   3319     };
   3320 
   3321     int32_t i = 0;
   3322 
   3323     UErrorCode status = U_ZERO_ERROR;
   3324     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3325     uint32_t rlen = 0;
   3326 
   3327     UCollator *coll = NULL;
   3328 
   3329 
   3330     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   3331         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   3332         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3333         if(status != tests[i].expectedStatus) {
   3334             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   3335                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   3336         }
   3337         ucol_close(coll);
   3338         status = U_ZERO_ERROR;
   3339     }
   3340 
   3341 }
   3342 
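/*
Expected results for the [before N] tailorings exercised by the disabled
TestMoreBefore() further down. Each stanza lists the rules, followed by an
"assert:" line giving the ordering those rules should produce.
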
   3344 &m < a
   3345 &[before 1] a < x <<< X << q <<< Q < z
   3346 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   3347 
   3348 &m < a
   3349 &[before 2] a << x <<< X << q <<< Q < z
   3350 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   3351 
   3352 &m < a
   3353 &[before 3] a <<< x <<< X << q <<< Q < z
   3354 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   3355 
   3356 
   3357 &m << a
   3358 &[before 1] a < x <<< X << q <<< Q < z
   3359 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   3360 
   3361 &m << a
   3362 &[before 2] a << x <<< X << q <<< Q < z
   3363 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   3364 
   3365 &m << a
   3366 &[before 3] a <<< x <<< X << q <<< Q < z
   3367 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   3368 
   3369 
   3370 &m <<< a
   3371 &[before 1] a < x <<< X << q <<< Q < z
   3372 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   3373 
   3374 &m <<< a
   3375 &[before 2] a << x <<< X << q <<< Q < z
   3376 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   3377 
   3378 &m <<< a
   3379 &[before 3] a <<< x <<< X << q <<< Q < z
   3380 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   3381 
   3382 
   3383 &[before 1] s < x <<< X << q <<< Q < z
   3384 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   3385 
   3386 &[before 2] s << x <<< X << q <<< Q < z
   3387 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   3388 
   3389 &[before 3] s <<< x <<< X << q <<< Q < z
   3390 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   3391 
   3392 
   3393 &[before 1] \u24DC < x <<< X << q <<< Q < z
   3394 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   3395 
   3396 &[before 2] \u24DC << x <<< X << q <<< Q < z
   3397 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   3398 
   3399 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   3400 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   3401 */
   3402 
   3403 
   3404 #if 0
   3405 /* requires features not yet supported */
   3406 static void TestMoreBefore(void) {
   3407     static const struct {
   3408         const char* rules;
   3409         const char* order[16];
   3410         int32_t size;
   3411     } tests[] = {
   3412         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   3413         { "m","M","x","X","q","Q","z","a","n" }, 9},
   3414         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   3415         { "m","M","x","X","q","Q","a","z","n" }, 9},
   3416         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   3417         { "m","M","x","X","a","q","Q","z","n" }, 9},
   3418         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   3419         { "x","X","q","Q","z","m","M","a","n" }, 9},
   3420         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   3421         { "m","M","x","X","q","Q","a","z","n" }, 9},
   3422         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   3423         { "m","M","x","X","a","q","Q","z","n" }, 9},
   3424         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   3425         { "x","X","q","Q","z","n","m","a","M" }, 9},
   3426         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   3427         { "x","X","q","Q","m","a","M","z","n" }, 9},
   3428         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   3429         { "m","x","X","a","M","q","Q","z","n" }, 9},
   3430         { "&[before 1] s < x <<< X << q <<< Q < z",
   3431         { "r","R","x","X","q","Q","z","s","n" }, 9},
   3432         { "&[before 2] s << x <<< X << q <<< Q < z",
   3433         { "r","R","x","X","q","Q","s","z","n" }, 9},
   3434         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   3435         { "r","R","x","X","s","q","Q","z","n" }, 9},
   3436         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   3437         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   3438         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   3439         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   3440         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   3441         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   3442     };
   3443 
   3444     int32_t i = 0;
   3445 
   3446     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   3447         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   3448     }
   3449 }
   3450 #endif
   3451 
   3452 static void TestTailorNULL( void ) {
   3453     const static char* rule = "&a <<< '\\u0000'";
   3454     UErrorCode status = U_ZERO_ERROR;
   3455     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3456     uint32_t rlen = 0;
   3457     UChar a = 1, null = 0;
   3458     UCollationResult res = UCOL_EQUAL;
   3459 
   3460     UCollator *coll = NULL;
   3461 
   3462 
   3463     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   3464     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3465 
   3466     if(U_FAILURE(status)) {
   3467         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   3468     } else {
   3469         res = ucol_strcoll(coll, &a, 1, &null, 1);
   3470 
   3471         if(res != UCOL_LESS) {
   3472             log_err("NULL was not tailored properly!\n");
   3473         }
   3474     }
   3475 
   3476     ucol_close(coll);
   3477 }
   3478 
   3479 static void
   3480 TestUpperFirstQuaternary(void)
   3481 {
   3482   const char* tests[] = { "B", "b", "Bb", "bB" };
   3483   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   3484   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   3485   genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   3486 }
   3487 
   3488 static void
   3489 TestJ4960(void)
   3490 {
   3491   const char* tests[] = { "\\u00e2T", "aT" };
   3492   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   3493   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   3494   const char* tests2[] = { "a", "A" };
   3495   const char* rule = "&[first tertiary ignorable]=A=a";
   3496   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   3497   UColAttributeValue attVals2[] = { UCOL_ON };
   3498   /* Test whether we correctly ignore primary ignorables on case level when */
   3499   /* we have only primary & case level */
   3500   genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
   3501   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   3502   /* and case level */
   3503   genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
   3504   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   3505   genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
   3506 }
   3507 
   3508 static void
   3509 TestJ5223(void)
   3510 {
   3511   static const char *test = "this is a test string";
   3512   UChar ustr[256];
   3513   int32_t ustr_length = u_unescape(test, ustr, 256);
   3514   unsigned char sortkey[256];
   3515   int32_t sortkey_length;
   3516   UErrorCode status = U_ZERO_ERROR;
   3517   static UCollator *coll = NULL;
   3518   coll = ucol_open("root", &status);
   3519   if(U_FAILURE(status)) {
   3520     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   3521     return;
   3522   }
   3523   ucol_setStrength(coll, UCOL_PRIMARY);
   3524   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   3525   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3526   if (U_FAILURE(status)) {
   3527     log_err("Failed setting atributes\n");
   3528     return;
   3529   }
   3530   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   3531   if (sortkey_length > 256) return;
   3532 
   3533   /* we mark the position where the null byte should be written in advance */
   3534   sortkey[sortkey_length-1] = 0xAA;
   3535 
   3536   /* we set the buffer size one byte higher than needed */
   3537   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   3538     sortkey_length+1);
   3539 
   3540   /* no error occurs (for me) */
   3541   if (sortkey[sortkey_length-1] == 0xAA) {
   3542     log_err("Hit bug at first try\n");
   3543   }
   3544 
   3545   /* we mark the position where the null byte should be written again */
   3546   sortkey[sortkey_length-1] = 0xAA;
   3547 
   3548   /* this time we set the buffer size to the exact amount needed */
   3549   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   3550     sortkey_length);
   3551 
   3552   /* now the trailing null byte is not written */
   3553   if (sortkey[sortkey_length-1] == 0xAA) {
   3554     log_err("Hit bug at second try\n");
   3555   }
   3556 
   3557   ucol_close(coll);
   3558 }
   3559 
   3560 /* Regression test for Thai partial sort key problem */
   3561 static void
   3562 TestJ5232(void)
   3563 {
   3564     const static char *test[] = {
   3565         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   3566         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   3567     };
   3568 
   3569     genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
   3570 }
   3571 
   3572 static void
   3573 TestJ5367(void)
   3574 {
   3575     const static char *test[] = { "a", "y" };
   3576     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   3577     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   3578 }
   3579 
   3580 static void
   3581 TestVI5913(void)
   3582 {
   3583     UErrorCode status = U_ZERO_ERROR;
   3584     int32_t i, j;
   3585     UCollator *coll =NULL;
   3586     uint8_t  resColl[100], expColl[100];
   3587     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   3588     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
   3589     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   3590     /*
   3591      * Note: Just tailoring &z<ae^ does not work as expected:
   3592      * The UCA spec requires for discontiguous contractions that they
   3593      * extend an *existing match* by one combining mark at a time.
   3594      * Therefore, ae must be a contraction so that the builder finds
   3595      * discontiguous contractions for ae^, for example with an intervening underdot.
   3596      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
   3597      */
   3598     UChar rule3[256]={
   3599         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
   3600         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
   3601         0};
   3602     static const UChar tData[][20]={
   3603         {0x1EAC, 0},
   3604         {0x0041, 0x0323, 0x0302, 0},
   3605         {0x1EA0, 0x0302, 0},
   3606         {0x00C2, 0x0323, 0},
   3607         {0x1ED8, 0},  /* O with dot and circumflex */
   3608         {0x1ECC, 0x0302, 0},
   3609         {0x1EB7, 0},
   3610         {0x1EA1, 0x0306, 0},
   3611     };
   3612     static const UChar tailorData[][20]={
   3613         {0x1FA2, 0},  /* Omega with 3 combining marks */
   3614         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   3615         {0x1FF3, 0x0313, 0x0300, 0},
   3616         {0x1F60, 0x0300, 0x0345, 0},
   3617         {0x1F62, 0x0345, 0},
   3618         {0x1FA0, 0x0300, 0},
   3619     };
   3620     static const UChar tailorData2[][20]={
   3621         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   3622         {0x0073, 0x0323, 0x030C, 0},
   3623         {0x0073, 0x030C, 0x0323, 0},
   3624     };
   3625     static const UChar tailorData3[][20]={
   3626         {0x007a, 0},  /*  z */
   3627         {0x0061, 0x0065, 0},  /*  a + e */
   3628         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   3629         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   3630         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   3631         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   3632         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   3633         {0x00EA, 0},  /* e with circumflex  */
   3634     };
   3635 
   3636     /* Test Vietnamese sort. */
   3637     coll = ucol_open("vi", &status);
   3638     if(U_FAILURE(status)) {
   3639         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   3640         return;
   3641     }
   3642     log_verbose("\n\nVI collation:");
   3643     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   3644         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   3645     }
   3646     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   3647         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   3648     }
   3649     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   3650         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   3651     }
   3652     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   3653         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   3654     }
   3655 
   3656     for (j=0; j<8; j++) {
   3657         tLen = u_strlen(tData[j]);
   3658         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   3659         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   3660         for(i = 0; i<rLen; i++) {
   3661             log_verbose(" %02X", resColl[i]);
   3662         }
   3663     }
   3664 
   3665     ucol_close(coll);
   3666 
   3667     /* Test Romanian sort. */
   3668     coll = ucol_open("ro", &status);
   3669     log_verbose("\n\nRO collation:");
   3670     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   3671         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   3672     }
   3673     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   3674         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   3675     }
   3676     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   3677         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   3678     }
   3679 
   3680     for (j=4; j<8; j++) {
   3681         tLen = u_strlen(tData[j]);
   3682         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   3683         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   3684         for(i = 0; i<rLen; i++) {
   3685             log_verbose(" %02X", resColl[i]);
   3686         }
   3687     }
   3688     ucol_close(coll);
   3689 
   3690     /* Test the precomposed Greek character with 3 combining marks. */
   3691     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   3692     ruleLen = u_strlen(rule);
   3693     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3694     if (U_FAILURE(status)) {
   3695         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   3696         return;
   3697     }
   3698     sLen = u_strlen(tailorData[0]);
   3699     for (j=1; j<6; j++) {
   3700         tLen = u_strlen(tailorData[j]);
   3701         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   3702             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   3703         }
   3704     }
   3705     /* Test getSortKey. */
   3706     tLen = u_strlen(tailorData[0]);
   3707     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   3708     for (j=0; j<6; j++) {
   3709         tLen = u_strlen(tailorData[j]);
   3710         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   3711         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3712             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   3713             for(i = 0; i<rLen; i++) {
   3714                 log_err(" %02X", resColl[i]);
   3715             }
   3716         }
   3717     }
   3718     ucol_close(coll);
   3719 
   3720     log_verbose("\n\nTailoring test for s with caron:");
   3721     ruleLen = u_strlen(rule2);
   3722     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3723     tLen = u_strlen(tailorData2[0]);
   3724     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   3725     for (j=1; j<3; j++) {
   3726         tLen = u_strlen(tailorData2[j]);
   3727         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   3728         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3729             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   3730             for(i = 0; i<rLen; i++) {
   3731                 log_err(" %02X", resColl[i]);
   3732             }
   3733         }
   3734     }
   3735     ucol_close(coll);
   3736 
   3737     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   3738     ruleLen = u_strlen(rule3);
   3739     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3740     tLen = u_strlen(tailorData3[3]);
   3741     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   3742     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
   3743     for(i = 0; i<kLen; i++) {
   3744         log_verbose(" %02X", expColl[i]);
   3745     }
   3746     for (j=4; j<6; j++) {
   3747         tLen = u_strlen(tailorData3[j]);
   3748         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   3749 
   3750         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3751             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
   3752             for(i = 0; i<rLen; i++) {
   3753                 log_err(" %02X", resColl[i]);
   3754             }
   3755         }
   3756 
   3757         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
   3758          for(i = 0; i<rLen; i++) {
   3759              log_verbose(" %02X", resColl[i]);
   3760          }
   3761     }
   3762     ucol_close(coll);
   3763 }
   3764 
   3765 static void
   3766 TestTailor6179(void)
   3767 {
   3768     UErrorCode status = U_ZERO_ERROR;
   3769     int32_t i;
   3770     UCollator *coll =NULL;
   3771     uint8_t  resColl[100];
   3772     int32_t  rLen, tLen, ruleLen;
   3773     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   3774     static const UChar rule1[]={
   3775             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   3776             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   3777             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   3778             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   3779     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   3780     static const UChar rule2[]={
   3781             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   3782             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   3783             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   3784             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   3785             0x3C,0x3C,0x20,0x62,0};
   3786 
   3787     static const UChar tData1[][4]={
   3788         {0x61, 0},
   3789         {0x62, 0},
   3790         { 0xFDD0,0x009E, 0}
   3791     };
   3792     static const UChar tData2[][4]={
   3793         {0x61, 0},
   3794         {0x62, 0},
   3795         { 0xFDD0,0x009E, 0}
   3796      };
   3797 
   3798     /*
   3799      * These values from FractionalUCA.txt will change,
   3800      * and need to be updated here.
   3801      * TODO: Make this not check for particular sort keys.
   3802      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
   3803      */
   3804     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
   3805     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
   3806     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
   3807     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
   3808 
   3809     UParseError parseError;
   3810 
   3811     /* Test [Last Primary ignorable] */
   3812 
   3813     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
   3814     ruleLen = u_strlen(rule1);
   3815     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3816     if (U_FAILURE(status)) {
   3817         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   3818         return;
   3819     }
   3820     tLen = u_strlen(tData1[0]);
   3821     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   3822     if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
   3823         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   3824         for(i = 0; i<rLen; i++) {
   3825             log_err(" %02X", resColl[i]);
   3826         }
   3827         log_err("\n");
   3828     }
   3829     tLen = u_strlen(tData1[1]);
   3830     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   3831     if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
   3832         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   3833         for(i = 0; i<rLen; i++) {
   3834             log_err(" %02X", resColl[i]);
   3835         }
   3836         log_err("\n");
   3837     }
   3838     ucol_close(coll);
   3839 
   3840 
   3841     /* Test [Last Secondary ignorable] */
   3842     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
   3843     ruleLen = u_strlen(rule2);
   3844     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
   3845     if (U_FAILURE(status)) {
   3846         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
   3847         log_info("  offset=%d  \"%s\" | \"%s\"\n",
   3848                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
   3849         return;
   3850     }
   3851     tLen = u_strlen(tData2[0]);
   3852     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   3853     if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
   3854         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   3855         for(i = 0; i<rLen; i++) {
   3856             log_err(" %02X", resColl[i]);
   3857         }
   3858         log_err("\n");
   3859     }
   3860     tLen = u_strlen(tData2[1]);
   3861     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   3862     if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
   3863       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   3864       for(i = 0; i<rLen; i++) {
   3865         log_err(" %02X", resColl[i]);
   3866       }
   3867       log_err("\n");
   3868     }
   3869     ucol_close(coll);
   3870 }
   3871 
   3872 static void
   3873 TestUCAPrecontext(void)
   3874 {
   3875     UErrorCode status = U_ZERO_ERROR;
   3876     int32_t i, j;
   3877     UCollator *coll =NULL;
   3878     uint8_t  resColl[100], prevColl[100];
   3879     int32_t  rLen, tLen, ruleLen;
   3880     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   3881     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   3882     /* & l middle-dot << a  a is an expansion. */
   3883 
   3884     UChar tData1[][20]={
   3885             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   3886             { 0x387, 0}, /* standalone middle dot(0x387) */
   3887             { 0x61, 0},  /* a */
   3888             { 0x6C, 0},  /* l */
   3889             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   3890             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   3891             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   3892             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   3893             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   3894             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   3895             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   3896      };
   3897 
   3898     log_verbose("\n\nEN collation:");
   3899     coll = ucol_open("en", &status);
   3900     if (U_FAILURE(status)) {
   3901         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   3902         return;
   3903     }
   3904     for (j=0; j<11; j++) {
   3905         tLen = u_strlen(tData1[j]);
   3906         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3907         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3908             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3909                     j, tData1[j]);
   3910         }
   3911         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3912         for(i = 0; i<rLen; i++) {
   3913             log_verbose(" %02X", resColl[i]);
   3914         }
   3915         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3916      }
   3917      ucol_close(coll);
   3918 
   3919 
   3920      log_verbose("\n\nJA collation:");
   3921      coll = ucol_open("ja", &status);
   3922      if (U_FAILURE(status)) {
   3923          log_err("Tailoring test: &z <<a|- failed!");
   3924          return;
   3925      }
   3926      for (j=0; j<11; j++) {
   3927          tLen = u_strlen(tData1[j]);
   3928          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3929          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3930              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3931                      j, tData1[j]);
   3932          }
   3933          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3934          for(i = 0; i<rLen; i++) {
   3935              log_verbose(" %02X", resColl[i]);
   3936          }
   3937          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3938       }
   3939       ucol_close(coll);
   3940 
   3941 
   3942       log_verbose("\n\nTailoring test: & middle dot < a ");
   3943       ruleLen = u_strlen(rule1);
   3944       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3945       if (U_FAILURE(status)) {
   3946           log_err("Tailoring test: & middle dot < a failed!");
   3947           return;
   3948       }
   3949       for (j=0; j<11; j++) {
   3950           tLen = u_strlen(tData1[j]);
   3951           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3952           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3953               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3954                       j, tData1[j]);
   3955           }
   3956           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3957           for(i = 0; i<rLen; i++) {
   3958               log_verbose(" %02X", resColl[i]);
   3959           }
   3960           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3961        }
   3962        ucol_close(coll);
   3963 
   3964 
   3965        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   3966        ruleLen = u_strlen(rule2);
   3967        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3968        if (U_FAILURE(status)) {
   3969            log_err("Tailoring test: & l middle-dot << a failed!");
   3970            return;
   3971        }
   3972        for (j=0; j<11; j++) {
   3973            tLen = u_strlen(tData1[j]);
   3974            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3975            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3976                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3977                        j, tData1[j]);
   3978            }
   3979            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   3980                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   3981                        j, tData1[j]);
   3982            }
   3983            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3984            for(i = 0; i<rLen; i++) {
   3985                log_verbose(" %02X", resColl[i]);
   3986            }
   3987            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3988         }
   3989         ucol_close(coll);
   3990 }
   3991 
   3992 static void
   3993 TestOutOfBuffer5468(void)
   3994 {
   3995     static const char *test = "\\u4e00";
   3996     UChar ustr[256];
   3997     int32_t ustr_length = u_unescape(test, ustr, 256);
   3998     unsigned char shortKeyBuf[1];
   3999     int32_t sortkey_length;
   4000     UErrorCode status = U_ZERO_ERROR;
   4001     static UCollator *coll = NULL;
   4002 
   4003     coll = ucol_open("root", &status);
   4004     if(U_FAILURE(status)) {
   4005       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4006       return;
   4007     }
   4008     ucol_setStrength(coll, UCOL_PRIMARY);
   4009     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4010     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4011     if (U_FAILURE(status)) {
   4012       log_err("Failed setting atributes\n");
   4013       return;
   4014     }
   4015 
   4016     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   4017     if (sortkey_length != 4) {
   4018         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   4019     }
   4020     log_verbose("length of sortKey is %d", sortkey_length);
   4021     ucol_close(coll);
   4022 }
   4023 
   4024 #define TSKC_DATA_SIZE 5
   4025 #define TSKC_BUF_SIZE  50
   4026 static void
   4027 TestSortKeyConsistency(void)
   4028 {
   4029     UErrorCode icuRC = U_ZERO_ERROR;
   4030     UCollator* ucol;
   4031     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   4032 
   4033     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   4034     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   4035     int32_t i, j, i2;
   4036 
   4037     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   4038     if (U_FAILURE(icuRC))
   4039     {
   4040         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   4041         return;
   4042     }
   4043 
   4044     for (i = 0; i < TSKC_DATA_SIZE; i++)
   4045     {
   4046         UCharIterator uiter;
   4047         uint32_t state[2] = { 0, 0 };
   4048         int32_t dataLen = i+1;
   4049         for (j=0; j<TSKC_BUF_SIZE; j++)
   4050             bufFull[i][j] = bufPart[i][j] = 0;
   4051 
   4052         /* Full sort key */
   4053         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   4054 
   4055         /* Partial sort key */
   4056         uiter_setString(&uiter, data, dataLen);
   4057         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   4058         if (U_FAILURE(icuRC))
   4059         {
   4060             log_err("ucol_nextSortKeyPart failed\n");
   4061             ucol_close(ucol);
   4062             return;
   4063         }
   4064 
   4065         for (i2=0; i2<i; i2++)
   4066         {
   4067             UBool fullMatch = TRUE;
   4068             UBool partMatch = TRUE;
   4069             for (j=0; j<TSKC_BUF_SIZE; j++)
   4070             {
   4071                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   4072                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   4073             }
   4074             if (fullMatch != partMatch) {
   4075                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   4076                                   : "partial key was consistent, but full key changed\n");
   4077                 ucol_close(ucol);
   4078                 return;
   4079             }
   4080         }
   4081     }
   4082 
   4083     /*=============================================*/
   4084    ucol_close(ucol);
   4085 }
   4086 
   4087 /* ticket: 6101 */
   4088 static void TestCroatianSortKey(void) {
   4089     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   4090     UErrorCode status = U_ZERO_ERROR;
   4091     UCollator *ucol;
   4092     UCharIterator iter;
   4093 
   4094     static const UChar text[] = { 0x0044, 0xD81A };
   4095 
   4096     size_t length = sizeof(text)/sizeof(*text);
   4097 
   4098     uint8_t textSortKey[32];
   4099     size_t lenSortKey = 32;
   4100     size_t actualSortKeyLen;
   4101     uint32_t uStateInfo[2] = { 0, 0 };
   4102 
   4103     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   4104     if (U_FAILURE(status)) {
   4105         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   4106         return;
   4107     }
   4108 
   4109     uiter_setString(&iter, text, length);
   4110 
   4111     actualSortKeyLen = ucol_nextSortKeyPart(
   4112         ucol, &iter, (uint32_t*)uStateInfo,
   4113         textSortKey, lenSortKey, &status
   4114         );
   4115 
   4116     if (actualSortKeyLen == lenSortKey) {
   4117         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   4118     }
   4119 
   4120     ucol_close(ucol);
   4121 }
   4122 
   4123 /* ticket: 6140 */
   4124 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   4125  * they are both Hiragana and Katakana
   4126  */
   4127 #define SORTKEYLEN 50
   4128 static void TestHiragana(void) {
   4129     UErrorCode status = U_ZERO_ERROR;
   4130     UCollator* ucol;
   4131     UCollationResult strcollresult;
   4132     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   4133     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   4134     int32_t data1Len = sizeof(data1)/sizeof(*data1);
   4135     int32_t data2Len = sizeof(data2)/sizeof(*data2);
   4136     int32_t i, j;
   4137     uint8_t sortKey1[SORTKEYLEN];
   4138     uint8_t sortKey2[SORTKEYLEN];
   4139 
   4140     UCharIterator uiter1;
   4141     UCharIterator uiter2;
   4142     uint32_t state1[2] = { 0, 0 };
   4143     uint32_t state2[2] = { 0, 0 };
   4144     int32_t keySize1;
   4145     int32_t keySize2;
   4146 
   4147     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   4148             &status);
   4149     if (U_FAILURE(status)) {
   4150         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   4151         return;
   4152     }
   4153 
   4154     /* Start of full sort keys */
   4155     /* Full sort key1 */
   4156     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   4157     /* Full sort key2 */
   4158     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   4159     if (keySize1 == keySize2) {
   4160         for (i = 0; i < keySize1; i++) {
   4161             if (sortKey1[i] != sortKey2[i]) {
   4162                 log_err("Full sort keys are different. Should be equal.");
   4163             }
   4164         }
   4165     } else {
   4166         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   4167     }
   4168     /* End of full sort keys */
   4169 
   4170     /* Start of partial sort keys */
   4171     /* Partial sort key1 */
   4172     uiter_setString(&uiter1, data1, data1Len);
   4173     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   4174     /* Partial sort key2 */
   4175     uiter_setString(&uiter2, data2, data2Len);
   4176     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   4177     if (U_SUCCESS(status) && keySize1 == keySize2) {
   4178         for (j = 0; j < keySize1; j++) {
   4179             if (sortKey1[j] != sortKey2[j]) {
   4180                 log_err("Partial sort keys are different. Should be equal");
   4181             }
   4182         }
   4183     } else {
   4184         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   4185     }
   4186     /* End of partial sort keys */
   4187 
   4188     /* Start of strcoll */
   4189     /* Use ucol_strcoll() to determine ordering */
   4190     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   4191     if (strcollresult != UCOL_EQUAL) {
   4192         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   4193     }
   4194 
   4195     ucol_close(ucol);
   4196 }
   4197 
   4198 /* Convenient struct for running collation tests */
   4199 typedef struct {
   4200   const UChar source[MAX_TOKEN_LEN];  /* String on left */
   4201   const UChar target[MAX_TOKEN_LEN];  /* String on right */
   4202   UCollationResult result;            /* -1, 0 or +1, depending on collation */
   4203 } OneTestCase;
   4204 
   4205 /*
   4206  * Utility function to test one collation test case.
   4207  * @param testcases Array of test cases.
   4208  * @param n_testcases Size of the array testcases.
   4209  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   4210  * @param n_rules Size of the array str_rules.
   4211  */
   4212 static void doTestOneTestCase(const OneTestCase testcases[],
   4213                               int n_testcases,
   4214                               const char* str_rules[],
   4215                               int n_rules)
   4216 {
   4217   int rule_no, testcase_no;
   4218   UChar rule[500];
   4219   int32_t length = 0;
   4220   UErrorCode status = U_ZERO_ERROR;
   4221   UParseError parse_error;
   4222   UCollator  *myCollation;
   4223 
   4224   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   4225 
   4226     length = u_unescape(str_rules[rule_no], rule, 500);
   4227     if (length == 0) {
   4228         log_err("ERROR: The rule cannot be unescaped: %s\n");
   4229         return;
   4230     }
   4231     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   4232     if(U_FAILURE(status)){
   4233         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   4234         log_info("  offset=%d  \"%s\" | \"%s\"\n",
   4235                  parse_error.offset,
   4236                  aescstrdup(parse_error.preContext, -1),
   4237                  aescstrdup(parse_error.postContext, -1));
   4238         return;
   4239     }
   4240     log_verbose("Testing the <<* syntax\n");
   4241     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4242     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4243     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   4244       doTest(myCollation,
   4245              testcases[testcase_no].source,
   4246              testcases[testcase_no].target,
   4247              testcases[testcase_no].result
   4248              );
   4249     }
   4250     ucol_close(myCollation);
   4251   }
   4252 }
   4253 
   4254 const static OneTestCase rangeTestcases[] = {
   4255   { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
   4256   { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
   4257   { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
   4258 
   4259   { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
   4260   { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
   4261   { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
   4262   { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
   4263   { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
   4264 
   4265   { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
   4266   { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
   4267   { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
   4268   { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
   4269 
   4270   { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
   4271   { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
   4272   { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
   4273   { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
   4274   { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
   4275   { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
   4276   { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
   4277   { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
   4278 };
   4279 
   4280 static int nRangeTestcases = LEN(rangeTestcases);
   4281 
   4282 const static OneTestCase rangeTestcasesSupplemental[] = {
   4283   { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
   4284   { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
   4285   { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
   4286   { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
   4287   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   4288   { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
   4289   { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */
   4290 };
   4291 
   4292 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
   4293 
   4294 const static OneTestCase rangeTestcasesQwerty[] = {
   4295   { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
   4296   { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
   4297 
   4298   { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
   4299   { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
   4300 
   4301   { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
   4302   { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
   4303 
   4304   { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
   4305   { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
   4306 
   4307   { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
   4308     {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
   4309   { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
   4310     {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
   4311 };
   4312 
   4313 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
   4314 
   4315 static void TestSameStrengthList(void)
   4316 {
   4317   const char* strRules[] = {
   4318     /* Normal */
   4319     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   4320 
   4321     /* Lists */
   4322     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   4323   };
   4324   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   4325 }
   4326 
   4327 static void TestSameStrengthListQuoted(void)
   4328 {
   4329   const char* strRules[] = {
   4330     /* Lists with quoted characters */
   4331     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   4332     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   4333 
   4334     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   4335     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   4336 
   4337     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   4338     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   4339   };
   4340   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   4341 }
   4342 
   4343 static void TestSameStrengthListSupplemental(void)
   4344 {
   4345   const char* strRules[] = {
   4346     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
   4347     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   4348     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
   4349     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   4350   };
   4351   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   4352 }
   4353 
   4354 static void TestSameStrengthListQwerty(void)
   4355 {
   4356   const char* strRules[] = {
   4357     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   4358     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   4359     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   4360     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   4361     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   4362 
   4363     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   4364     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   4365 
   4366     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   4367     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   4368 
   4369  };
   4370   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   4371 }
   4372 
   4373 static void TestSameStrengthListQuotedQwerty(void)
   4374 {
   4375   const char* strRules[] = {
   4376     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   4377     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   4378     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   4379 
   4380     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   4381     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   4382    };
   4383   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
   4384 }
   4385 
   4386 static void TestSameStrengthListRanges(void)
   4387 {
   4388   const char* strRules[] = {
   4389     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   4390   };
   4391   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
   4392 }
   4393 
   4394 static void TestSameStrengthListSupplementalRanges(void)
   4395 {
   4396   const char* strRules[] = {
   4397     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
   4398     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
   4399   };
   4400   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
   4401 }
   4402 
   4403 static void TestSpecialCharacters(void)
   4404 {
   4405   const char* strRules[] = {
   4406     /* Normal */
   4407     "&';'<'+'<','<'-'<'&'<'*'",
   4408 
   4409     /* List */
   4410     "&';'<*'+,-&*'",
   4411 
   4412     /* Range */
   4413     "&';'<*'+'-'-&*'",
   4414   };
   4415 
   4416   const static OneTestCase specialCharacterStrings[] = {
   4417     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   4418     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   4419     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   4420     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   4421   };
   4422   doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
   4423 }
   4424 
   4425 static void TestPrivateUseCharacters(void)
   4426 {
   4427   const char* strRules[] = {
   4428     /* Normal */
   4429     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   4430     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   4431   };
   4432 
   4433   const static OneTestCase privateUseCharacterStrings[] = {
   4434     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4435     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4436     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4437     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4438     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4439     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4440   };
   4441   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   4442 }
   4443 
   4444 static void TestPrivateUseCharactersInList(void)
   4445 {
   4446   const char* strRules[] = {
   4447     /* List */
   4448     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   4449     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   4450     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   4451   };
   4452 
   4453   const static OneTestCase privateUseCharacterStrings[] = {
   4454     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4455     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4456     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4457     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4458     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4459     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4460   };
   4461   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   4462 }
   4463 
   4464 static void TestPrivateUseCharactersInRange(void)
   4465 {
   4466   const char* strRules[] = {
   4467     /* Range */
   4468     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   4469     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   4470     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   4471   };
   4472 
   4473   const static OneTestCase privateUseCharacterStrings[] = {
   4474     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4475     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4476     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4477     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4478     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4479     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4480   };
   4481   doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   4482 }
   4483 
   4484 static void TestInvalidListsAndRanges(void)
   4485 {
   4486   const char* invalidRules[] = {
   4487     /* Range not in starred expression */
   4488     "&\\ufffe<\\uffff-\\U00010002",
   4489 
   4490     /* Range without start */
   4491     "&a<*-c",
   4492 
   4493     /* Range without end */
   4494     "&a<*b-",
   4495 
   4496     /* More than one hyphen */
   4497     "&a<*b-g-l",
   4498 
   4499     /* Range in the wrong order */
   4500     "&a<*k-b",
   4501 
   4502   };
   4503 
   4504   UChar rule[500];
   4505   UErrorCode status = U_ZERO_ERROR;
   4506   UParseError parse_error;
   4507   int n_rules = LEN(invalidRules);
   4508   int rule_no;
   4509   int length;
   4510   UCollator  *myCollation;
   4511 
   4512   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   4513 
   4514     length = u_unescape(invalidRules[rule_no], rule, 500);
   4515     if (length == 0) {
   4516         log_err("ERROR: The rule cannot be unescaped: %s\n");
   4517         return;
   4518     }
   4519     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   4520     (void)myCollation;      /* Suppress set but not used warning. */
   4521     if(!U_FAILURE(status)){
   4522       log_err("ERROR: Could not cause a failure as expected: \n");
   4523     }
   4524     status = U_ZERO_ERROR;
   4525   }
   4526 }
   4527 
   4528 /*
   4529  * This test ensures that characters placed before a character in a different script have the same lead byte
   4530  * in their collation key before and after script reordering.
   4531  */
   4532 static void TestBeforeRuleWithScriptReordering(void)
   4533 {
   4534     UParseError error;
   4535     UErrorCode status = U_ZERO_ERROR;
   4536     UCollator  *myCollation;
   4537     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   4538     UChar rules[500];
   4539     uint32_t rulesLength = 0;
   4540     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   4541     UCollationResult collResult;
   4542 
   4543     uint8_t baseKey[256];
   4544     uint32_t baseKeyLength;
   4545     uint8_t beforeKey[256];
   4546     uint32_t beforeKeyLength;
   4547 
   4548     UChar base[] = { 0x03b1 }; /* base */
   4549     int32_t baseLen = sizeof(base)/sizeof(*base);
   4550 
   4551     UChar before[] = { 0x0e01 }; /* ko kai */
   4552     int32_t beforeLen = sizeof(before)/sizeof(*before);
   4553 
   4554     /*UChar *data[] = { before, base };
   4555     genericRulesStarter(srules, data, 2);*/
   4556 
   4557     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   4558 
   4559     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
   4560     (void)baseKeyLength;
   4561 
   4562     /* build collator */
   4563     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   4564 
   4565     rulesLength = u_unescape(srules, rules, LEN(rules));
   4566     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   4567     if(U_FAILURE(status)) {
   4568         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   4569         return;
   4570     }
   4571 
   4572     /* check collation results - before rule applied but not script reordering */
   4573     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   4574     if (collResult != UCOL_GREATER) {
   4575         log_err("Collation result not correct before script reordering = %d\n", collResult);
   4576     }
   4577 
   4578     /* check the lead byte of the collation keys before script reordering */
   4579     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   4580     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   4581     if (baseKey[0] != beforeKey[0]) {
   4582       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   4583    }
   4584 
   4585     /* reorder the scripts */
   4586     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   4587     if(U_FAILURE(status)) {
   4588         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   4589         return;
   4590     }
   4591 
   4592     /* check collation results - before rule applied and after script reordering */
   4593     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   4594     if (collResult != UCOL_GREATER) {
   4595         log_err("Collation result not correct after script reordering = %d\n", collResult);
   4596     }
   4597 
   4598     /* check the lead byte of the collation keys after script reordering */
   4599     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   4600     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   4601     if (baseKey[0] != beforeKey[0]) {
   4602         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   4603     }
   4604 
   4605     ucol_close(myCollation);
   4606 }
   4607 
   4608 /*
   4609  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   4610  */
   4611 static void TestNonLeadBytesDuringCollationReordering(void)
   4612 {
   4613     UErrorCode status = U_ZERO_ERROR;
   4614     UCollator  *myCollation;
   4615     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   4616 
   4617     uint8_t baseKey[256];
   4618     uint32_t baseKeyLength;
   4619     uint8_t reorderKey[256];
   4620     uint32_t reorderKeyLength;
   4621 
   4622     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   4623 
   4624     uint32_t i;
   4625 
   4626 
   4627     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4628 
   4629     /* build collator tertiary */
   4630     myCollation = ucol_open("", &status);
   4631     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4632     if(U_FAILURE(status)) {
   4633         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4634         return;
   4635     }
   4636     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   4637 
   4638     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4639     if(U_FAILURE(status)) {
   4640         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4641         return;
   4642     }
   4643     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   4644 
   4645     if (baseKeyLength != reorderKeyLength) {
   4646         log_err("Key lengths not the same during reordering.\n");
   4647         return;
   4648     }
   4649 
   4650     for (i = 1; i < baseKeyLength; i++) {
   4651         if (baseKey[i] != reorderKey[i]) {
   4652             log_err("Collation key bytes not the same at position %d.\n", i);
   4653             return;
   4654         }
   4655     }
   4656     ucol_close(myCollation);
   4657 
   4658     /* build collator quaternary */
   4659     myCollation = ucol_open("", &status);
   4660     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   4661     if(U_FAILURE(status)) {
   4662         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4663         return;
   4664     }
   4665     baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
   4666 
   4667     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4668     if(U_FAILURE(status)) {
   4669         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4670         return;
   4671     }
   4672     reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
   4673 
   4674     if (baseKeyLength != reorderKeyLength) {
   4675         log_err("Key lengths not the same during reordering.\n");
   4676         return;
   4677     }
   4678 
   4679     for (i = 1; i < baseKeyLength; i++) {
   4680         if (baseKey[i] != reorderKey[i]) {
   4681             log_err("Collation key bytes not the same at position %d.\n", i);
   4682             return;
   4683         }
   4684     }
   4685     ucol_close(myCollation);
   4686 }
   4687 
   4688 /*
   4689  * Test reordering API.
   4690  */
   4691 static void TestReorderingAPI(void)
   4692 {
   4693     UErrorCode status = U_ZERO_ERROR;
   4694     UCollator  *myCollation;
   4695     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4696     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
   4697     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4698     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
   4699     UCollationResult collResult;
   4700     int32_t retrievedReorderCodesLength;
   4701     int32_t retrievedReorderCodes[10];
   4702     UChar greekString[] = { 0x03b1 };
   4703     UChar punctuationString[] = { 0x203e };
   4704     int loopIndex;
   4705 
   4706     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4707 
   4708     /* build collator tertiary */
   4709     myCollation = ucol_open("", &status);
   4710     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4711     if(U_FAILURE(status)) {
   4712         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4713         return;
   4714     }
   4715 
   4716     /* set the reorderding */
   4717     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4718     if (U_FAILURE(status)) {
   4719         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4720         return;
   4721     }
   4722 
   4723     /* get the reordering */
   4724     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4725     if (status != U_BUFFER_OVERFLOW_ERROR) {
   4726         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   4727         return;
   4728     }
   4729     status = U_ZERO_ERROR;
   4730     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4731         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4732         return;
   4733     }
   4734     /* now let's really get it */
   4735     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4736     if (U_FAILURE(status)) {
   4737         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4738         return;
   4739     }
   4740     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4741         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4742         return;
   4743     }
   4744     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4745         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   4746             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4747             return;
   4748         }
   4749     }
   4750     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4751     if (collResult != UCOL_LESS) {
   4752         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   4753         return;
   4754     }
   4755 
   4756     /* clear the reordering */
   4757     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   4758     if (U_FAILURE(status)) {
   4759         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   4760         return;
   4761     }
   4762 
   4763     /* get the reordering again */
   4764     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4765     if (retrievedReorderCodesLength != 0) {
   4766         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   4767         return;
   4768     }
   4769 
   4770     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4771     if (collResult != UCOL_GREATER) {
   4772         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4773         return;
   4774     }
   4775 
   4776     /* clear the reordering using [NONE] */
   4777     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
   4778     if (U_FAILURE(status)) {
   4779         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
   4780         return;
   4781     }
   4782 
   4783     /* get the reordering again */
   4784     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4785     if (retrievedReorderCodesLength != 0) {
   4786         log_err_status(status,
   4787                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
   4788                        retrievedReorderCodesLength);
   4789         return;
   4790     }
   4791 
   4792     /* test for error condition on duplicate reorder codes */
   4793     ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
   4794     if (!U_FAILURE(status)) {
   4795         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
   4796         return;
   4797     }
   4798 
   4799     status = U_ZERO_ERROR;
   4800     /* test for reorder codes after a reset code */
   4801     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
   4802     if (!U_FAILURE(status)) {
   4803         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
   4804         return;
   4805     }
   4806 
   4807     ucol_close(myCollation);
   4808 }
   4809 
   4810 /*
   4811  * Test reordering API.
   4812  */
   4813 static void TestReorderingAPIWithRuleCreatedCollator(void)
   4814 {
   4815     UErrorCode status = U_ZERO_ERROR;
   4816     UCollator  *myCollation;
   4817     UChar rules[90];
   4818     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
   4819     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4820     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
   4821     UCollationResult collResult;
   4822     int32_t retrievedReorderCodesLength;
   4823     int32_t retrievedReorderCodes[10];
   4824     static const UChar greekString[] = { 0x03b1 };
   4825     static const UChar punctuationString[] = { 0x203e };
   4826     static const UChar hanString[] = { 0x65E5, 0x672C };
   4827     int loopIndex;
   4828 
   4829     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4830 
   4831     /* build collator from rules */
   4832     u_uastrcpy(rules, "[reorder Hani Grek]");
   4833     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
   4834     if(U_FAILURE(status)) {
   4835         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4836         return;
   4837     }
   4838 
   4839     /* get the reordering */
   4840     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4841     if (U_FAILURE(status)) {
   4842         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4843         return;
   4844     }
   4845     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   4846         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   4847         return;
   4848     }
   4849     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4850         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   4851             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4852             return;
   4853         }
   4854     }
   4855     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
   4856     if (collResult != UCOL_GREATER) {
   4857         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4858         return;
   4859     }
   4860 
   4861     /* set the reordering */
   4862     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   4863     if (U_FAILURE(status)) {
   4864         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4865         return;
   4866     }
   4867 
   4868     /* get the reordering */
   4869     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4870     if (status != U_BUFFER_OVERFLOW_ERROR) {
   4871         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   4872         return;
   4873     }
   4874     status = U_ZERO_ERROR;
   4875     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4876         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4877         return;
   4878     }
   4879     /* now let's really get it */
   4880     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4881     if (U_FAILURE(status)) {
   4882         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4883         return;
   4884     }
   4885     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   4886         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   4887         return;
   4888     }
   4889     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4890         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   4891             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4892             return;
   4893         }
   4894     }
   4895     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4896     if (collResult != UCOL_LESS) {
   4897         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   4898         return;
   4899     }
   4900 
   4901     /* clear the reordering */
   4902     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   4903     if (U_FAILURE(status)) {
   4904         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   4905         return;
   4906     }
   4907 
   4908     /* get the reordering again */
   4909     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4910     if (retrievedReorderCodesLength != 0) {
   4911         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   4912         return;
   4913     }
   4914 
   4915     collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
   4916     if (collResult != UCOL_GREATER) {
   4917         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4918         return;
   4919     }
   4920 
   4921     /* reset the reordering */
   4922     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
   4923     if (U_FAILURE(status)) {
   4924         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
   4925         return;
   4926     }
   4927     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   4928     if (U_FAILURE(status)) {
   4929         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4930         return;
   4931     }
   4932     if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
   4933         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
   4934         return;
   4935     }
   4936     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4937         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   4938             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4939             return;
   4940         }
   4941     }
   4942 
   4943     ucol_close(myCollation);
   4944 }
   4945 
   4946 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
   4947     int32_t i;
   4948     for (i = 0; i < length; ++i) {
   4949         if (expectedScript == scripts[i]) { return TRUE; }
   4950     }
   4951     return FALSE;
   4952 }
   4953 
   4954 static void TestEquivalentReorderingScripts(void) {
   4955     // Beginning with ICU 55, collation reordering moves single scripts
   4956     // rather than groups of scripts,
   4957     // except where scripts share a range and sort primary-equal.
   4958     UErrorCode status = U_ZERO_ERROR;
   4959     int32_t equivalentScripts[100];
   4960     int32_t length;
   4961     int i;
   4962     int32_t prevScript;
   4963     /* These scripts are expected to be equivalent. */
   4964     static const int32_t expectedScripts[] = {
   4965         USCRIPT_HIRAGANA,
   4966         USCRIPT_KATAKANA,
   4967         USCRIPT_KATAKANA_OR_HIRAGANA
   4968     };
   4969 
   4970     equivalentScripts[0] = 0;
   4971     length = ucol_getEquivalentReorderCodes(
   4972             USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
   4973     if (U_FAILURE(status)) {
   4974         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   4975         return;
   4976     }
   4977     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
   4978         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
   4979                 "length expected 1, was = %d; expected [%d] was [%d]\n",
   4980                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
   4981     }
   4982 
   4983     length = ucol_getEquivalentReorderCodes(
   4984             USCRIPT_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status);
   4985     if (U_FAILURE(status)) {
   4986         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   4987         return;
   4988     }
   4989     if (length != LEN(expectedScripts)) {
   4990         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
   4991                 "expected %d, was = %d\n",
   4992                 LEN(expectedScripts), length);
   4993     }
   4994     prevScript = -1;
   4995     for (i = 0; i < length; ++i) {
   4996         int32_t script = equivalentScripts[i];
   4997         if (script <= prevScript) {
   4998             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
   4999         }
   5000         prevScript = script;
   5001     }
   5002     for (i = 0; i < LEN(expectedScripts); i++) {
   5003         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
   5004             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
   5005                     expectedScripts[i]);
   5006         }
   5007     }
   5008 
   5009     length = ucol_getEquivalentReorderCodes(
   5010             USCRIPT_KATAKANA, equivalentScripts, LEN(equivalentScripts), &status);
   5011     if (U_FAILURE(status)) {
   5012         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   5013         return;
   5014     }
   5015     if (length != LEN(expectedScripts)) {
   5016         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
   5017                 "expected %d, was = %d\n",
   5018                 LEN(expectedScripts), length);
   5019     }
   5020     for (i = 0; i < LEN(expectedScripts); i++) {
   5021         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
   5022             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
   5023                     expectedScripts[i]);
   5024         }
   5025     }
   5026 
   5027     length = ucol_getEquivalentReorderCodes(
   5028             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status);
   5029     if (U_FAILURE(status) || length != LEN(expectedScripts)) {
   5030         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
   5031                 "expected %d, was = %d\n",
   5032                 LEN(expectedScripts), length);
   5033     }
   5034 
   5035     length = ucol_getEquivalentReorderCodes(
   5036             USCRIPT_HAN, equivalentScripts, LEN(equivalentScripts), &status);
   5037     if (U_FAILURE(status) || length != 3) {
   5038         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
   5039                 "expected 3, was = %d\n", length);
   5040     }
   5041     length = ucol_getEquivalentReorderCodes(
   5042             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, LEN(equivalentScripts), &status);
   5043     if (U_FAILURE(status) || length != 3) {
   5044         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
   5045                 "expected 3, was = %d\n", length);
   5046     }
   5047     length = ucol_getEquivalentReorderCodes(
   5048             USCRIPT_TRADITIONAL_HAN, equivalentScripts, LEN(equivalentScripts), &status);
   5049     if (U_FAILURE(status) || length != 3) {
   5050         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
   5051                 "expected 3, was = %d\n", length);
   5052     }
   5053 
   5054     length = ucol_getEquivalentReorderCodes(
   5055             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, LEN(equivalentScripts), &status);
   5056     if (U_FAILURE(status) || length != 2) {
   5057         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
   5058                 "expected 2, was = %d\n", length);
   5059     }
   5060     length = ucol_getEquivalentReorderCodes(
   5061             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, LEN(equivalentScripts), &status);
   5062     if (U_FAILURE(status) || length != 2) {
   5063         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
   5064                 "expected 2, was = %d\n", length);
   5065     }
   5066 }
   5067 
   5068 static void TestReorderingAcrossCloning(void)
   5069 {
   5070     UErrorCode status = U_ZERO_ERROR;
   5071     UCollator  *myCollation;
   5072     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   5073     UCollator *clonedCollation;
   5074     int32_t retrievedReorderCodesLength;
   5075     int32_t retrievedReorderCodes[10];
   5076     int loopIndex;
   5077 
   5078     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5079 
   5080     /* build collator tertiary */
   5081     myCollation = ucol_open("", &status);
   5082     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5083     if(U_FAILURE(status)) {
   5084         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5085         return;
   5086     }
   5087 
   5088     /* set the reorderding */
   5089     ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
   5090     if (U_FAILURE(status)) {
   5091         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5092         return;
   5093     }
   5094 
   5095     /* clone the collator */
   5096     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
   5097     if (U_FAILURE(status)) {
   5098         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
   5099         return;
   5100     }
   5101 
   5102     /* get the reordering */
   5103     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
   5104     if (U_FAILURE(status)) {
   5105         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   5106         return;
   5107     }
   5108     if (retrievedReorderCodesLength != LEN(reorderCodes)) {
   5109         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
   5110         return;
   5111     }
   5112     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   5113         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   5114             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   5115             return;
   5116         }
   5117     }
   5118 
   5119     /*uprv_free(buffer);*/
   5120     ucol_close(myCollation);
   5121     ucol_close(clonedCollation);
   5122 }
   5123 
   5124 /*
   5125  * Utility function to test one collation reordering test case set.
   5126  * @param testcases Array of test cases.
   5127  * @param n_testcases Size of the array testcases.
   5128  * @param reorderTokens Array of reordering codes.
   5129  * @param reorderTokensLen Size of the array reorderTokens.
   5130  */
   5131 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   5132 {
   5133     uint32_t testCaseNum;
   5134     UErrorCode status = U_ZERO_ERROR;
   5135     UCollator  *myCollation;
   5136 
   5137     myCollation = ucol_open("", &status);
   5138     if (U_FAILURE(status)) {
   5139         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5140         return;
   5141     }
   5142     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   5143     if(U_FAILURE(status)) {
   5144         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5145         return;
   5146     }
   5147 
   5148     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   5149         doTest(myCollation,
   5150             testCases[testCaseNum].source,
   5151             testCases[testCaseNum].target,
   5152             testCases[testCaseNum].result
   5153         );
   5154     }
   5155     ucol_close(myCollation);
   5156 }
   5157 
   5158 static void TestGreekFirstReorder(void)
   5159 {
   5160     const char* strRules[] = {
   5161         "[reorder Grek]"
   5162     };
   5163 
   5164     const int32_t apiRules[] = {
   5165         USCRIPT_GREEK
   5166     };
   5167 
   5168     const static OneTestCase privateUseCharacterStrings[] = {
   5169         { {0x0391}, {0x0391}, UCOL_EQUAL },
   5170         { {0x0041}, {0x0391}, UCOL_GREATER },
   5171         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   5172         { {0x0060}, {0x0391}, UCOL_LESS },
   5173         { {0x0391}, {0xe2dc}, UCOL_LESS },
   5174         { {0x0391}, {0x0060}, UCOL_GREATER },
   5175     };
   5176 
   5177     /* Test rules creation */
   5178     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5179 
   5180     /* Test collation reordering API */
   5181     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5182 }
   5183 
   5184 static void TestGreekLastReorder(void)
   5185 {
   5186     const char* strRules[] = {
   5187         "[reorder Zzzz Grek]"
   5188     };
   5189 
   5190     const int32_t apiRules[] = {
   5191         USCRIPT_UNKNOWN, USCRIPT_GREEK
   5192     };
   5193 
   5194     const static OneTestCase privateUseCharacterStrings[] = {
   5195         { {0x0391}, {0x0391}, UCOL_EQUAL },
   5196         { {0x0041}, {0x0391}, UCOL_LESS },
   5197         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   5198         { {0x0060}, {0x0391}, UCOL_LESS },
   5199         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   5200     };
   5201 
   5202     /* Test rules creation */
   5203     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5204 
   5205     /* Test collation reordering API */
   5206     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5207 }
   5208 
   5209 static void TestNonScriptReorder(void)
   5210 {
   5211     const char* strRules[] = {
   5212         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   5213     };
   5214 
   5215     const int32_t apiRules[] = {
   5216         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   5217         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   5218         UCOL_REORDER_CODE_CURRENCY
   5219     };
   5220 
   5221     const static OneTestCase privateUseCharacterStrings[] = {
   5222         { {0x0391}, {0x0041}, UCOL_LESS },
   5223         { {0x0041}, {0x0391}, UCOL_GREATER },
   5224         { {0x0060}, {0x0041}, UCOL_LESS },
   5225         { {0x0060}, {0x0391}, UCOL_GREATER },
   5226         { {0x0024}, {0x0041}, UCOL_GREATER },
   5227     };
   5228 
   5229     /* Test rules creation */
   5230     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5231 
   5232     /* Test collation reordering API */
   5233     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5234 }
   5235 
   5236 static void TestHaniReorder(void)
   5237 {
   5238     const char* strRules[] = {
   5239         "[reorder Hani]"
   5240     };
   5241     const int32_t apiRules[] = {
   5242         USCRIPT_HAN
   5243     };
   5244 
   5245     const static OneTestCase privateUseCharacterStrings[] = {
   5246         { {0x4e00}, {0x0041}, UCOL_LESS },
   5247         { {0x4e00}, {0x0060}, UCOL_GREATER },
   5248         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   5249         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   5250         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   5251         { {0xfa27}, {0x0041}, UCOL_LESS },
   5252         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   5253     };
   5254 
   5255     /* Test rules creation */
   5256     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5257 
   5258     /* Test collation reordering API */
   5259     doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
   5260 }
   5261 
   5262 static void TestHaniReorderWithOtherRules(void)
   5263 {
   5264     const char* strRules[] = {
   5265         "[reorder Hani] &b<a"
   5266     };
   5267     /*const int32_t apiRules[] = {
   5268         USCRIPT_HAN
   5269     };*/
   5270 
   5271     const static OneTestCase privateUseCharacterStrings[] = {
   5272         { {0x4e00}, {0x0041}, UCOL_LESS },
   5273         { {0x4e00}, {0x0060}, UCOL_GREATER },
   5274         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   5275         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   5276         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   5277         { {0xfa27}, {0x0041}, UCOL_LESS },
   5278         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   5279         { {0x0062}, {0x0061}, UCOL_LESS },
   5280     };
   5281 
   5282     /* Test rules creation */
   5283     doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
   5284 }
   5285 
   5286 static void TestMultipleReorder(void)
   5287 {
   5288     const char* strRules[] = {
   5289         "[reorder Grek Zzzz DIGIT Latn Hani]"
   5290     };
   5291 
   5292     const int32_t apiRules[] = {
   5293         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
   5294     };
   5295 
   5296     const static OneTestCase collationTestCases[] = {
   5297         { {0x0391}, {0x0041}, UCOL_LESS},
   5298         { {0x0031}, {0x0041}, UCOL_LESS},
   5299         { {0x0041}, {0x4e00}, UCOL_LESS},
   5300     };
   5301 
   5302     /* Test rules creation */
   5303     doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
   5304 
   5305     /* Test collation reordering API */
   5306     doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
   5307 }
   5308 
   5309 /*
   5310  * Test that covers issue reported in ticket 8814
   5311  */
   5312 static void TestReorderWithNumericCollation(void)
   5313 {
   5314     UErrorCode status = U_ZERO_ERROR;
   5315     UCollator  *myCollation;
   5316     UCollator  *myReorderCollation;
   5317     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
   5318     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
   5319     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
   5320     UChar fortyS[] = { 0x0053 };
   5321     UChar fortyThreeP[] = { 0x0050 };
   5322     uint8_t fortyS_sortKey[128];
   5323     int32_t fortyS_sortKey_Length;
   5324     uint8_t fortyThreeP_sortKey[128];
   5325     int32_t fortyThreeP_sortKey_Length;
   5326     uint8_t fortyS_sortKey_reorder[128];
   5327     int32_t fortyS_sortKey_reorder_Length;
   5328     uint8_t fortyThreeP_sortKey_reorder[128];
   5329     int32_t fortyThreeP_sortKey_reorder_Length;
   5330     UCollationResult collResult;
   5331     UCollationResult collResultReorder;
   5332 
   5333     log_verbose("Testing reordering with and without numeric collation\n");
   5334 
   5335     /* build collator tertiary with numeric */
   5336     myCollation = ucol_open("", &status);
   5337     /*
   5338     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5339     */
   5340     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   5341     if(U_FAILURE(status)) {
   5342         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5343         return;
   5344     }
   5345 
   5346     /* build collator tertiary with numeric and reordering */
   5347     myReorderCollation = ucol_open("", &status);
   5348     /*
   5349     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
   5350     */
   5351     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   5352     ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
   5353     if(U_FAILURE(status)) {
   5354         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5355         return;
   5356     }
   5357 
   5358     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
   5359     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
   5360     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
   5361     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
   5362 
   5363     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
   5364         log_err_status(status, "ERROR: couldn't generate sort keys\n");
   5365         return;
   5366     }
   5367     collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   5368     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
   5369     /*
   5370     fprintf(stderr, "\tcollResult = %x\n", collResult);
   5371     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
   5372     fprintf(stderr, "\nfortyS\n");
   5373     for (i = 0; i < fortyS_sortKey_Length; i++) {
   5374         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
   5375     }
   5376     fprintf(stderr, "\nfortyThreeP\n");
   5377     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
   5378         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
   5379     }
   5380     */
   5381     if (collResult != collResultReorder) {
   5382         log_err_status(status, "ERROR: collation results should have been the same.\n");
   5383         return;
   5384     }
   5385 
   5386     ucol_close(myCollation);
   5387     ucol_close(myReorderCollation);
   5388 }
   5389 
   5390 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   5391 {
   5392   for (; *a == *b; ++a, ++b) {
   5393     if (*a == 0) {
   5394       return 0;
   5395     }
   5396   }
   5397   return (*a < *b ? -1 : 1);
   5398 }
   5399 
   5400 static void TestImportRulesDeWithPhonebook(void)
   5401 {
   5402   const char* normalRules[] = {
   5403     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
   5404     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
   5405     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
   5406   };
   5407   const OneTestCase normalTests[] = {
   5408     { {0x00e6}, {0x00c6}, UCOL_LESS},
   5409     { {0x00fc}, {0x00dc}, UCOL_GREATER},
   5410   };
   5411 
   5412   const char* importRules[] = {
   5413     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
   5414     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   5415     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   5416   };
   5417   const OneTestCase importTests[] = {
   5418     { {0x00e6}, {0x00c6}, UCOL_LESS},
   5419     { {0x00fc}, {0x00dc}, UCOL_LESS},
   5420   };
   5421 
   5422   doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
   5423   doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
   5424 }
   5425 
   5426 #if 0
   5427 static void TestImportRulesFiWithEor(void)
   5428 {
   5429   /* DUCET. */
   5430   const char* defaultRules[] = {
   5431     "&a<b",                                    /* Dummy rule. */
   5432   };
   5433 
   5434   const OneTestCase defaultTests[] = {
   5435     { {0x0110}, {0x00F0}, UCOL_LESS},
   5436     { {0x00a3}, {0x00a5}, UCOL_LESS},
   5437     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   5438   };
   5439 
   5440   /* European Ordering rules: ignore currency characters. */
   5441   const char* eorRules[] = {
   5442     "[import root-u-co-eor]",
   5443   };
   5444 
   5445   const OneTestCase eorTests[] = {
   5446     { {0x0110}, {0x00F0}, UCOL_LESS},
   5447     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   5448     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   5449   };
   5450 
   5451   const char* fiStdRules[] = {
   5452     "[import fi-u-co-standard]",
   5453   };
   5454 
   5455   const OneTestCase fiStdTests[] = {
   5456     { {0x0110}, {0x00F0}, UCOL_GREATER},
   5457     { {0x00a3}, {0x00a5}, UCOL_LESS},
   5458     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   5459   };
   5460 
   5461   /* Both European Ordering Rules and Fi Standard Rules. */
   5462   const char* eorFiStdRules[] = {
   5463     "[import root-u-co-eor][import fi-u-co-standard]",
   5464   };
   5465 
   5466   /* This is essentially same as the one before once fi.txt is updated with import. */
   5467   const char* fiEorRules[] = {
   5468     "[import fi-u-co-eor]",
   5469   };
   5470 
   5471   const OneTestCase fiEorTests[] = {
   5472     { {0x0110}, {0x00F0}, UCOL_GREATER},
   5473     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   5474     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   5475   };
   5476 
   5477   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
   5478   doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
   5479   doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
   5480   doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
   5481 
   5482   log_knownIssue("8962", NULL);
   5483   /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
   5484         eor{
   5485             Sequence{
   5486                 "[import root-u-co-eor][import fi-u-co-standard]"
   5487             }
   5488             Version{"21.0"}
   5489         }
   5490   */
   5491   /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
   5492 
   5493 }
   5494 #endif
   5495 
   5496 #if 0
   5497 /*
   5498  * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
   5499  * the resource files are built with -includeUnihanColl option.
   5500  * TODO: Uncomment this function and make it work when unihan rules are built by default.
   5501  */
   5502 static void TestImportRulesCJKWithUnihan(void)
   5503 {
   5504   /* DUCET. */
   5505   const char* defaultRules[] = {
   5506     "&a<b",                                    /* Dummy rule. */
   5507   };
   5508 
   5509   const OneTestCase defaultTests[] = {
   5510     { {0x3402}, {0x4e1e}, UCOL_GREATER},
   5511   };
   5512 
   5513   /* European Ordering rules: ignore currency characters. */
   5514   const char* unihanRules[] = {
   5515     "[import ko-u-co-unihan]",
   5516   };
   5517 
   5518   const OneTestCase unihanTests[] = {
   5519     { {0x3402}, {0x4e1e}, UCOL_LESS},
   5520   };
   5521 
   5522   doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
   5523   doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
   5524 
   5525 }
   5526 #endif
   5527 
   5528 static void TestImport(void)
   5529 {
   5530     UCollator* vicoll;
   5531     UCollator* escoll;
   5532     UCollator* viescoll;
   5533     UCollator* importviescoll;
   5534     UParseError error;
   5535     UErrorCode status = U_ZERO_ERROR;
   5536     UChar* virules;
   5537     int32_t viruleslength;
   5538     UChar* esrules;
   5539     int32_t esruleslength;
   5540     UChar* viesrules;
   5541     int32_t viesruleslength;
   5542     char srules[500] = "[import vi][import es]";
   5543     UChar rules[500];
   5544     uint32_t length = 0;
   5545     int32_t itemCount;
   5546     int32_t i, k;
   5547     UChar32 start;
   5548     UChar32 end;
   5549     UChar str[500];
   5550     int32_t strLength;
   5551 
   5552     uint8_t sk1[500];
   5553     uint8_t sk2[500];
   5554 
   5555     UBool b;
   5556     USet* tailoredSet;
   5557     USet* importTailoredSet;
   5558 
   5559 
   5560     vicoll = ucol_open("vi", &status);
   5561     if(U_FAILURE(status)){
   5562         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   5563         return;
   5564     }
   5565 
   5566     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   5567     if(viruleslength == 0) {
   5568         log_data_err("missing vi tailoring rule string\n");
   5569         ucol_close(vicoll);
   5570         return;
   5571     }
   5572     escoll = ucol_open("es", &status);
   5573     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   5574     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   5575     viesrules[0] = 0;
   5576     u_strcat(viesrules, virules);
   5577     u_strcat(viesrules, esrules);
   5578     viesruleslength = viruleslength + esruleslength;
   5579     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5580 
   5581     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   5582     length = u_unescape(srules, rules, 500);
   5583     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5584     if(U_FAILURE(status)){
   5585         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5586         return;
   5587     }
   5588 
   5589     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   5590     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   5591 
   5592     if(!uset_equals(tailoredSet, importTailoredSet)){
   5593         log_err("Tailored sets not equal");
   5594     }
   5595 
   5596     uset_close(importTailoredSet);
   5597 
   5598     itemCount = uset_getItemCount(tailoredSet);
   5599 
   5600     for( i = 0; i < itemCount; i++){
   5601         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   5602         if(strLength < 2){
   5603             for (; start <= end; start++){
   5604                 k = 0;
   5605                 U16_APPEND(str, k, 500, start, b);
   5606                 (void)b;    /* Suppress set but not used warning. */
   5607                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   5608                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   5609                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5610                     log_err("Sort key for %s not equal\n", str);
   5611                     break;
   5612                 }
   5613             }
   5614         }else{
   5615             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   5616             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   5617             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5618                 log_err("ZZSort key for %s not equal\n", str);
   5619                 break;
   5620             }
   5621 
   5622         }
   5623     }
   5624 
   5625     uset_close(tailoredSet);
   5626 
   5627     uprv_free(viesrules);
   5628 
   5629     ucol_close(vicoll);
   5630     ucol_close(escoll);
   5631     ucol_close(viescoll);
   5632     ucol_close(importviescoll);
   5633 }
   5634 
   5635 static void TestImportWithType(void)
   5636 {
   5637     UCollator* vicoll;
   5638     UCollator* decoll;
   5639     UCollator* videcoll;
   5640     UCollator* importvidecoll;
   5641     UParseError error;
   5642     UErrorCode status = U_ZERO_ERROR;
   5643     const UChar* virules;
   5644     int32_t viruleslength;
   5645     const UChar* derules;
   5646     int32_t deruleslength;
   5647     UChar* viderules;
   5648     int32_t videruleslength;
   5649     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   5650     UChar rules[500];
   5651     uint32_t length = 0;
   5652     int32_t itemCount;
   5653     int32_t i, k;
   5654     UChar32 start;
   5655     UChar32 end;
   5656     UChar str[500];
   5657     int32_t strLength;
   5658 
   5659     uint8_t sk1[500];
   5660     uint8_t sk2[500];
   5661 
   5662     USet* tailoredSet;
   5663     USet* importTailoredSet;
   5664 
   5665     vicoll = ucol_open("vi", &status);
   5666     if(U_FAILURE(status)){
   5667         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5668         return;
   5669     }
   5670     virules = ucol_getRules(vicoll, &viruleslength);
   5671     if(viruleslength == 0) {
   5672         log_data_err("missing vi tailoring rule string\n");
   5673         ucol_close(vicoll);
   5674         return;
   5675     }
   5676     /* decoll = ucol_open("de@collation=phonebook", &status); */
   5677     decoll = ucol_open("de-u-co-phonebk", &status);
   5678     if(U_FAILURE(status)){
   5679         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5680         return;
   5681     }
   5682 
   5683 
   5684     derules = ucol_getRules(decoll, &deruleslength);
   5685     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   5686     viderules[0] = 0;
   5687     u_strcat(viderules, virules);
   5688     u_strcat(viderules, derules);
   5689     videruleslength = viruleslength + deruleslength;
   5690     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5691 
   5692     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   5693     length = u_unescape(srules, rules, 500);
   5694     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5695     if(U_FAILURE(status)){
   5696         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5697         return;
   5698     }
   5699 
   5700     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   5701     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   5702 
   5703     if(!uset_equals(tailoredSet, importTailoredSet)){
   5704         log_err("Tailored sets not equal");
   5705     }
   5706 
   5707     uset_close(importTailoredSet);
   5708 
   5709     itemCount = uset_getItemCount(tailoredSet);
   5710 
   5711     for( i = 0; i < itemCount; i++){
   5712         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   5713         if(strLength < 2){
   5714             for (; start <= end; start++){
   5715                 k = 0;
   5716                 U16_APPEND_UNSAFE(str, k, start);
   5717                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   5718                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   5719                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5720                     log_err("Sort key for %s not equal\n", str);
   5721                     break;
   5722                 }
   5723             }
   5724         }else{
   5725             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   5726             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   5727             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5728                 log_err("Sort key for %s not equal\n", str);
   5729                 break;
   5730             }
   5731 
   5732         }
   5733     }
   5734 
   5735     uset_close(tailoredSet);
   5736 
   5737     uprv_free(viderules);
   5738 
   5739     ucol_close(videcoll);
   5740     ucol_close(importvidecoll);
   5741     ucol_close(vicoll);
   5742     ucol_close(decoll);
   5743 }
   5744 
   5745 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
   5746 static const UChar longUpperStr1[]= { /* 155 chars */
   5747     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
   5748     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
   5749     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
   5750     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
   5751     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
   5752     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
   5753     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
   5754     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
   5755     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
   5756     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
   5757 };
   5758 
   5759 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
   5760 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
   5761     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5762     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5763     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5764     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5765     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
   5766 };
   5767 
   5768 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
   5769 static const UChar longUpperStr3[]= { /* 324 chars */
   5770     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5771     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5772     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5773     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5774     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5775     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5776     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5777     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5778     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5779     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5780     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5781     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
   5782 };
   5783 
   5784 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
   5785 
   5786 typedef struct {
   5787     const UChar * longUpperStrPtr;
   5788     int32_t       longUpperStrLen;
   5789 } LongUpperStrItem;
   5790 
   5791 /* String pointers must be in reverse collation order of the corresponding strings */
   5792 static const LongUpperStrItem longUpperStrItems[] = {
   5793     { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
   5794     { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
   5795     { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
   5796     { NULL,          0                           }
   5797 };
   5798 
   5799 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
   5800 
   5801 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
   5802 static void TestCaseLevelBufferOverflow(void)
   5803 {
   5804     UErrorCode status = U_ZERO_ERROR;
   5805     UCollator * ucol = ucol_open("root", &status);
   5806     if ( U_SUCCESS(status) ) {
   5807         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
   5808         if ( U_SUCCESS(status) ) {
   5809             const LongUpperStrItem * itemPtr;
   5810             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
   5811             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
   5812                 int32_t sortKeyLen;
   5813                 if (itemPtr > longUpperStrItems) {
   5814                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
   5815                 }
   5816                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
   5817                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
   5818                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
   5819                     break;
   5820                 }
   5821                 if ( itemPtr > longUpperStrItems ) {
   5822                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
   5823                     if (compareResult >= 0) {
   5824                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
   5825                     }
   5826                 }
   5827             }
   5828         } else {
   5829             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
   5830         }
   5831         ucol_close(ucol);
   5832     } else {
   5833         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
   5834     }
   5835 }
   5836 
   5837 /* Test for #10595 */
   5838 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
   5839 #define KEY_PART_SIZE 16
   5840 
   5841 static void TestNextSortKeyPartJaIdentical(void)
   5842 {
   5843     UErrorCode status = U_ZERO_ERROR;
   5844     UCollator *coll;
   5845     uint8_t keyPart[KEY_PART_SIZE];
   5846     UCharIterator iter;
   5847     uint32_t state[2] = {0, 0};
   5848     int32_t keyPartLen;
   5849 
   5850     coll = ucol_open("ja", &status);
   5851     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   5852     if (U_FAILURE(status)) {
   5853         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
   5854         return;
   5855     }
   5856 
   5857     uiter_setString(&iter, testJapaneseName, 5);
   5858     keyPartLen = KEY_PART_SIZE;
   5859     while (keyPartLen == KEY_PART_SIZE) {
   5860         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
   5861         if (U_FAILURE(status)) {
   5862             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
   5863             break;
   5864         }
   5865     }
   5866 
   5867     ucol_close(coll);
   5868 }
   5869 
   5870 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
   5871 
   5872 void addMiscCollTest(TestNode** root)
   5873 {
   5874     TEST(TestRuleOptions);
   5875     TEST(TestBeforePrefixFailure);
   5876     TEST(TestContractionClosure);
   5877     TEST(TestPrefixCompose);
   5878     TEST(TestStrCollIdenticalPrefix);
   5879     TEST(TestPrefix);
   5880     TEST(TestNewJapanese);
   5881     /*TEST(TestLimitations);*/
   5882     TEST(TestNonChars);
   5883     TEST(TestExtremeCompression);
   5884     TEST(TestSurrogates);
   5885     TEST(TestVariableTopSetting);
   5886     TEST(TestMaxVariable);
   5887     TEST(TestBocsuCoverage);
   5888     TEST(TestCyrillicTailoring);
   5889     TEST(TestCase);
   5890     TEST(IncompleteCntTest);
   5891     TEST(BlackBirdTest);
   5892     TEST(FunkyATest);
   5893     TEST(BillFairmanTest);
   5894     TEST(TestChMove);
   5895     TEST(TestImplicitTailoring);
   5896     TEST(TestFCDProblem);
   5897     TEST(TestEmptyRule);
   5898     /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
   5899     TEST(TestJ815);
   5900     /*TEST(TestJ831);*/ /* we changed lv locale */
   5901     TEST(TestBefore);
   5902     TEST(TestHangulTailoring);
   5903     TEST(TestUCARules);
   5904     TEST(TestIncrementalNormalize);
   5905     TEST(TestComposeDecompose);
   5906     TEST(TestCompressOverlap);
   5907     TEST(TestContraction);
   5908     TEST(TestExpansion);
   5909     /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
   5910     /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
   5911     TEST(TestOptimize);
   5912     TEST(TestSuppressContractions);
   5913     TEST(Alexis2);
   5914     TEST(TestHebrewUCA);
   5915     TEST(TestPartialSortKeyTermination);
   5916     TEST(TestSettings);
   5917     TEST(TestEquals);
   5918     TEST(TestJ2726);
   5919     TEST(NullRule);
   5920     TEST(TestNumericCollation);
   5921     TEST(TestTibetanConformance);
   5922     TEST(TestPinyinProblem);
   5923     TEST(TestSeparateTrees);
   5924     TEST(TestBeforePinyin);
   5925     TEST(TestBeforeTightening);
   5926     /*TEST(TestMoreBefore);*/
   5927     TEST(TestTailorNULL);
   5928     TEST(TestUpperFirstQuaternary);
   5929     TEST(TestJ4960);
   5930     TEST(TestJ5223);
   5931     TEST(TestJ5232);
   5932     TEST(TestJ5367);
   5933     TEST(TestHiragana);
   5934     TEST(TestSortKeyConsistency);
   5935     TEST(TestVI5913);  /* VI, RO tailored rules */
   5936     TEST(TestCroatianSortKey);
   5937     TEST(TestTailor6179);
   5938     TEST(TestUCAPrecontext);
   5939     TEST(TestOutOfBuffer5468);
   5940     TEST(TestSameStrengthList);
   5941 
   5942     TEST(TestSameStrengthListQuoted);
   5943     TEST(TestSameStrengthListSupplemental);
   5944     TEST(TestSameStrengthListQwerty);
   5945     TEST(TestSameStrengthListQuotedQwerty);
   5946     TEST(TestSameStrengthListRanges);
   5947     TEST(TestSameStrengthListSupplementalRanges);
   5948     TEST(TestSpecialCharacters);
   5949     TEST(TestPrivateUseCharacters);
   5950     TEST(TestPrivateUseCharactersInList);
   5951     TEST(TestPrivateUseCharactersInRange);
   5952     TEST(TestInvalidListsAndRanges);
   5953     TEST(TestImportRulesDeWithPhonebook);
   5954     /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
   5955     /* TEST(TestImportRulesCJKWithUnihan); */
   5956     TEST(TestImport);
   5957     TEST(TestImportWithType);
   5958 
   5959     TEST(TestBeforeRuleWithScriptReordering);
   5960     TEST(TestNonLeadBytesDuringCollationReordering);
   5961     TEST(TestReorderingAPI);
   5962     TEST(TestReorderingAPIWithRuleCreatedCollator);
   5963     TEST(TestEquivalentReorderingScripts);
   5964     TEST(TestGreekFirstReorder);
   5965     TEST(TestGreekLastReorder);
   5966     TEST(TestNonScriptReorder);
   5967     TEST(TestHaniReorder);
   5968     TEST(TestHaniReorderWithOtherRules);
   5969     TEST(TestMultipleReorder);
   5970     TEST(TestReorderingAcrossCloning);
   5971     TEST(TestReorderWithNumericCollation);
   5972 
   5973     TEST(TestCaseLevelBufferOverflow);
   5974     TEST(TestNextSortKeyPartJaIdentical);
   5975 }
   5976 
   5977 #endif /* #if !UCONFIG_NO_COLLATION */
   5978