Home | History | Annotate | Download | only in cintltst
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 2001-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /*******************************************************************************
      9 *
     10 * File cmsccoll.C
     11 *
     12 *******************************************************************************/
     13 /**
     14  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     15  * to fit.
     16  */
     17 
     18 #include <stdio.h>
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION
     23 
     24 #include "unicode/ucol.h"
     25 #include "unicode/ucoleitr.h"
     26 #include "unicode/uloc.h"
     27 #include "cintltst.h"
     28 #include "ccolltst.h"
     29 #include "callcoll.h"
     30 #include "unicode/ustring.h"
     31 #include "string.h"
     32 #include "ucol_imp.h"
     33 #include "cmemory.h"
     34 #include "cstring.h"
     35 #include "uassert.h"
     36 #include "unicode/parseerr.h"
     37 #include "unicode/ucnv.h"
     38 #include "unicode/ures.h"
     39 #include "unicode/uscript.h"
     40 #include "unicode/utf16.h"
     41 #include "uparse.h"
     42 #include "putilimp.h"
     43 
     44 
     45 #define MAX_TOKEN_LEN 16
     46 
/*
 * Function type for a string-comparison callback that returns a
 * UCollationResult for two UTF-16 buffers, `source` (with sLen) and
 * `target` (with tLen).
 * NOTE(review): the meaning of `collator` and `object` is not visible in this
 * part of the file -- confirm against the functions that implement this type.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
     50 
     51 
     52 
     53 const static char cnt1[][10] = {
     54 
     55   "AA",
     56   "AC",
     57   "AZ",
     58   "AQ",
     59   "AB",
     60   "ABZ",
     61   "ABQ",
     62   "Z",
     63   "ABC",
     64   "Q",
     65   "B"
     66 };
     67 
     68 const static char cnt2[][10] = {
     69   "DA",
     70   "DAD",
     71   "DAZ",
     72   "MAR",
     73   "Z",
     74   "DAVIS",
     75   "MARK",
     76   "DAV",
     77   "DAVI"
     78 };
     79 
     80 static void IncompleteCntTest(void)
     81 {
     82   UErrorCode status = U_ZERO_ERROR;
     83   UChar temp[90];
     84   UChar t1[90];
     85   UChar t2[90];
     86 
     87   UCollator *coll =  NULL;
     88   uint32_t i = 0, j = 0;
     89   uint32_t size = 0;
     90 
     91   u_uastrcpy(temp, " & Z < ABC < Q < B");
     92 
     93   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     94 
     95   if(U_SUCCESS(status)) {
     96     size = UPRV_LENGTHOF(cnt1);
     97     for(i = 0; i < size-1; i++) {
     98       for(j = i+1; j < size; j++) {
     99         UCollationElements *iter;
    100         u_uastrcpy(t1, cnt1[i]);
    101         u_uastrcpy(t2, cnt1[j]);
    102         doTest(coll, t1, t2, UCOL_LESS);
    103         /* synwee : added collation element iterator test */
    104         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    105         if (U_FAILURE(status)) {
    106           log_err("Creation of iterator failed\n");
    107           break;
    108         }
    109         backAndForth(iter);
    110         ucol_closeElements(iter);
    111       }
    112     }
    113   }
    114 
    115   ucol_close(coll);
    116 
    117 
    118   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    119   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    120 
    121   if(U_SUCCESS(status)) {
    122     size = UPRV_LENGTHOF(cnt2);
    123     for(i = 0; i < size-1; i++) {
    124       for(j = i+1; j < size; j++) {
    125         UCollationElements *iter;
    126         u_uastrcpy(t1, cnt2[i]);
    127         u_uastrcpy(t2, cnt2[j]);
    128         doTest(coll, t1, t2, UCOL_LESS);
    129 
    130         /* synwee : added collation element iterator test */
    131         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    132         if (U_FAILURE(status)) {
    133           log_err("Creation of iterator failed\n");
    134           break;
    135         }
    136         backAndForth(iter);
    137         ucol_closeElements(iter);
    138       }
    139     }
    140   }
    141 
    142   ucol_close(coll);
    143 
    144 
    145 }
    146 
    147 const static char shifted[][20] = {
    148   "black bird",
    149   "black-bird",
    150   "blackbird",
    151   "black Bird",
    152   "black-Bird",
    153   "blackBird",
    154   "black birds",
    155   "black-birds",
    156   "blackbirds"
    157 };
    158 
    159 const static UCollationResult shiftedTert[] = {
    160   UCOL_EQUAL,
    161   UCOL_EQUAL,
    162   UCOL_EQUAL,
    163   UCOL_LESS,
    164   UCOL_EQUAL,
    165   UCOL_EQUAL,
    166   UCOL_LESS,
    167   UCOL_EQUAL,
    168   UCOL_EQUAL
    169 };
    170 
    171 const static char nonignorable[][20] = {
    172   "black bird",
    173   "black Bird",
    174   "black birds",
    175   "black-bird",
    176   "black-Bird",
    177   "black-birds",
    178   "blackbird",
    179   "blackBird",
    180   "blackbirds"
    181 };
    182 
    183 static void BlackBirdTest(void) {
    184   UErrorCode status = U_ZERO_ERROR;
    185   UChar t1[90];
    186   UChar t2[90];
    187 
    188   uint32_t i = 0, j = 0;
    189   uint32_t size = 0;
    190   UCollator *coll = ucol_open("en_US", &status);
    191 
    192   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    193   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    194 
    195   if(U_SUCCESS(status)) {
    196     size = UPRV_LENGTHOF(nonignorable);
    197     for(i = 0; i < size-1; i++) {
    198       for(j = i+1; j < size; j++) {
    199         u_uastrcpy(t1, nonignorable[i]);
    200         u_uastrcpy(t2, nonignorable[j]);
    201         doTest(coll, t1, t2, UCOL_LESS);
    202       }
    203     }
    204   }
    205 
    206   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    207   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    208 
    209   if(U_SUCCESS(status)) {
    210     size = UPRV_LENGTHOF(shifted);
    211     for(i = 0; i < size-1; i++) {
    212       for(j = i+1; j < size; j++) {
    213         u_uastrcpy(t1, shifted[i]);
    214         u_uastrcpy(t2, shifted[j]);
    215         doTest(coll, t1, t2, UCOL_LESS);
    216       }
    217     }
    218   }
    219 
    220   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    221   if(U_SUCCESS(status)) {
    222     size = UPRV_LENGTHOF(shifted);
    223     for(i = 1; i < size; i++) {
    224       u_uastrcpy(t1, shifted[i-1]);
    225       u_uastrcpy(t2, shifted[i]);
    226       doTest(coll, t1, t2, shiftedTert[i]);
    227     }
    228   }
    229 
    230   ucol_close(coll);
    231 }
    232 
    233 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    234     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    235     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    236     {0x0041/*'A'*/, 0x0300, 0x0000},
    237     {0x00C0, 0x0301, 0x0000},
    238     /* this would work with forced normalization */
    239     {0x00C0, 0x0316, 0x0000}
    240 };
    241 
    242 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    243     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    244     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    245     {0x00C0, 0},
    246     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    247     /* this would work with forced normalization */
    248     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    249 };
    250 
    251 const static UCollationResult results[] = {
    252     UCOL_GREATER,
    253     UCOL_EQUAL,
    254     UCOL_EQUAL,
    255     UCOL_GREATER,
    256     UCOL_EQUAL
    257 };
    258 
    259 static void FunkyATest(void)
    260 {
    261 
    262     int32_t i;
    263     UErrorCode status = U_ZERO_ERROR;
    264     UCollator  *myCollation;
    265     myCollation = ucol_open("en_US", &status);
    266     if(U_FAILURE(status)){
    267         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    268         return;
    269     }
    270     log_verbose("Testing some A letters, for some reason\n");
    271     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    272     ucol_setStrength(myCollation, UCOL_TERTIARY);
    273     for (i = 0; i < 4 ; i++)
    274     {
    275         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    276     }
    277     ucol_close(myCollation);
    278 }
    279 
    280 UColAttributeValue caseFirst[] = {
    281     UCOL_OFF,
    282     UCOL_LOWER_FIRST,
    283     UCOL_UPPER_FIRST
    284 };
    285 
    286 
    287 UColAttributeValue alternateHandling[] = {
    288     UCOL_NON_IGNORABLE,
    289     UCOL_SHIFTED
    290 };
    291 
    292 UColAttributeValue caseLevel[] = {
    293     UCOL_OFF,
    294     UCOL_ON
    295 };
    296 
    297 UColAttributeValue strengths[] = {
    298     UCOL_PRIMARY,
    299     UCOL_SECONDARY,
    300     UCOL_TERTIARY,
    301     UCOL_QUATERNARY,
    302     UCOL_IDENTICAL
    303 };
    304 
    305 #if 0
    306 static const char * strengthsC[] = {
    307     "UCOL_PRIMARY",
    308     "UCOL_SECONDARY",
    309     "UCOL_TERTIARY",
    310     "UCOL_QUATERNARY",
    311     "UCOL_IDENTICAL"
    312 };
    313 
    314 static const char * caseFirstC[] = {
    315     "UCOL_OFF",
    316     "UCOL_LOWER_FIRST",
    317     "UCOL_UPPER_FIRST"
    318 };
    319 
    320 
    321 static const char * alternateHandlingC[] = {
    322     "UCOL_NON_IGNORABLE",
    323     "UCOL_SHIFTED"
    324 };
    325 
    326 static const char * caseLevelC[] = {
    327     "UCOL_OFF",
    328     "UCOL_ON"
    329 };
    330 
    331 /* not used currently - does not test only prints */
    332 static void PrintMarkDavis(void)
    333 {
    334   UErrorCode status = U_ZERO_ERROR;
    335   UChar m[256];
    336   uint8_t sortkey[256];
    337   UCollator *coll = ucol_open("en_US", &status);
    338   uint32_t h,i,j,k, sortkeysize;
    339   uint32_t sizem = 0;
    340   char buffer[512];
    341   uint32_t len = 512;
    342 
    343   log_verbose("PrintMarkDavis");
    344 
    345   u_uastrcpy(m, "Mark Davis");
    346   sizem = u_strlen(m);
    347 
    348 
    349   m[1] = 0xe4;
    350 
    351   for(i = 0; i<sizem; i++) {
    352     fprintf(stderr, "\\u%04X ", m[i]);
    353   }
    354   fprintf(stderr, "\n");
    355 
    356   for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
    357     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    358     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    359 
    360     for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
    361       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    362       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    363 
    364       for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
    365         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    366         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    367 
    368         for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
    369           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    370           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    371           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    372           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    373         }
    374 
    375       }
    376 
    377     }
    378 
    379   }
    380 }
    381 #endif
    382 
    383 static void BillFairmanTest(void) {
    384 /*
    385 ** check for actual locale via ICU resource bundles
    386 **
    387 ** lp points to the original locale ("fr_FR_....")
    388 */
    389 
    390     UResourceBundle *lr,*cr;
    391     UErrorCode              lec = U_ZERO_ERROR;
    392     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    393 
    394     log_verbose("BillFairmanTest\n");
    395 
    396     lr = ures_open(NULL,lp,&lec);
    397     if (lr) {
    398         cr = ures_getByKey(lr,"collations",0,&lec);
    399         if (cr) {
    400             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    401             if (lp) {
    402                 if (U_SUCCESS(lec)) {
    403                     if(strcmp(lp, "fr") != 0) {
    404                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    405                     }
    406                 }
    407             }
    408             ures_close(cr);
    409         }
    410         ures_close(lr);
    411     }
    412 }
    413 
    414 const static char chTest[][20] = {
    415   "c",
    416   "C",
    417   "ca", "cb", "cx", "cy", "CZ",
    418   "c\\u030C", "C\\u030C",
    419   "h",
    420   "H",
    421   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    422   "ch", "cH", "Ch", "CH",
    423   "cha", "charly", "che", "chh", "chch", "chr",
    424   "i", "I", "iarly",
    425   "r", "R",
    426   "r\\u030C", "R\\u030C",
    427   "s",
    428   "S",
    429   "s\\u030C", "S\\u030C",
    430   "z", "Z",
    431   "z\\u030C", "Z\\u030C"
    432 };
    433 
    434 static void TestChMove(void) {
    435     UChar t1[256] = {0};
    436     UChar t2[256] = {0};
    437 
    438     uint32_t i = 0, j = 0;
    439     uint32_t size = 0;
    440     UErrorCode status = U_ZERO_ERROR;
    441 
    442     UCollator *coll = ucol_open("cs", &status);
    443 
    444     if(U_SUCCESS(status)) {
    445         size = UPRV_LENGTHOF(chTest);
    446         for(i = 0; i < size-1; i++) {
    447             for(j = i+1; j < size; j++) {
    448                 u_unescape(chTest[i], t1, 256);
    449                 u_unescape(chTest[j], t2, 256);
    450                 doTest(coll, t1, t2, UCOL_LESS);
    451             }
    452         }
    453     }
    454     else {
    455         log_data_err("Can't open collator");
    456     }
    457     ucol_close(coll);
    458 }
    459 
    460 
    461 
    462 
    463 /*
    464 const static char impTest[][20] = {
    465   "\\u4e00",
    466     "a",
    467     "A",
    468     "b",
    469     "B",
    470     "\\u4e01"
    471 };
    472 */
    473 
    474 
    475 static void TestImplicitTailoring(void) {
    476   static const struct {
    477     const char *rules;
    478     const char *data[10];
    479     const uint32_t len;
    480   } tests[] = {
    481       {
    482         /* Tailor b and c before U+4E00. */
    483         "&[before 1]\\u4e00 < b < c "
    484         /* Now, before U+4E00 is c; put d and e after that. */
    485         "&[before 1]\\u4e00 < d < e",
    486         { "b", "c", "d", "e", "\\u4e00"}, 5 },
    487       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
    488       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
    489       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
    490   };
    491 
    492   int32_t i = 0;
    493 
    494   for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
    495       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
    496   }
    497 
    498 /*
    499   UChar t1[256] = {0};
    500   UChar t2[256] = {0};
    501 
    502   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
    503 
    504   uint32_t i = 0, j = 0;
    505   uint32_t size = 0;
    506   uint32_t ruleLen = 0;
    507   UErrorCode status = U_ZERO_ERROR;
    508   UCollator *coll = NULL;
    509   ruleLen = u_unescape(rule, t1, 256);
    510 
    511   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    512 
    513   if(U_SUCCESS(status)) {
    514     size = UPRV_LENGTHOF(impTest);
    515     for(i = 0; i < size-1; i++) {
    516       for(j = i+1; j < size; j++) {
    517         u_unescape(impTest[i], t1, 256);
    518         u_unescape(impTest[j], t2, 256);
    519         doTest(coll, t1, t2, UCOL_LESS);
    520       }
    521     }
    522   }
    523   else {
    524     log_err("Can't open collator");
    525   }
    526   ucol_close(coll);
    527   */
    528 }
    529 
    530 static void TestFCDProblem(void) {
    531   UChar t1[256] = {0};
    532   UChar t2[256] = {0};
    533 
    534   const char *s1 = "\\u0430\\u0306\\u0325";
    535   const char *s2 = "\\u04D1\\u0325";
    536 
    537   UErrorCode status = U_ZERO_ERROR;
    538   UCollator *coll = ucol_open("", &status);
    539   u_unescape(s1, t1, 256);
    540   u_unescape(s2, t2, 256);
    541 
    542   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    543   doTest(coll, t1, t2, UCOL_EQUAL);
    544 
    545   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    546   doTest(coll, t1, t2, UCOL_EQUAL);
    547 
    548   ucol_close(coll);
    549 }
    550 
    551 /*
    552 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
    553 We're only using NFC/NFD in this test.
    554 */
/* Capacity, in UChars, of each normalization buffer in `tester`. */
#define NORM_BUFFER_TEST_LEN 18
/* One test case of TestComposeDecompose. */
typedef struct {
  /* code point the case was built from */
  UChar32 u;
  /* NFC form of `u`; TestComposeDecompose overwrites it with the unnormalized
     character itself when that differs from the NFD form */
  UChar NFC[NORM_BUFFER_TEST_LEN];
  /* NFD form of `u` */
  UChar NFD[NORM_BUFFER_TEST_LEN];
} tester;
    561 
    562 static void TestComposeDecompose(void) {
    563     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
    564     static const UChar UNICODESET_STR[] = {
    565         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
    566         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
    567         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
    568     };
    569     int32_t noOfLoc;
    570     int32_t i = 0, j = 0;
    571 
    572     UErrorCode status = U_ZERO_ERROR;
    573     const char *locName = NULL;
    574     uint32_t nfcSize;
    575     uint32_t nfdSize;
    576     tester **t;
    577     uint32_t noCases = 0;
    578     UCollator *coll = NULL;
    579     UChar32 u = 0;
    580     UChar comp[NORM_BUFFER_TEST_LEN];
    581     uint32_t len = 0;
    582     UCollationElements *iter;
    583     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
    584     int32_t charsToTestSize;
    585 
    586     noOfLoc = uloc_countAvailable();
    587 
    588     coll = ucol_open("", &status);
    589     if (U_FAILURE(status)) {
    590         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
    591         return;
    592     }
    593     charsToTestSize = uset_size(charsToTest);
    594     if (charsToTestSize <= 0) {
    595         log_err("Set was zero. Missing data?\n");
    596         return;
    597     }
    598     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
    599     t[0] = (tester *)malloc(sizeof(tester));
    600     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
    601 
    602     for(u = 0; u < charsToTestSize; u++) {
    603         UChar32 ch = uset_charAt(charsToTest, u);
    604         len = 0;
    605         U16_APPEND_UNSAFE(comp, len, ch);
    606         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    607         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    608 
    609         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
    610           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
    611             t[noCases]->u = ch;
    612             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
    613                 u_strncpy(t[noCases]->NFC, comp, len);
    614                 t[noCases]->NFC[len] = 0;
    615             }
    616             noCases++;
    617             t[noCases] = (tester *)malloc(sizeof(tester));
    618             uprv_memset(t[noCases], 0, sizeof(tester));
    619         }
    620     }
    621     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
    622     uset_close(charsToTest);
    623     charsToTest = NULL;
    624 
    625     for(u=0; u<(UChar32)noCases; u++) {
    626         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    627             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
    628             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    629         }
    630     }
    631     /*
    632     for(u = 0; u < charsToTestSize; u++) {
    633       if(!(u&0xFFFF)) {
    634         log_verbose("%08X ", u);
    635       }
    636       uprv_memset(t[noCases], 0, sizeof(tester));
    637       t[noCases]->u = u;
    638       len = 0;
    639       U16_APPEND_UNSAFE(comp, len, u);
    640       comp[len] = 0;
    641       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    642       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    643       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
    644       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
    645     }
    646     */
    647 
    648     ucol_close(coll);
    649 
    650     log_verbose("Testing locales, number of cases = %i\n", noCases);
    651     for(i = 0; i<noOfLoc; i++) {
    652         status = U_ZERO_ERROR;
    653         locName = uloc_getAvailable(i);
    654         if(hasCollationElements(locName)) {
    655             char cName[256];
    656             UChar name[256];
    657             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
    658 
    659             for(j = 0; j<nameSize; j++) {
    660                 cName[j] = (char)name[j];
    661             }
    662             cName[nameSize] = 0;
    663             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
    664 
    665             coll = ucol_open(locName, &status);
    666             ucol_setStrength(coll, UCOL_IDENTICAL);
    667             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    668 
    669             for(u=0; u<(UChar32)noCases; u++) {
    670                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    671                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
    672                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    673                     log_verbose("Testing NFC\n");
    674                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
    675                     backAndForth(iter);
    676                     log_verbose("Testing NFD\n");
    677                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    678                     backAndForth(iter);
    679                 }
    680             }
    681             ucol_closeElements(iter);
    682             ucol_close(coll);
    683         }
    684     }
    685     for(u = 0; u <= (UChar32)noCases; u++) {
    686         free(t[u]);
    687     }
    688     free(t);
    689 }
    690 
    691 static void TestEmptyRule(void) {
    692   UErrorCode status = U_ZERO_ERROR;
    693   UChar rulez[] = { 0 };
    694   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    695 
    696   ucol_close(coll);
    697 }
    698 
    699 static void TestUCARules(void) {
    700   UErrorCode status = U_ZERO_ERROR;
    701   UChar b[256];
    702   UChar *rules = b;
    703   uint32_t ruleLen = 0;
    704   UCollator *UCAfromRules = NULL;
    705   UCollator *coll = ucol_open("", &status);
    706   if(status == U_FILE_ACCESS_ERROR) {
    707     log_data_err("Is your data around?\n");
    708     return;
    709   } else if(U_FAILURE(status)) {
    710     log_err("Error opening collator\n");
    711     return;
    712   }
    713   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
    714 
    715   log_verbose("TestUCARules\n");
    716   if(ruleLen > 256) {
    717     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
    718     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
    719   }
    720   log_verbose("Rules length is %d\n", ruleLen);
    721   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
    722   if(U_SUCCESS(status)) {
    723     ucol_close(UCAfromRules);
    724   } else {
    725     log_verbose("Unable to create a collator from UCARules!\n");
    726   }
    727 /*
    728   u_unescape(blah, b, 256);
    729   ucol_getSortKey(coll, b, 1, res, 256);
    730 */
    731   ucol_close(coll);
    732   if(rules != b) {
    733     free(rules);
    734   }
    735 }
    736 
    737 
    738 /* Pinyin tonal order */
    739 /*
    740     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
    741           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
    742     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
    743     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
    744     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
    745     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
    746       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
    747 .. (\u00fc)
    748 
    749 However, in testing we got the following order:
    750     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
    751           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
    752     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
    753 .. (\u0113)
    754     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
    755     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
    756     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
    757 .. (\u01d8)
    758       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
    759 */
    760 
    761 static void TestBefore(void) {
    762   const static char *data[] = {
    763       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
    764       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
    765       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
    766       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
    767       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
    768       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    769   };
    770   genericRulesStarter(
    771     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    772     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    773     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    774     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    775     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    776     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
    777     data, UPRV_LENGTHOF(data));
    778 }
    779 
    780 #if 0
/* superseded by TestBeforePinyin */
/*
 * Passes the Pinyin vowels (plain vowel first, then its tone-marked forms)
 * to genericLocaleStarter() for the "zh" locale.
 */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
}
    794 #endif
    795 
    796 static void TestUpperCaseFirst(void) {
    797   const static char *data[] = {
    798     "I",
    799       "i",
    800       "Y",
    801       "y"
    802   };
    803   genericLocaleStarter("da", data, UPRV_LENGTHOF(data));
    804 }
    805 
    806 static void TestJ815(void) {
    807   const static char *data[] = {
    808     "aa",
    809       "Aa",
    810       "ab",
    811       "Ab",
    812       "ad",
    813       "Ad",
    814       "ae",
    815       "Ae",
    816       "\\u00e6",
    817       "\\u00c6",
    818       "af",
    819       "Af",
    820       "b",
    821       "B"
    822   };
    823   genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
    824   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
    825 }
    826 
    827 
    828 static void TestCase(void)
    829 {
    830     const static UChar gRules[MAX_TOKEN_LEN] =
    831     /*" & 0 < 1,\u2461<a,A"*/
    832     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
    833 
    834     const static UChar testCase[][MAX_TOKEN_LEN] =
    835     {
    836         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
    837         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
    838         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
    839         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
    840     };
    841 
    842     const static UCollationResult caseTestResults[][9] =
    843     {
    844         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    845         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
    846         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    847         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
    848     };
    849 
    850     const static UColAttributeValue caseTestAttributes[][2] =
    851     {
    852         { UCOL_LOWER_FIRST, UCOL_OFF},
    853         { UCOL_UPPER_FIRST, UCOL_OFF},
    854         { UCOL_LOWER_FIRST, UCOL_ON},
    855         { UCOL_UPPER_FIRST, UCOL_ON}
    856     };
    857     int32_t i,j,k;
    858     UErrorCode status = U_ZERO_ERROR;
    859     UCollationElements *iter;
    860     UCollator  *myCollation;
    861     myCollation = ucol_open("en_US", &status);
    862 
    863     if(U_FAILURE(status)){
    864         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    865         return;
    866     }
    867     log_verbose("Testing different case settings\n");
    868     ucol_setStrength(myCollation, UCOL_TERTIARY);
    869 
    870     for(k = 0; k<4; k++) {
    871       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    872       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    873       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
    874       for (i = 0; i < 3 ; i++) {
    875         for(j = i+1; j<4; j++) {
    876           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    877         }
    878       }
    879     }
    880     ucol_close(myCollation);
    881 
    882     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    883     if(U_FAILURE(status)){
    884         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    885         return;
    886     }
    887     log_verbose("Testing different case settings with custom rules\n");
    888     ucol_setStrength(myCollation, UCOL_TERTIARY);
    889 
    890     for(k = 0; k<4; k++) {
    891       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    892       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    893       for (i = 0; i < 3 ; i++) {
    894         for(j = i+1; j<4; j++) {
    895           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
    896           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    897           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
    898           backAndForth(iter);
    899           ucol_closeElements(iter);
    900           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
    901           backAndForth(iter);
    902           ucol_closeElements(iter);
    903         }
    904       }
    905     }
    906     ucol_close(myCollation);
    907     {
    908       const static char *lowerFirst[] = {
    909         "h",
    910         "H",
    911         "ch",
    912         "Ch",
    913         "CH",
    914         "cha",
    915         "chA",
    916         "Cha",
    917         "ChA",
    918         "CHa",
    919         "CHA",
    920         "i",
    921         "I"
    922       };
    923 
    924       const static char *upperFirst[] = {
    925         "H",
    926         "h",
    927         "CH",
    928         "Ch",
    929         "ch",
    930         "CHA",
    931         "CHa",
    932         "ChA",
    933         "Cha",
    934         "chA",
    935         "cha",
    936         "I",
    937         "i"
    938       };
    939       log_verbose("mixed case test\n");
    940       log_verbose("lower first, case level off\n");
    941       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
    942       log_verbose("upper first, case level off\n");
    943       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
    944       log_verbose("lower first, case level on\n");
    945       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
    946       log_verbose("upper first, case level on\n");
    947       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
    948     }
    949 
    950 }
    951 
    952 static void TestIncrementalNormalize(void) {
    953 
    954     /*UChar baseA     =0x61;*/
    955     UChar baseA     =0x41;
    956 /*    UChar baseB     = 0x42;*/
    957     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
    958     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
    959     /*
    960         0x316 is combining grave accent below, cc=220
    961         0x321 is combining palatalized hook below, cc=202
    962         0x300 is combining grave accent, cc=230
    963     */
    964 
    965 #define MAXSLEN 2000
    966     /*int          maxSLen   = 64000;*/
    967     int          sLen;
    968     int          i;
    969 
    970     UCollator        *coll;
    971     UErrorCode       status = U_ZERO_ERROR;
    972     UCollationResult result;
    973 
    974     int32_t myQ = getTestOption(QUICK_OPTION);
    975 
    976     if(getTestOption(QUICK_OPTION) < 0) {
    977         setTestOption(QUICK_OPTION, 1);
    978     }
    979 
    980     {
    981         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
    982         /*          most buffers along the way.*/
    983         UChar            strA[MAXSLEN+1];
    984         UChar            strB[MAXSLEN+1];
    985 
    986         coll = ucol_open("en_US", &status);
    987         if(status == U_FILE_ACCESS_ERROR) {
    988           log_data_err("Is your data around?\n");
    989           return;
    990         } else if(U_FAILURE(status)) {
    991           log_err("Error opening collator\n");
    992           return;
    993         }
    994         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    995 
    996         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
    997         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
    998         /*for (sLen = 1000; sLen<1001; sLen++) {*/
    999         for (sLen = 500; sLen<501; sLen++) {
   1000         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   1001             strA[0] = baseA;
   1002             strB[0] = baseA;
   1003             for (i=1; i<=sLen-1; i++) {
   1004                 strA[i] = ccMix[i % 3];
   1005                 strB[sLen-i] = ccMix[i % 3];
   1006             }
   1007             strA[sLen]   = 0;
   1008             strB[sLen]   = 0;
   1009 
   1010             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   1011             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   1012             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   1013             doTest(coll, strA, strB, UCOL_EQUAL);
   1014         }
   1015     }
   1016 
   1017     setTestOption(QUICK_OPTION, myQ);
   1018 
   1019 
   1020     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   1021     /*         of the string.  Checks a couple of edge cases.*/
   1022 
   1023     {
   1024         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   1025         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   1026         ucol_setStrength(coll, UCOL_TERTIARY);
   1027         doTest(coll, strA, strB, UCOL_EQUAL);
   1028     }
   1029 
   1030     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   1031 
   1032     {
   1033       /* New UCA  3.1.1.
   1034        * test below used a code point from Desseret, which sorts differently
   1035        * than d800 dc00
   1036        */
   1037         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   1038         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   1039         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   1040         ucol_setStrength(coll, UCOL_TERTIARY);
   1041         doTest(coll, strA, strB, UCOL_GREATER);
   1042     }
   1043 
   1044     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   1045 
   1046     {
   1047         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   1048         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   1049         char  sortKeyA[50];
   1050         char  sortKeyAz[50];
   1051         char  sortKeyB[50];
   1052         char  sortKeyBz[50];
   1053         int   r;
   1054 
   1055         /* there used to be -3 here. Hmmmm.... */
   1056         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   1057         result = ucol_strcoll(coll, strA, 3, strB, 3);
   1058         if (result != UCOL_GREATER) {
   1059             log_err("ERROR 1 in test 4\n");
   1060         }
   1061         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1062         if (result != UCOL_EQUAL) {
   1063             log_err("ERROR 2 in test 4\n");
   1064         }
   1065 
   1066         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1067         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1068         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1069         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1070 
   1071         r = strcmp(sortKeyA, sortKeyAz);
   1072         if (r <= 0) {
   1073             log_err("Error 3 in test 4\n");
   1074         }
   1075         r = strcmp(sortKeyA, sortKeyB);
   1076         if (r <= 0) {
   1077             log_err("Error 4 in test 4\n");
   1078         }
   1079         r = strcmp(sortKeyAz, sortKeyBz);
   1080         if (r != 0) {
   1081             log_err("Error 5 in test 4\n");
   1082         }
   1083 
   1084         ucol_setStrength(coll, UCOL_IDENTICAL);
   1085         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1086         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1087         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1088         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1089 
   1090         r = strcmp(sortKeyA, sortKeyAz);
   1091         if (r <= 0) {
   1092             log_err("Error 6 in test 4\n");
   1093         }
   1094         r = strcmp(sortKeyA, sortKeyB);
   1095         if (r <= 0) {
   1096             log_err("Error 7 in test 4\n");
   1097         }
   1098         r = strcmp(sortKeyAz, sortKeyBz);
   1099         if (r != 0) {
   1100             log_err("Error 8 in test 4\n");
   1101         }
   1102         ucol_setStrength(coll, UCOL_TERTIARY);
   1103     }
   1104 
   1105 
   1106     /*  Test 5:  Null characters in non-normal source strings.*/
   1107 
   1108     {
   1109         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   1110         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   1111         char  sortKeyA[50];
   1112         char  sortKeyAz[50];
   1113         char  sortKeyB[50];
   1114         char  sortKeyBz[50];
   1115         int   r;
   1116 
   1117         result = ucol_strcoll(coll, strA, 6, strB, 6);
   1118         if (result != UCOL_GREATER) {
   1119             log_err("ERROR 1 in test 5\n");
   1120         }
   1121         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1122         if (result != UCOL_EQUAL) {
   1123             log_err("ERROR 2 in test 5\n");
   1124         }
   1125 
   1126         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1127         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1128         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1129         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1130 
   1131         r = strcmp(sortKeyA, sortKeyAz);
   1132         if (r <= 0) {
   1133             log_err("Error 3 in test 5\n");
   1134         }
   1135         r = strcmp(sortKeyA, sortKeyB);
   1136         if (r <= 0) {
   1137             log_err("Error 4 in test 5\n");
   1138         }
   1139         r = strcmp(sortKeyAz, sortKeyBz);
   1140         if (r != 0) {
   1141             log_err("Error 5 in test 5\n");
   1142         }
   1143 
   1144         ucol_setStrength(coll, UCOL_IDENTICAL);
   1145         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1146         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1147         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1148         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1149 
   1150         r = strcmp(sortKeyA, sortKeyAz);
   1151         if (r <= 0) {
   1152             log_err("Error 6 in test 5\n");
   1153         }
   1154         r = strcmp(sortKeyA, sortKeyB);
   1155         if (r <= 0) {
   1156             log_err("Error 7 in test 5\n");
   1157         }
   1158         r = strcmp(sortKeyAz, sortKeyBz);
   1159         if (r != 0) {
   1160             log_err("Error 8 in test 5\n");
   1161         }
   1162         ucol_setStrength(coll, UCOL_TERTIARY);
   1163     }
   1164 
   1165 
   1166     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   1167 
   1168     {
   1169         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   1170         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   1171 
   1172         result = ucol_strcoll(coll, strA, 5, strB, 5);
   1173         if (result != UCOL_LESS) {
   1174             log_err("Error 1 in test 6\n");
   1175         }
   1176         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1177         if (result != UCOL_EQUAL) {
   1178             log_err("Error 2 in test 6\n");
   1179         }
   1180     }
   1181 
   1182     ucol_close(coll);
   1183 }
   1184 
   1185 
   1186 
   1187 #if 0
   1188 static void TestGetCaseBit(void) {
   1189   static const char *caseBitData[] = {
   1190     "a", "A", "ch", "Ch", "CH",
   1191       "\\uFF9E", "\\u0009"
   1192   };
   1193 
   1194   static const uint8_t results[] = {
   1195     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   1196       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   1197   };
   1198 
   1199   uint32_t i, blen = 0;
   1200   UChar b[256] = {0};
   1201   UErrorCode status = U_ZERO_ERROR;
   1202   UCollator *UCA = ucol_open("", &status);
   1203   uint8_t res = 0;
   1204 
   1205   for(i = 0; i<UPRV_LENGTHOF(results); i++) {
   1206     blen = u_unescape(caseBitData[i], b, 256);
   1207     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   1208     if(results[i] != res) {
   1209       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   1210     }
   1211   }
   1212 }
   1213 #endif
   1214 
   1215 static void TestHangulTailoring(void) {
   1216     static const char *koreanData[] = {
   1217         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   1218             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   1219             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   1220             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   1221             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   1222             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   1223     };
   1224 
   1225     const char *rules =
   1226         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   1227         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   1228         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   1229         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   1230         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   1231         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   1232 
   1233 
   1234   UErrorCode status = U_ZERO_ERROR;
   1235   UChar rlz[2048] = { 0 };
   1236   uint32_t rlen = u_unescape(rules, rlz, 2048);
   1237 
   1238   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   1239   if(status == U_FILE_ACCESS_ERROR) {
   1240     log_data_err("Is your data around?\n");
   1241     return;
   1242   } else if(U_FAILURE(status)) {
   1243     log_err("Error opening collator\n");
   1244     return;
   1245   }
   1246 
   1247   log_verbose("Using start of korean rules\n");
   1248 
   1249   if(U_SUCCESS(status)) {
   1250     genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
   1251   } else {
   1252     log_err("Unable to open collator with rules %s\n", rules);
   1253   }
   1254 
   1255   ucol_close(coll);
   1256 
   1257   log_verbose("Using ko__LOTUS locale\n");
   1258   genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
   1259 }
   1260 
   1261 /*
   1262  * The secondary/tertiary compression middle byte
   1263  * as used by the current implementation.
   1264  * Subject to change as the sort key compression changes.
   1265  * See class CollationKeys.
   1266  */
   1267 enum {
   1268     SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
   1269     TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
   1270 };
   1271 
   1272 static void TestCompressOverlap(void) {
   1273     UChar       secstr[150];
   1274     UChar       tertstr[150];
   1275     UErrorCode  status = U_ZERO_ERROR;
   1276     UCollator  *coll;
   1277     uint8_t     result[500];
   1278     uint32_t    resultlen;
   1279     int         count = 0;
   1280     uint8_t    *tempptr;
   1281 
   1282     coll = ucol_open("", &status);
   1283 
   1284     if (U_FAILURE(status)) {
   1285         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   1286         return;
   1287     }
   1288     while (count < 149) {
   1289         secstr[count] = 0x0020; /* [06, 05, 05] */
   1290         tertstr[count] = 0x0020;
   1291         count ++;
   1292     }
   1293 
   1294     /* top down compression ----------------------------------- */
   1295     secstr[count] = 0x0332; /* [, 87, 05] */
   1296     tertstr[count] = 0x3000; /* [06, 05, 07] */
   1297 
   1298     /* no compression secstr should have 150 secondary bytes, tertstr should
   1299     have 150 tertiary bytes.
   1300     with correct compression, secstr should have 6 secondary
   1301     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
   1302     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
   1303     (void)resultlen;    /* Suppress set but not used warning. */
   1304     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1305     while (*(tempptr + 1) != 1) {
   1306         /* the last secondary collation element is not checked since it is not
   1307         part of the compression */
   1308         if (*tempptr < SEC_COMMON_MIDDLE) {
   1309             log_err("Secondary top down compression overlapped\n");
   1310         }
   1311         tempptr ++;
   1312     }
   1313 
   1314     /* tertiary top/bottom/common for en_US is similar to the secondary
   1315     top/bottom/common */
   1316     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
   1317     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1318     while (*(tempptr + 1) != 0) {
   1319         /* the last secondary collation element is not checked since it is not
   1320         part of the compression */
   1321         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
   1322             log_err("Tertiary top down compression overlapped\n");
   1323         }
   1324         tempptr ++;
   1325     }
   1326 
   1327     /* bottom up compression ------------------------------------- */
   1328     secstr[count] = 0;
   1329     tertstr[count] = 0;
   1330     resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
   1331     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1332     while (*(tempptr + 1) != 1) {
   1333         /* the last secondary collation element is not checked since it is not
   1334         part of the compression */
   1335         if (*tempptr > SEC_COMMON_MIDDLE) {
   1336             log_err("Secondary bottom up compression overlapped\n");
   1337         }
   1338         tempptr ++;
   1339     }
   1340 
   1341     /* tertiary top/bottom/common for en_US is similar to the secondary
   1342     top/bottom/common */
   1343     resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
   1344     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1345     while (*(tempptr + 1) != 0) {
   1346         /* the last secondary collation element is not checked since it is not
   1347         part of the compression */
   1348         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
   1349             log_err("Tertiary bottom up compression overlapped\n");
   1350         }
   1351         tempptr ++;
   1352     }
   1353 
   1354     ucol_close(coll);
   1355 }
   1356 
   1357 static void TestCyrillicTailoring(void) {
   1358   static const char *test[] = {
   1359     "\\u0410b",
   1360       "\\u0410\\u0306a",
   1361       "\\u04d0A"
   1362   };
   1363 
   1364     /* Russian overrides contractions, so this test is not valid anymore */
   1365     /*genericLocaleStarter("ru", test, 3);*/
   1366 
   1367     // Most of the following are commented out because UCA 8.0
   1368     // drops most of the Cyrillic contractions from the default order.
   1369     // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
   1370 
   1371     // genericLocaleStarter("root", test, 3);
   1372     // genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   1373     // genericRulesStarter("&Z < \\u0410", test, 3);
   1374     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   1375     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   1376     // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   1377     // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   1378 }
   1379 
   1380 static void TestSuppressContractions(void) {
   1381 
   1382   static const char *testNoCont2[] = {
   1383       "\\u0410\\u0302a",
   1384       "\\u0410\\u0306b",
   1385       "\\u0410c"
   1386   };
   1387   static const char *testNoCont[] = {
   1388       "a\\u0410",
   1389       "A\\u0410\\u0306",
   1390       "\\uFF21\\u0410\\u0302"
   1391   };
   1392 
   1393   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   1394   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   1395 }
   1396 
   1397 static void TestContraction(void) {
   1398     const static char *testrules[] = {
   1399         "&A = AB / B",
   1400         "&A = A\\u0306/\\u0306",
   1401         "&c = ch / h"
   1402     };
   1403     const static UChar testdata[][2] = {
   1404         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   1405         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   1406         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   1407     };
   1408     const static UChar testdata2[][2] = {
   1409         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   1410         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   1411         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   1412     };
   1413 #if 0
   1414     /*
   1415      * These pairs of rule strings are not guaranteed to yield the very same mappings.
   1416      * In fact, LDML 24 recommends an improved way of creating mappings
   1417      * which always yields different mappings for such pairs. See
   1418      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
   1419      */
   1420     const static char *testrules3[] = {
   1421         "&z < xyz &xyzw << B",
   1422         "&z < xyz &xyz << B / w",
   1423         "&z < ch &achm << B",
   1424         "&z < ch &a << B / chm",
   1425         "&\\ud800\\udc00w << B",
   1426         "&\\ud800\\udc00 << B / w",
   1427         "&a\\ud800\\udc00m << B",
   1428         "&a << B / \\ud800\\udc00m",
   1429     };
   1430 #endif
   1431 
   1432     UErrorCode  status   = U_ZERO_ERROR;
   1433     UCollator  *coll;
   1434     UChar       rule[256] = {0};
   1435     uint32_t    rlen     = 0;
   1436     int         i;
   1437 
   1438     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
   1439         UCollationElements *iter1;
   1440         int j = 0;
   1441         log_verbose("Rule %s for testing\n", testrules[i]);
   1442         rlen = u_unescape(testrules[i], rule, 32);
   1443         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1444         if (U_FAILURE(status)) {
   1445             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1446             return;
   1447         }
   1448         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   1449         if (U_FAILURE(status)) {
   1450             log_err("Collation iterator creation failed\n");
   1451             return;
   1452         }
   1453         while (j < 2) {
   1454             UCollationElements *iter2 = ucol_openElements(coll,
   1455                                                          &(testdata[i][j]),
   1456                                                          1, &status);
   1457             uint32_t ce;
   1458             if (U_FAILURE(status)) {
   1459                 log_err("Collation iterator creation failed\n");
   1460                 return;
   1461             }
   1462             ce = ucol_next(iter2, &status);
   1463             while (ce != UCOL_NULLORDER) {
   1464                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   1465                     log_err("Collation elements in contraction split does not match\n");
   1466                     return;
   1467                 }
   1468                 ce = ucol_next(iter2, &status);
   1469             }
   1470             j ++;
   1471             ucol_closeElements(iter2);
   1472         }
   1473         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   1474             log_err("Collation elements not exhausted\n");
   1475             return;
   1476         }
   1477         ucol_closeElements(iter1);
   1478         ucol_close(coll);
   1479     }
   1480 
   1481     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   1482     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1483     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   1484         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1485                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   1486                 testdata2[1][1]);
   1487         return;
   1488     }
   1489     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   1490         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1491                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   1492                 testdata2[2][1]);
   1493         return;
   1494     }
   1495     ucol_close(coll);
   1496 #if 0  /* see above */
   1497     for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
   1498         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
   1499         UCollator          *coll1,
   1500                            *coll2;
   1501         UCollationElements *iter1,
   1502                            *iter2;
   1503         UChar               ch = 0x0042 /* 'B' */;
   1504         uint32_t            ce;
   1505         rlen = u_unescape(testrules3[i], rule, 32);
   1506         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1507         rlen = u_unescape(testrules3[i + 1], rule, 32);
   1508         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1509         if (U_FAILURE(status)) {
   1510             log_err("Collator creation failed %s\n", testrules[i]);
   1511             return;
   1512         }
   1513         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   1514         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   1515         if (U_FAILURE(status)) {
   1516             log_err("Collation iterator creation failed\n");
   1517             return;
   1518         }
   1519         ce = ucol_next(iter1, &status);
   1520         if (U_FAILURE(status)) {
   1521             log_err("Retrieving ces failed\n");
   1522             return;
   1523         }
   1524         while (ce != UCOL_NULLORDER) {
   1525             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
   1526             if (ce == ce2) {
   1527                 log_verbose("CEs match: %08x\n", ce);
   1528             } else {
   1529                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
   1530                 return;
   1531             }
   1532             ce = ucol_next(iter1, &status);
   1533             if (U_FAILURE(status)) {
   1534                 log_err("Retrieving ces failed\n");
   1535                 return;
   1536             }
   1537         }
   1538         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   1539             log_err("CEs not exhausted\n");
   1540             return;
   1541         }
   1542         ucol_closeElements(iter1);
   1543         ucol_closeElements(iter2);
   1544         ucol_close(coll1);
   1545         ucol_close(coll2);
   1546     }
   1547 #endif
   1548 }
   1549 
   1550 static void TestExpansion(void) {
   1551     const static char *testrules[] = {
   1552 #if 0
   1553         /*
   1554          * This seems to have tested that M was not mapped to an expansion.
   1555          * I believe the old builder just did that because it computed the extension CEs
   1556          * at the very end, which was a bug.
   1557          * Among other problems, it violated the core tailoring principle
   1558          * by making an earlier rule depend on a later one.
   1559          * And, of course, if M did not get an expansion, then it was primary different from K,
   1560          * unlike what the rule &K<<M says.
   1561          */
   1562         "&J << K / B & K << M",
   1563 #endif
   1564         "&J << K / B << M"
   1565     };
   1566     const static UChar testdata[][3] = {
   1567         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   1568         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   1569         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   1570         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   1571         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   1572         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   1573     };
   1574 
   1575     UErrorCode  status   = U_ZERO_ERROR;
   1576     UCollator  *coll;
   1577     UChar       rule[256] = {0};
   1578     uint32_t    rlen     = 0;
   1579     int         i;
   1580 
   1581     for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
   1582         int j = 0;
   1583         log_verbose("Rule %s for testing\n", testrules[i]);
   1584         rlen = u_unescape(testrules[i], rule, 32);
   1585         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1586         if (U_FAILURE(status)) {
   1587             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1588             return;
   1589         }
   1590 
   1591         for (j = 0; j < 5; j ++) {
   1592             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   1593         }
   1594         ucol_close(coll);
   1595     }
   1596 }
   1597 
   1598 #if 0
   1599 /* this test tests the current limitations of the engine */
   1600 /* it always fail, so it is disabled by default */
   1601 static void TestLimitations(void) {
          /*
           * Disabled (inside #if 0) collection of rule/data cases. Each scope
           * below declares one rule and one or more string tables and passes
           * them to genericRulesStarter() or genericRulesStarterWithOptions();
           * the ...WithOptions calls additionally receive parallel arrays of
           * attributes and values together with their count.
           */
   1602   /* recursive expansions */
   1603   {
   1604     static const char *rule = "&a=b/c&d=c/e";
   1605     static const char *tlimit01[] = {"add","b","adf"};
   1606     static const char *tlimit02[] = {"aa","b","af"};
   1607     log_verbose("recursive expansions\n");
   1608     genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
   1609     genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
   1610   }
   1611   /* contractions spanning expansions */
   1612   {
   1613     static const char *rule = "&a<<<c/e&g<<<eh";
   1614     static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
   1615     static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
   1616     log_verbose("contractions spanning expansions\n");
   1617     genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
   1618     genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
   1619   }
   1620   /* normalization: nulls in contractions */
          /* Both tables are run with UCOL_DECOMPOSITION_MODE on and then off. */
   1621   {
   1622     static const char *rule = "&a<<<\\u0000\\u0302";
   1623     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   1624     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   1625     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   1626     static const UColAttributeValue valOn[] = { UCOL_ON };
   1627     static const UColAttributeValue valOff[] = { UCOL_OFF };
   1628 
   1629     log_verbose("NULL in contractions\n");
   1630     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   1631     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   1632     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   1633     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   1634 
   1635   }
   1636   /* normalization: contractions spanning normalization */
          /* NOTE(review): the rule and both tables in this scope are identical */
          /* to the ones in the preceding scope; only the log message differs.  */
   1637   {
   1638     static const char *rule = "&a<<<\\u0000\\u0302";
   1639     static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
   1640     static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
   1641     static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
   1642     static const UColAttributeValue valOn[] = { UCOL_ON };
   1643     static const UColAttributeValue valOff[] = { UCOL_OFF };
   1644 
   1645     log_verbose("contractions spanning normalization\n");
   1646     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
   1647     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
   1648     genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
   1649     genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
   1650 
   1651   }
   1652   /* variable top:  */
          /* Three tables are run with shifted/quaternary settings; the second  */
          /* and third of the original list are run again with non-ignorable/   */
          /* tertiary settings.                                                 */
   1653   {
   1654     /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
   1655     static const char *rule = "&\\u2010<x<[variable top]=z";
   1656     /*static const char *rule3 = "&' '<x<[variable top]=z";*/
   1657     static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
   1658     static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
   1659     static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
   1660     static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
   1661     static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
   1662     static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
   1663 
   1664     log_verbose("variable top\n");
   1665     genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
   1666     genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
   1667     genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
   1668     genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
   1669     genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));
   1670 
   1671   }
   1672   /* case level */
          /* NOTE(review): labelled "case level", but the attribute set in this */
          /* scope is UCOL_CASE_FIRST (with UCOL_UPPER_FIRST).                  */
   1673   {
   1674     static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
   1675     static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
   1676     static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
   1677     static const UColAttribute att[] = { UCOL_CASE_FIRST};
   1678     static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
   1679     /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
   1680     log_verbose("case level\n");
   1681     genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
   1682     genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
   1683     /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
   1684     /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
   1685   }
   1686 
   1687 }
   1688 #endif
   1689 
   1690 static void TestBocsuCoverage(void) {
   1691   /* Coverage only: produce an identical-strength sort key; its bytes are not checked. */
   1692   const char *escapedText = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   1693   UChar text[256] = {0};
   1694   uint8_t sortKey[256] = {0};
   1695   UErrorCode errorCode = U_ZERO_ERROR;
   1696   uint32_t textLength = u_unescape(escapedText, text, 32);
   1697   uint32_t keyLength = 0;
   1698   UCollator *rootColl = ucol_open("", &errorCode);
   1699 
   1700   if(U_FAILURE(errorCode)) {
   1701     log_data_err("Couldn't open UCA\n");
   1702     return;
   1703   }
   1704 
   1705   ucol_setAttribute(rootColl, UCOL_STRENGTH, UCOL_IDENTICAL, &errorCode);
   1706   keyLength = ucol_getSortKey(rootColl, text, textLength, sortKey, 256);
   1707   (void)keyLength;    /* Only assigned; silences the set-but-unused warning. */
   1708   ucol_close(rootColl);
   1709 }
   1710 
   1711 static void TestVariableTopSetting(void) {
          /*
           * Exercises ucol_setVariableTop(), ucol_getVariableTop() and
           * ucol_restoreVariableTop() on the collator opened with the "" locale,
           * with alternate handling set to UCOL_SHIFTED.
           *
           * For each of space, dot, degree and dollar used as the new variable
           * top, the checks expect:
           *  - the set and get calls to return the same value without error,
           *  - that character and the ones tried before it to compare equal to
           *    the empty string, while the later ones and the digit zero do not,
           *  - the character to compare less than the next one in the sequence.
           * Afterwards it checks that a two-character string is rejected, that
           * the original value can be restored, and that a pre-set error status
           * is left unchanged by all three functions (also for a NULL collator).
           * If the collator cannot be opened, only a data error is logged.
           */
   1712   UErrorCode status = U_ZERO_ERROR;
   1713   uint32_t varTopOriginal = 0, varTop1, varTop2;
   1714   UCollator *coll = ucol_open("", &status);
   1715   if(U_SUCCESS(status)) {
   1716 
          /* "nul" is only ever passed with length 0, i.e. as the empty string. */
   1717   static const UChar nul = 0;
   1718   static const UChar space = 0x20;
   1719   static const UChar dot = 0x2e;  /* punctuation */
   1720   static const UChar degree = 0xb0;  /* symbol */
   1721   static const UChar dollar = 0x24;  /* currency symbol */
   1722   static const UChar zero = 0x30;  /* digit */
   1723 
   1724   varTopOriginal = ucol_getVariableTop(coll, &status);
   1725   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
   1726   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1727 
   1728   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1729   varTop2 = ucol_getVariableTop(coll, &status);
   1730   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
   1731   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1732       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1733       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1734       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1735       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1736       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1737       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1738     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
   1739   }
   1740 
   1741   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
   1742   varTop2 = ucol_getVariableTop(coll, &status);
   1743   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
   1744   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1745       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1746       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1747       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1748       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1749       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1750       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1751     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
   1752   }
   1753 
   1754   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
   1755   varTop2 = ucol_getVariableTop(coll, &status);
   1756   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
   1757   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1758       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1759       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1760       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1761       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1762       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1763       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1764     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
   1765   }
   1766 
   1767   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
   1768   varTop2 = ucol_getVariableTop(coll, &status);
   1769   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
   1770   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1771       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1772       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1773       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1774       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1775       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1776       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1777     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
   1778   }
   1779 
   1780   log_verbose("Testing setting variable top to contractions\n");
   1781   {
            /* 0x0040 0x0050, NUL-terminated and passed with length -1; */
            /* a successful status from the call is reported as an error. */
   1782     UChar first[4] = { 0 };
   1783     first[0] = 0x0040;
   1784     first[1] = 0x0050;
   1785     first[2] = 0x0000;
   1786 
   1787     status = U_ZERO_ERROR;
   1788     ucol_setVariableTop(coll, first, -1, &status);
   1789 
   1790     if(U_SUCCESS(status)) {
   1791       log_err("Invalid contraction succeeded in setting variable top!\n");
   1792     }
   1793 
   1794   }
   1795 
   1796   log_verbose("Test restoring variable top\n");
   1797 
   1798   status = U_ZERO_ERROR;
   1799   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   1800   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   1801     log_err("Couldn't restore old variable top\n");
   1802   }
   1803 
   1804   log_verbose("Testing calling with error set\n");
   1805 
          /* With a failure code already in status, every call below (including */
          /* the ones given a NULL collator) must leave status as it was.       */
   1806   status = U_INTERNAL_PROGRAM_ERROR;
   1807   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1808   varTop2 = ucol_getVariableTop(coll, &status);
   1809   ucol_restoreVariableTop(coll, varTop2, &status);
   1810   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
   1811   varTop2 = ucol_getVariableTop(NULL, &status);
   1812   ucol_restoreVariableTop(NULL, varTop2, &status);
   1813   if(status != U_INTERNAL_PROGRAM_ERROR) {
   1814     log_err("Bad reaction to passed error!\n");
   1815   }
   1816   ucol_close(coll);
   1817   } else {
   1818     log_data_err("Couldn't open UCA collator\n");
   1819   }
   1820 }
   1821 
   1822 static void TestMaxVariable(void) {
          /*
           * Exercises ucol_setMaxVariable() / ucol_getMaxVariable() on the
           * collator opened with the "" locale, with alternate handling set to
           * UCOL_SHIFTED. For each of the space, punctuation, symbol and
           * currency reorder codes the checks expect the getter to return the
           * code just set, the sample characters of that group and of the groups
           * tried before it to compare equal to the empty string, and the sample
           * characters of later groups (and the digit zero) not to.
           * Finally the original value is restored, and a call made with a
           * pre-set error status is expected to change neither the setting nor
           * the status.
           */
   1823   UErrorCode status = U_ZERO_ERROR;
   1824   UColReorderCode oldMax, max;
   1825   UCollator *coll;
   1826 
          /* "nul" is only ever passed with length 0, i.e. as the empty string. */
   1827   static const UChar nul = 0;
   1828   static const UChar space = 0x20;
   1829   static const UChar dot = 0x2e;  /* punctuation */
   1830   static const UChar degree = 0xb0;  /* symbol */
   1831   static const UChar dollar = 0x24;  /* currency symbol */
   1832   static const UChar zero = 0x30;  /* digit */
   1833 
   1834   coll = ucol_open("", &status);
   1835   if(U_FAILURE(status)) {
   1836     log_data_err("Couldn't open root collator\n");
   1837     return;
   1838   }
   1839 
   1840   oldMax = ucol_getMaxVariable(coll);
   1841   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
   1842   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1843 
   1844   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1845   max = ucol_getMaxVariable(coll);
   1846   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
   1847   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
   1848       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1849       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1850       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1851       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1852       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1853       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1854     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
   1855   }
   1856 
   1857   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
   1858   max = ucol_getMaxVariable(coll);
   1859   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
   1860   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
   1861       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1862       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1863       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1864       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1865       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1866       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1867     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
   1868   }
   1869 
   1870   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
   1871   max = ucol_getMaxVariable(coll);
   1872   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
   1873   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
   1874       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1875       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1876       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1877       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1878       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1879       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1880     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
   1881   }
   1882 
   1883   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
   1884   max = ucol_getMaxVariable(coll);
   1885   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
   1886   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
   1887       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1888       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1889       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1890       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1891       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1892       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1893     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
   1894   }
   1895 
   1896   log_verbose("Test restoring maxVariable\n");
   1897   status = U_ZERO_ERROR;
   1898   ucol_setMaxVariable(coll, oldMax, &status);
   1899   if(oldMax != ucol_getMaxVariable(coll)) {
   1900     log_err("Couldn't restore old maxVariable\n");
   1901   }
   1902 
   1903   log_verbose("Testing calling with error set\n");
          /* The setter is called with a failure code already in status; the   */
          /* setting must still read back as oldMax and status must be intact. */
   1904   status = U_INTERNAL_PROGRAM_ERROR;
   1905   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1906   max = ucol_getMaxVariable(coll);
   1907   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
   1908     log_err("Bad reaction to passed error!\n");
   1909   }
   1910   ucol_close(coll);
   1911 }
   1912 
   1913 static void TestNonChars(void) {
          /*
           * Passes the table below to genericOrderingTestWithResult() with
           * UCOL_LESS as the expected result, using a collator opened for
           * "en_US". The per-entry comments state how each group of code points
           * is expected to be weighted. If the collator cannot be opened, an
           * error is logged instead.
           */
   1914   static const char *test[] = {
   1915       "\\u0000",  /* ignorable */
   1916       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   1917       "\\uFDD0", "\\uFDEF",
   1918       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   1919       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   1920       "\\U0003FFFE", "\\U0003FFFF",
   1921       "\\U0004FFFE", "\\U0004FFFF",
   1922       "\\U0005FFFE", "\\U0005FFFF",
   1923       "\\U0006FFFE", "\\U0006FFFF",
   1924       "\\U0007FFFE", "\\U0007FFFF",
   1925       "\\U0008FFFE", "\\U0008FFFF",
   1926       "\\U0009FFFE", "\\U0009FFFF",
   1927       "\\U000AFFFE", "\\U000AFFFF",
   1928       "\\U000BFFFE", "\\U000BFFFF",
   1929       "\\U000CFFFE", "\\U000CFFFF",
   1930       "\\U000DFFFE", "\\U000DFFFF",
   1931       "\\U000EFFFE", "\\U000EFFFF",
   1932       "\\U000FFFFE", "\\U000FFFFF",
   1933       "\\U0010FFFE", "\\U0010FFFF",
   1934       "\\uFFFF"  /* special character with maximum primary weight */
   1935   };
   1936   UErrorCode status = U_ZERO_ERROR;
   1937   UCollator *coll = ucol_open("en_US", &status);
   1938 
   1939   log_verbose("Test non characters\n");
   1940 
   1941   if(U_SUCCESS(status)) {
            /* Pass the real table length so that the final entries */
            /* (U+10FFFF and U+FFFF) take part in the comparison too. */
   1942     genericOrderingTestWithResult(coll, test, UPRV_LENGTHOF(test), UCOL_LESS);
   1943   } else {
   1944     log_err_status(status, "Unable to open collator\n");
   1945   }
   1946 
   1947   ucol_close(coll);
   1948 }
   1949 
   1950 static void TestExtremeCompression(void) {
          /*
           * For every length j from 20 to 499, fills four buffers with j-1 'a'
           * characters followed by 'a', 'b', 'c' and 'd' respectively, and
           * passes the four strings to genericLocaleStarter() for "en_US".
           * If a buffer cannot be allocated, an error is logged, the buffers
           * obtained so far are released, and the test returns.
           */
   1951   static char *test[4];
   1952   int32_t j = 0, i = 0;
   1953 
   1954   for(i = 0; i<4; i++) {
   1955     test[i] = (char *)malloc(2048*sizeof(char));
            if(test[i] == NULL) {
              /* Do not write through a NULL buffer below; undo and give up. */
              log_err("TestExtremeCompression: out of memory\n");
              while(i > 0) {
                free(test[--i]);
              }
              return;
            }
   1956   }
   1957 
   1958   for(j = 20; j < 500; j++) {
   1959     for(i = 0; i<4; i++) {
   1960       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   1961       test[i][j-1] = (char)('a'+i);
   1962       test[i][j] = 0;
   1963     }
   1964     genericLocaleStarter("en_US", (const char **)test, 4);
   1965   }
   1966 
   1967 
   1968   for(i = 0; i<4; i++) {
   1969     free(test[i]);
   1970   }
   1971 }
   1972 
   1973 #if 0
   1974 static void TestExtremeCompression(void) {
          /*
           * Disabled (inside #if 0) variant of the test of the same name.
           * It fills the four buffers for every j from 10 to 2047 but calls
           * genericLocaleStarter() only once, after the first pair of loops.
           * NOTE(review): ucol_open() below receives "status" rather than
           * "&status", and "coll" is neither used nor closed afterwards.
           */
   1975   static char *test[4];
   1976   int32_t j = 0, i = 0;
   1977   UErrorCode status = U_ZERO_ERROR;
   1978   UCollator *coll = ucol_open("en_US", status);
   1979   for(i = 0; i<4; i++) {
   1980     test[i] = (char *)malloc(2048*sizeof(char));
   1981   }
   1982   for(j = 10; j < 2048; j++) {
   1983     for(i = 0; i<4; i++) {
   1984       uprv_memset(test[i], 'a', (j-2)*sizeof(char));
   1985       test[i][j-1] = (char)('a'+i);
   1986       test[i][j] = 0;
   1987     }
   1988   }
   1989   genericLocaleStarter("en_US", (const char **)test, 4);
   1990 
          /* Only test[0] is rewritten here, and nothing is compared afterwards. */
   1991   for(j = 10; j < 2048; j++) {
   1992     for(i = 0; i<1; i++) {
   1993       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   1994       test[i][j] = 0;
   1995     }
   1996   }
   1997   for(i = 0; i<4; i++) {
   1998     free(test[i]);
   1999   }
   2000 }
   2001 #endif
   2002 
   2003 static void TestSurrogates(void) {
          /*
           * Tailors strings that contain surrogate pairs (alone and followed by
           * ASCII letters) after "z" and passes the rule together with the
           * table below to genericRulesStarter().
           */
   2004   static const char *test[] = {
   2005     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   2006        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   2007        "\\ud800\\udc00f",  "\\ud800\\udc00",
   2008        "\\ud800\\udc00c", "\\ud800\\udc00b",
   2009        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   2010        "\\ud800\\udc00a",
   2011        "c", "b"
   2012   };
   2013 
   2014   static const char *rule =
   2015     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   2016        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   2017        "< \\ud800\\udc00f  << \\ud800\\udc00"
   2018        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   2019        "< \\ud800\\udc00a  < c < b" ;
   2020 
          /* Derive the count from the table so the two cannot drift apart. */
   2021   genericRulesStarter(rule, test, UPRV_LENGTHOF(test));
   2022 }
   2023 
   2024 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   2025 static void TestPrefix(void) {
   2026   /* Each entry: a tailoring written with the '|' notation, the strings
   2027    * handed to genericRulesStarter(), and the count passed along with them. */
   2028   static const struct {
   2029     const char *rules;
   2030     const char *data[50];
   2031     const uint32_t len;
   2032   } tests[] = {
   2033     { "&z <<< z|a", {"zz", "za"}, 2 },
   2034     { "&z <<< z|   a", {"zz", "za"}, 2 },
   2035     { "[strength I]" "&a=\\ud900\\udc25" "&z<<<\\ud900\\udc25|a",
   2036       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   2037   };
   2038   const uint32_t caseCount = UPRV_LENGTHOF(tests);
   2039   uint32_t caseIdx = 0;
   2040 
   2041   /* Run every entry through the shared rule-based driver, in table order. */
   2042   while(caseIdx < caseCount) {
   2043     genericRulesStarter(tests[caseIdx].rules,
   2044                         tests[caseIdx].data,
   2045                         tests[caseIdx].len);
   2046     caseIdx++;
   2047   }
   2048 }
   2049 
   2050 /* This test uses data supplied by Masashiko Maedera to test the          */
   2051 /* implementation of the JIS X 4061 collation order.                      */
   2052 static void TestNewJapanese(void) {
          /*
           * Runs the two string tables below through
           * genericLocaleStarterWithOptions() for the "ja" locale: first with
           * only UCOL_STRENGTH set to UCOL_QUATERNARY, then with
           * UCOL_ALTERNATE_HANDLING additionally set to UCOL_SHIFTED.
           * A third table (test3) and its call are commented out.
           */
   2053 
   2054   static const char * const test1[] = {
   2055       "\\u30b7\\u30e3\\u30fc\\u30ec",
   2056       "\\u30b7\\u30e3\\u30a4",
   2057       "\\u30b7\\u30e4\\u30a3",
   2058       "\\u30b7\\u30e3\\u30ec",
   2059       "\\u3061\\u3087\\u3053",
   2060       "\\u3061\\u3088\\u3053",
   2061       "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
   2062       "\\u3066\\u30fc\\u305f",
   2063       "\\u30c6\\u30fc\\u30bf",
   2064       "\\u30c6\\u30a7\\u30bf",
   2065       "\\u3066\\u3048\\u305f",
   2066       "\\u3067\\u30fc\\u305f",
   2067       "\\u30c7\\u30fc\\u30bf",
   2068       "\\u30c7\\u30a7\\u30bf",
   2069       "\\u3067\\u3048\\u305f",
   2070       "\\u3066\\u30fc\\u305f\\u30fc",
   2071       "\\u30c6\\u30fc\\u30bf\\u30a1",
   2072       "\\u30c6\\u30a7\\u30bf\\u30fc",
   2073       "\\u3066\\u3047\\u305f\\u3041",
   2074       "\\u3066\\u3048\\u305f\\u30fc",
   2075       "\\u3067\\u30fc\\u305f\\u30fc",
   2076       "\\u30c7\\u30fc\\u30bf\\u30a1",
   2077       "\\u3067\\u30a7\\u305f\\u30a1",
   2078       "\\u30c7\\u3047\\u30bf\\u3041",
   2079       "\\u30c7\\u30a8\\u30bf\\u30a2",
   2080       "\\u3072\\u3086",
   2081       "\\u3073\\u3085\\u3042",
   2082       "\\u3074\\u3085\\u3042",
   2083       "\\u3073\\u3085\\u3042\\u30fc",
   2084       "\\u30d3\\u30e5\\u30a2\\u30fc",
   2085       "\\u3074\\u3085\\u3042\\u30fc",
   2086       "\\u30d4\\u30e5\\u30a2\\u30fc",
   2087       "\\u30d2\\u30e5\\u30a6",
   2088       "\\u30d2\\u30e6\\u30a6",
   2089       "\\u30d4\\u30e5\\u30a6\\u30a2",
   2090       "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
   2091       "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
   2092       "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
   2093       "\\u3072\\u3085\\u3093",
   2094       "\\u3074\\u3085\\u3093",
   2095       "\\u3075\\u30fc\\u308a",
   2096       "\\u30d5\\u30fc\\u30ea",
   2097       "\\u3075\\u3045\\u308a",
   2098       "\\u3075\\u30a5\\u308a",
   2099       "\\u3075\\u30a5\\u30ea",
   2100       "\\u30d5\\u30a6\\u30ea",
   2101       "\\u3076\\u30fc\\u308a",
   2102       "\\u30d6\\u30fc\\u30ea",
   2103       "\\u3076\\u3045\\u308a",
   2104       "\\u30d6\\u30a5\\u308a",
   2105       "\\u3077\\u3046\\u308a",
   2106       "\\u30d7\\u30a6\\u30ea",
   2107       "\\u3075\\u30fc\\u308a\\u30fc",
   2108       "\\u30d5\\u30a5\\u30ea\\u30fc",
   2109       "\\u3075\\u30a5\\u308a\\u30a3",
   2110       "\\u30d5\\u3045\\u308a\\u3043",
   2111       "\\u30d5\\u30a6\\u30ea\\u30fc",
   2112       "\\u3075\\u3046\\u308a\\u3043",
   2113       "\\u30d6\\u30a6\\u30ea\\u30a4",
   2114       "\\u3077\\u30fc\\u308a\\u30fc",
   2115       "\\u3077\\u30a5\\u308a\\u30a4",
   2116       "\\u3077\\u3046\\u308a\\u30fc",
   2117       "\\u30d7\\u30a6\\u30ea\\u30a4",
   2118       "\\u30d5\\u30fd",
   2119       "\\u3075\\u309e",
   2120       "\\u3076\\u309d",
   2121       "\\u3076\\u3075",
   2122       "\\u3076\\u30d5",
   2123       "\\u30d6\\u3075",
   2124       "\\u30d6\\u30d5",
   2125       "\\u3076\\u309e",
   2126       "\\u3076\\u3077",
   2127       "\\u30d6\\u3077",
   2128       "\\u3077\\u309d",
   2129       "\\u30d7\\u30fd",
   2130       "\\u3077\\u3075",
   2131 };
   2132 
          /* NOTE(review): the H / K letters in the comments below presumably */
          /* stand for hiragana / katakana forms - confirm.                    */
   2133   static const char *test2[] = {
   2134     "\\u306f\\u309d", /* H\\u309d */
   2135     "\\u30cf\\u30fd", /* K\\u30fd */
   2136     "\\u306f\\u306f", /* HH */
   2137     "\\u306f\\u30cf", /* HK */
   2138     "\\u30cf\\u30cf", /* KK */
   2139     "\\u306f\\u309e", /* H\\u309e */
   2140     "\\u30cf\\u30fe", /* K\\u30fe */
   2141     "\\u306f\\u3070", /* HH\\u309b */
   2142     "\\u30cf\\u30d0", /* KK\\u309b */
   2143     "\\u306f\\u3071", /* HH\\u309c */
   2144     "\\u30cf\\u3071", /* KH\\u309c */
   2145     "\\u30cf\\u30d1", /* KK\\u309c */
   2146     "\\u3070\\u309d", /* H\\u309b\\u309d */
   2147     "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
   2148     "\\u3070\\u306f", /* H\\u309bH */
   2149     "\\u30d0\\u30cf", /* K\\u309bK */
   2150     "\\u3070\\u309e", /* H\\u309b\\u309e */
   2151     "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
   2152     "\\u3070\\u3070", /* H\\u309bH\\u309b */
   2153     "\\u30d0\\u3070", /* K\\u309bH\\u309b */
   2154     "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
   2155     "\\u3070\\u3071", /* H\\u309bH\\u309c */
   2156     "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
   2157     "\\u3071\\u309d", /* H\\u309c\\u309d */
   2158     "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
   2159     "\\u3071\\u306f", /* H\\u309cH */
   2160     "\\u30d1\\u30cf", /* K\\u309cK */
   2161     "\\u3071\\u3070", /* H\\u309cH\\u309b */
   2162     "\\u3071\\u30d0", /* H\\u309cK\\u309b */
   2163     "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
   2164     "\\u3071\\u3071", /* H\\u309cH\\u309c */
   2165     "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
   2166   };
   2167   /*
   2168   static const char *test3[] = {
   2169     "\\u221er\\u221e",
   2170     "\\u221eR#",
   2171     "\\u221et\\u221e",
   2172     "#r\\u221e",
   2173     "#R#",
   2174     "#t%",
   2175     "#T%",
   2176     "8t\\u221e",
   2177     "8T\\u221e",
   2178     "8t#",
   2179     "8T#",
   2180     "8t%",
   2181     "8T%",
   2182     "8t8",
   2183     "8T8",
   2184     "\\u03c9r\\u221e",
   2185     "\\u03a9R%",
   2186     "rr\\u221e",
   2187     "rR\\u221e",
   2188     "Rr\\u221e",
   2189     "RR\\u221e",
   2190     "RT%",
   2191     "rt8",
   2192     "tr\\u221e",
   2193     "tr8",
   2194     "TR8",
   2195     "tt8",
   2196     "\\u30b7\\u30e3\\u30fc\\u30ec",
   2197   };
   2198   */
          /* Attribute/value arrays are parallel: att[i] is set to val[i]. The */
          /* last argument of each call below is the number of pairs to apply.  */
   2199   static const UColAttribute att[] = { UCOL_STRENGTH };
   2200   static const UColAttributeValue val[] = { UCOL_QUATERNARY };
   2201 
   2202   static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
   2203   static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
   2204 
   2205   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
   2206   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
   2207   /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
   2208   genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
   2209   genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
   2210 }
   2211 
   2212 static void TestStrCollIdenticalPrefix(void) {
   2213   /* Two strings sharing the prefix "ab" and ending in the two elements the rule equates. */
   2214   const char *equalPair[] = { "ab\\ud9b0\\udc70", "ab\\ud9b0\\udc71" };
   2215   const char *tailoring = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   2216   const int32_t pairLen = UPRV_LENGTHOF(equalPair);
   2217 
   2218   genericRulesStarterWithResult(tailoring, equalPair, pairLen, UCOL_EQUAL);
   2219 }
   2220 /* Contractions should have all their canonically equivalent */
   2221 /* strings included */
   2222 static void TestContractionClosure(void) {
   2223   /* Each case supplies a tailoring and the strings that are handed to
   2224    * genericRulesStarterWithResult() with UCOL_EQUAL as expected result. */
   2225   static const struct {
   2226     const char *rules;
   2227     const char *data[10];
   2228     const uint32_t len;
   2229   } tests[] = {
   2230     { "&b=\\u00e4\\u00e4",
   2231       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5 },
   2232     { "&b=\\u00C5", { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4 },
   2233   };
   2234   const uint32_t caseCount = UPRV_LENGTHOF(tests);
   2235   uint32_t caseIdx;
   2236   for(caseIdx = 0; caseIdx < caseCount; ++caseIdx) {
   2237     genericRulesStarterWithResult(tests[caseIdx].rules, tests[caseIdx].data, tests[caseIdx].len, UCOL_EQUAL);
   2238   }
   2239 }
   2240 
   2241 /* This test also fails */
   2242 static void TestBeforePrefixFailure(void) {
          /*
           * Each case pairs a tailoring that uses "[before 3]" with two strings
           * and is passed to genericRulesStarter(). The second and third cases
           * contain the same three rule fragments in a different order.
           * The trailing #if 0 section is disabled; it holds two further rules
           * that combine "[before 3]" with a '|' element, plus a debugging aid
           * that logs the collation elements of the test strings.
           */
   2243   static const struct {
   2244     const char *rules;
   2245     const char *data[10];
   2246     const uint32_t len;
   2247   } tests[] = {
   2248     { "&g <<< a"
   2249       "&[before 3]\\uff41 <<< x",
   2250       {"x", "\\uff41"}, 2 },
   2251     {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2252         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   2253         "&[before 3]\\u30a7<<<\\u30a9",
   2254       {"\\u30a9", "\\u30a7"}, 2 },
   2255     {   "&[before 3]\\u30a7<<<\\u30a9"
   2256         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2257         "&\\u30A8=\\u30A8=\\u3048=\\uff74",
   2258       {"\\u30a9", "\\u30a7"}, 2 },
   2259   };
   2260   uint32_t i;
   2261 
   2262 
   2263   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
   2264     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2265   }
   2266 
   2267 #if 0
          /* Disabled: the same two-string table run against rule1 and rule2, */
          /* which differ only in the order of their fragments.               */
   2268   const char* rule1 =
   2269         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2270         "&\\u30A8=\\u30A8=\\u3048=\\uff74"
   2271         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
   2272   const char* rule2 =
   2273         "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
   2274         "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
   2275         "&\\u30A8=\\u30A8=\\u3048=\\uff74";
   2276   const char* test[] = {
   2277       "\\u30c6\\u30fc\\u30bf",
   2278       "\\u30c6\\u30a7\\u30bf",
   2279   };
   2280   genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
   2281   genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
   2282 /* this piece of code should be in some sort of verbose mode     */
   2283 /* it gets the collation elements for elements and prints them   */
   2284 /* This is useful when trying to see whether the problem is      */
   2285   {
   2286     UErrorCode status = U_ZERO_ERROR;
   2287     uint32_t i = 0;
   2288     UCollationElements *it = NULL;
   2289     uint32_t CE;
   2290     UChar string[256];
   2291     uint32_t uStringLen;
   2292     UCollator *coll = NULL;
   2293 
   2294     uStringLen = u_unescape(rule1, string, 256);
   2295 
   2296     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2297 
   2298     /*coll = ucol_open("ja_JP_JIS", &status);*/
            /* Opened over an empty range; the text is set per string below. */
   2299     it = ucol_openElements(coll, string, 0, &status);
   2300 
   2301     for(i = 0; i < UPRV_LENGTHOF(test); i++) {
   2302       log_verbose("%s\n", test[i]);
   2303       uStringLen = u_unescape(test[i], string, 256);
   2304       ucol_setText(it, string, uStringLen, &status);
   2305 
              /* Log every collation element until the iterator is exhausted. */
   2306       while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
   2307         log_verbose("%08X\n", CE);
   2308       }
   2309       log_verbose("\n");
   2310 
   2311     }
   2312 
   2313     ucol_closeElements(it);
   2314     ucol_close(coll);
   2315   }
   2316 #endif
   2317 }
   2318 
   2319 static void TestPrefixCompose(void) {
   2320   /*
   2321    * Smoke test: opens a collator from a rule that tailors two elements
   2322    * written with the '|' notation (U+30AB|U+30FC and U+30AC|U+30FC) and
   2323    * closes it again. The status set by ucol_openRules() is not inspected.
   2324    *
   2325    * Sample strings once kept alongside this rule (currently unused):
   2326    *     "\\u30c6\\u30fc\\u30bf"
   2327    *     "\\u30c6\\u30a7\\u30bf"
   2328    */
   2329   UErrorCode errorCode = U_ZERO_ERROR;
   2330   UChar ruleText[256];
   2331   UCollator *tailored = NULL;
   2332   uint32_t ruleLength = 0;
   2333   const char *escapedRule = "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   2334 
   2335   /* Convert the escaped rule into the UChar buffer. */
   2336   ruleLength = u_unescape(escapedRule, ruleText, 256);
   2337 
   2338   /* Build the collator with UCOL_DEFAULT for both attribute arguments, */
   2339   /* then release it right away.                                        */
   2340   tailored = ucol_openRules(ruleText, ruleLength,
   2341                             UCOL_DEFAULT, UCOL_DEFAULT,
   2342                             NULL, &errorCode);
   2343   ucol_close(tailored);
   2344 }
   2345 
   2346 /*
   2347 [last variable] last variable value
   2348 [last primary ignorable] largest CE for primary ignorable
   2349 [last secondary ignorable] largest CE for secondary ignorable
   2350 [last tertiary ignorable] largest CE for tertiary ignorable
   2351 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   2352 */
   2353 
   2354 static void TestRuleOptions(void) {
   2355   /* values here are hardcoded and are correct for the current UCA
   2356    * when the UCA changes, one might be forced to change these
   2357    * values.
   2358    */
   2359 
   2360   /*
   2361    * These strings contain the last character before [variable top]
   2362    * and the first and second characters (by primary weights) after it.
   2363    * See FractionalUCA.txt. For example:
   2364       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   2365       [variable top = 0C FE]
   2366       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   2367      and
   2368       00B4; [0D 0C, 05, 05]
   2369    *
   2370    * Note: Starting with UCA 6.0, the [variable top] collation element
   2371    * is not the weight of any character or string,
   2372    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   2373    */
   2374 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   2375 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   2376 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   2377 
   2378   /*
   2379    * This string has to match the character that has the [last regular] weight
   2380    * which changes with each UCA version.
   2381    * See the bottom of FractionalUCA.txt which says something like
   2382       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   2383    *
   2384    * Note: Starting with UCA 6.0, the [last regular] collation element
   2385    * is not the weight of any character or string,
   2386    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   2387    */
   2388 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   2389 
   2390   static const struct {
   2391     const char *rules;
   2392     const char *data[10];
   2393     const uint32_t len;
   2394   } tests[] = {
   2395 #if 0
   2396     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
   2397     /* - all befores here amount to zero */
   2398     { "&[before 3][first tertiary ignorable]<<<a",
   2399         { "\\u0000", "a"}, 2
   2400     }, /* you cannot go before first tertiary ignorable */
   2401 
   2402     { "&[before 3][last tertiary ignorable]<<<a",
   2403         { "\\u0000", "a"}, 2
   2404     }, /* you cannot go before last tertiary ignorable */
   2405 #endif
   2406     /*
   2407      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
   2408      * and it *is* possible to "go before" that.
   2409      */
   2410     { "&[before 3][first secondary ignorable]<<<a",
   2411         { "\\u0000", "a"}, 2
   2412     },
   2413 
   2414     { "&[before 3][last secondary ignorable]<<<a",
   2415         { "\\u0000", "a"}, 2
   2416     },
   2417 
   2418     /* 'normal' befores */
   2419 
   2420     /*
   2421      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
   2422      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
   2423      * because there is no tailoring space before that boundary.
   2424      * Made the tests work by tailoring to a space instead.
   2425      */
   2426     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
   2427         {  "c", "b", "\\u0332", "a" }, 4
   2428     },
   2429 
   2430     /* we don't have a code point that corresponds to
   2431      * the last primary ignorable
   2432      */
   2433     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
   2434         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   2435     },
   2436 
   2437     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   2438         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   2439     },
   2440 
   2441     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   2442         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   2443     },
   2444 
   2445     { "&[first regular]<a"
   2446       "&[before 1][first regular]<b",
   2447       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   2448     },
   2449 
   2450     { "&[before 1][last regular]<b"
   2451       "&[last regular]<a",
   2452         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   2453     },
   2454 
   2455     { "&[before 1][first implicit]<b"
   2456       "&[first implicit]<a",
   2457         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   2458     },
   2459 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
   2460     { "&[before 1][last implicit]<b"
   2461       "&[last implicit]<a",
   2462         { "b", "\\U0010FFFD", "a" }, 3
   2463     },
   2464 #endif
   2465     { "&[last variable]<z"
   2466       "&' '<x"  /* was &[last primary ignorable]<x, see above */
   2467       "&[last secondary ignorable]<<y"
   2468       "&[last tertiary ignorable]<<<w"
   2469       "&[top]<u",
   2470       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   2471     }
   2472 
   2473   };
   2474   uint32_t i;
   2475 
   2476   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
   2477     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2478   }
   2479 }
   2480 
   2481 
   2482 static void TestOptimize(void) {
   2483   /* this is not really a test - just trying out
   2484    * whether copying of UCA contents will fail
   2485    * Cannot really test, since the functionality
   2486    * remains the same.
   2487    */
   2488   static const struct {
   2489     const char *rules;
   2490     const char *data[10];
   2491     const uint32_t len;
   2492   } tests[] = {
   2493     /* - all befores here amount to zero */
   2494     { "[optimize [\\uAC00-\\uD7FF]]",
   2495     { "a", "b"}, 2}
   2496   };
   2497   uint32_t i;
   2498 
   2499   for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
   2500     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2501   }
   2502 }
   2503 
   2504 /*
   2505 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   2506 weiv    ucol_strcollIter?
   2507 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   2508 weiv    these are the input strings?
   2509 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   2510 weiv    will check - could be a problem with utf-8 iterator
   2511 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   2512 weiv    hmmm
   2513 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   2514 weiv    that doesn't sound right
   2515 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   2516 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   2517 cycheng (at) ca.ibm.c... yes
   2518 weiv    and then do the comparison
   2519 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   2520 weiv    utf-16 strings look like a little endian ones in the example you sent me
   2521 weiv    It could be a bug - let me try to test it out
   2522 cycheng (at) ca.ibm.c... ok
   2523 cycheng (at) ca.ibm.c... we can wait till the conf. call
   2524 cycheng (at) ca.ibm.c... next weke
   2525 weiv    that would be great
   2526 weiv    hmmm
   2527 weiv    I might be wrong
   2528 weiv    let me play with it some more
   2529 cycheng (at) ca.ibm.c... ok
   2530 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   2531 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   2532 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   2533 weiv    ok
   2534 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   2535 weiv    thanks
   2536 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   2537 */
   2538 #if 0
   2539 static void Alexis(void) {
   2540   UErrorCode status = U_ZERO_ERROR;
   2541   UCollator *coll = ucol_open("", &status);
   2542 
   2543 
   2544   const char utf16be[2][4] = {
   2545     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   2546     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   2547   };
   2548 
   2549   const char utf8[2][4] = {
   2550     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   2551     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   2552   };
   2553 
   2554   UCharIterator iterU161, iterU162;
   2555   UCharIterator iterU81, iterU82;
   2556 
   2557   UCollationResult resU16, resU8;
   2558 
   2559   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   2560   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   2561 
   2562   uiter_setUTF8(&iterU81, utf8[0], 4);
   2563   uiter_setUTF8(&iterU82, utf8[1], 4);
   2564 
   2565   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2566 
   2567   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   2568   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   2569 
   2570 
   2571   if(resU16 != resU8) {
   2572     log_err("different results\n");
   2573   }
   2574 
   2575   ucol_close(coll);
   2576 }
   2577 #endif
   2578 
   2579 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   2580 static void Alexis2(void) {
   2581   UErrorCode status = U_ZERO_ERROR;
   2582   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2583   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2584   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2585   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   2586 
   2587   UConverter *conv = NULL;
   2588 
   2589   UCharIterator U16BEItS, U16BEItT;
   2590   UCharIterator U8ItS, U8ItT;
   2591 
   2592   UCollationResult resU16, resU16BE, resU8;
   2593 
   2594   static const char* const pairs[][2] = {
   2595     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   2596     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   2597     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   2598     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   2599     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   2600     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   2601     { "\\u0020", "\\u0020\\u0000"}
   2602 /*
   2603 5F20 (my result here)
   2604 5F204E008E3F
   2605 5F20 (your result here)
   2606 */
   2607   };
   2608 
   2609   int32_t i = 0;
   2610 
   2611   UCollator *coll = ucol_open("", &status);
   2612   if(status == U_FILE_ACCESS_ERROR) {
   2613     log_data_err("Is your data around?\n");
   2614     return;
   2615   } else if(U_FAILURE(status)) {
   2616     log_err("Error opening collator\n");
   2617     return;
   2618   }
   2619   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2620   conv = ucnv_open("UTF16BE", &status);
   2621   for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
   2622     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2623     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2624 
   2625     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   2626 
   2627     log_verbose("Result of strcoll is %i\n", resU16);
   2628 
   2629     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   2630     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   2631     (void)U16BELenS;    /* Suppress set but not used warnings. */
   2632     (void)U16BELenT;
   2633 
   2634     /* use the original sizes, as the result from converter is in bytes */
   2635     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   2636     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   2637 
   2638     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   2639 
   2640     log_verbose("Result of U16BE is %i\n", resU16BE);
   2641 
   2642     if(resU16 != resU16BE) {
   2643       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   2644     }
   2645 
   2646     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   2647     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   2648 
   2649     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   2650     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   2651 
   2652     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   2653 
   2654     if(resU16 != resU8) {
   2655       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   2656     }
   2657 
   2658   }
   2659 
   2660   ucol_close(coll);
   2661   ucnv_close(conv);
   2662 }
   2663 
   2664 static void TestHebrewUCA(void) {
   2665   UErrorCode status = U_ZERO_ERROR;
   2666   static const char *first[] = {
   2667     "d790d6b8d79cd795d6bcd7a9",
   2668     "d790d79cd79ed7a7d799d799d7a1",
   2669     "d790d6b4d79ed795d6bcd7a9",
   2670   };
   2671 
   2672   char utf8String[3][256];
   2673   UChar utf16String[3][256];
   2674 
   2675   int32_t i = 0, j = 0;
   2676   int32_t sizeUTF8[3];
   2677   int32_t sizeUTF16[3];
   2678 
   2679   UCollator *coll = ucol_open("", &status);
   2680   if (U_FAILURE(status)) {
   2681       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   2682       return;
   2683   }
   2684   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   2685 
   2686   for(i = 0; i < UPRV_LENGTHOF(first); i++) {
   2687     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   2688     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   2689     log_verbose("%i: ");
   2690     for(j = 0; j < sizeUTF16[i]; j++) {
   2691       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   2692       log_verbose("%04X", utf16String[i][j]);
   2693     }
   2694     log_verbose("\n");
   2695   }
   2696   for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
   2697     for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
   2698       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   2699     }
   2700   }
   2701 
   2702   ucol_close(coll);
   2703 
   2704 }
   2705 
   2706 static void TestPartialSortKeyTermination(void) {
   2707   static const char* cases[] = {
   2708     "\\u1234\\u1234\\udc00",
   2709     "\\udc00\\ud800\\ud800"
   2710   };
   2711 
   2712   int32_t i;
   2713 
   2714   UErrorCode status = U_ZERO_ERROR;
   2715 
   2716   UCollator *coll = ucol_open("", &status);
   2717 
   2718   UCharIterator iter;
   2719 
   2720   UChar currCase[256];
   2721   int32_t length = 0;
   2722   int32_t pKeyLen = 0;
   2723 
   2724   uint8_t key[256];
   2725 
   2726   for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
   2727     uint32_t state[2] = {0, 0};
   2728     length = u_unescape(cases[i], currCase, 256);
   2729     uiter_setString(&iter, currCase, length);
   2730     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   2731     (void)pKeyLen;   /* Suppress set but not used warning. */
   2732 
   2733     log_verbose("Done\n");
   2734 
   2735   }
   2736   ucol_close(coll);
   2737 }
   2738 
   2739 static void TestSettings(void) {
   2740   static const char* cases[] = {
   2741     "apple",
   2742       "Apple"
   2743   };
   2744 
   2745   static const char* locales[] = {
   2746     "",
   2747       "en"
   2748   };
   2749 
   2750   UErrorCode status = U_ZERO_ERROR;
   2751 
   2752   int32_t i = 0, j = 0;
   2753 
   2754   UChar source[256], target[256];
   2755   int32_t sLen = 0, tLen = 0;
   2756 
   2757   UCollator *collateObject = NULL;
   2758   for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
   2759     collateObject = ucol_open(locales[i], &status);
   2760     ucol_setStrength(collateObject, UCOL_PRIMARY);
   2761     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   2762     for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
   2763       sLen = u_unescape(cases[j-1], source, 256);
   2764       source[sLen] = 0;
   2765       tLen = u_unescape(cases[j], target, 256);
   2766       source[tLen] = 0;
   2767       doTest(collateObject, source, target, UCOL_EQUAL);
   2768     }
   2769     ucol_close(collateObject);
   2770   }
   2771 }
   2772 
   2773 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   2774     UErrorCode status = U_ZERO_ERROR;
   2775     int32_t errorNo = 0;
   2776     const UChar *sourceRules = NULL;
   2777     int32_t sourceRulesLen = 0;
   2778     UParseError parseError;
   2779     UColAttributeValue french = UCOL_OFF;
   2780 
   2781     if(!ucol_equals(source, target)) {
   2782         log_err("Same collators, different address not equal\n");
   2783         errorNo++;
   2784     }
   2785     ucol_close(target);
   2786     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   2787         target = ucol_safeClone(source, NULL, NULL, &status);
   2788         if(U_FAILURE(status)) {
   2789             log_err("Error creating clone\n");
   2790             errorNo++;
   2791             return errorNo;
   2792         }
   2793         if(!ucol_equals(source, target)) {
   2794             log_err("Collator different from it's clone\n");
   2795             errorNo++;
   2796         }
   2797         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   2798         if(french == UCOL_ON) {
   2799             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   2800         } else {
   2801             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   2802         }
   2803         if(U_FAILURE(status)) {
   2804             log_err("Error setting attributes\n");
   2805             errorNo++;
   2806             return errorNo;
   2807         }
   2808         if(ucol_equals(source, target)) {
   2809             log_err("Collators same even when options changed\n");
   2810             errorNo++;
   2811         }
   2812         ucol_close(target);
   2813 
   2814         sourceRules = ucol_getRules(source, &sourceRulesLen);
   2815         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2816         if(U_FAILURE(status)) {
   2817             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
   2818             errorNo++;
   2819             return errorNo;
   2820         }
   2821         /* Note: The tailoring rule string is an optional data item. */
   2822         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
   2823             log_err("Collator different from collator that was created from the same rules\n");
   2824             errorNo++;
   2825         }
   2826         ucol_close(target);
   2827     }
   2828     return errorNo;
   2829 }
   2830 
   2831 
   2832 static void TestEquals(void) {
   2833     /* ucol_equals is not currently a public API. There is a chance that it will become
   2834     * something like this.
   2835     */
   2836     /* test whether the two collators instantiated from the same locale are equal */
   2837     UErrorCode status = U_ZERO_ERROR;
   2838     UParseError parseError;
   2839     int32_t noOfLoc = uloc_countAvailable();
   2840     const char *locName = NULL;
   2841     UCollator *source = NULL, *target = NULL;
   2842     int32_t i = 0;
   2843 
   2844     const char* rules[] = {
   2845         "&l < lj <<< Lj <<< LJ",
   2846         "&n < nj <<< Nj <<< NJ",
   2847         "&ae <<< \\u00e4",
   2848         "&AE <<< \\u00c4"
   2849     };
   2850     /*
   2851     const char* badRules[] = {
   2852     "&l <<< Lj",
   2853     "&n < nj <<< nJ <<< NJ",
   2854     "&a <<< \\u00e4",
   2855     "&AE <<< \\u00c4 <<< x"
   2856     };
   2857     */
   2858 
   2859     UChar sourceRules[1024], targetRules[1024];
   2860     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   2861     int32_t rulesSize = UPRV_LENGTHOF(rules);
   2862 
   2863     for(i = 0; i < rulesSize; i++) {
   2864         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   2865         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   2866     }
   2867 
   2868     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2869     if(status == U_FILE_ACCESS_ERROR) {
   2870         log_data_err("Is your data around?\n");
   2871         return;
   2872     } else if(U_FAILURE(status)) {
   2873         log_err("Error opening collator\n");
   2874         return;
   2875     }
   2876     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2877     if(!ucol_equals(source, target)) {
   2878         log_err("Equivalent collators not equal!\n");
   2879     }
   2880     ucol_close(source);
   2881     ucol_close(target);
   2882 
   2883     source = ucol_open("root", &status);
   2884     target = ucol_open("root", &status);
   2885     log_verbose("Testing root\n");
   2886     if(!ucol_equals(source, source)) {
   2887         log_err("Same collator not equal\n");
   2888     }
   2889     if(TestEqualsForCollator("root", source, target)) {
   2890         log_err("Errors for root\n");
   2891     }
   2892     ucol_close(source);
   2893 
   2894     for(i = 0; i<noOfLoc; i++) {
   2895         status = U_ZERO_ERROR;
   2896         locName = uloc_getAvailable(i);
   2897         /*if(hasCollationElements(locName)) {*/
   2898         log_verbose("Testing equality for locale %s\n", locName);
   2899         source = ucol_open(locName, &status);
   2900         target = ucol_open(locName, &status);
   2901         if (U_FAILURE(status)) {
   2902             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   2903             continue;
   2904         }
   2905         if(TestEqualsForCollator(locName, source, target)) {
   2906             log_err("Errors for locale %s\n", locName);
   2907         }
   2908         ucol_close(source);
   2909         /*}*/
   2910     }
   2911 }
   2912 
   2913 static void TestJ2726(void) {
   2914     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   2915     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   2916     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   2917     UErrorCode status = U_ZERO_ERROR;
   2918     UCollator *coll = ucol_open("en", &status);
   2919     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   2920     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   2921     doTest(coll, a, aSpace, UCOL_EQUAL);
   2922     doTest(coll, aSpace, a, UCOL_EQUAL);
   2923     doTest(coll, a, spaceA, UCOL_EQUAL);
   2924     doTest(coll, spaceA, a, UCOL_EQUAL);
   2925     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   2926     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   2927     ucol_close(coll);
   2928 }
   2929 
   2930 static void NullRule(void) {
   2931     UChar r[3] = {0};
   2932     UErrorCode status = U_ZERO_ERROR;
   2933     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2934     if(U_SUCCESS(status)) {
   2935         log_err("This should have been an error!\n");
   2936         ucol_close(coll);
   2937     } else {
   2938         status = U_ZERO_ERROR;
   2939     }
   2940     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2941     if(U_FAILURE(status)) {
   2942         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   2943     } else {
   2944         ucol_close(coll);
   2945     }
   2946 }
   2947 
   2948 /**
   2949  * Test for CollationElementIterator previous and next for the whole set of
   2950  * unicode characters with normalization on.
   2951  */
   2952 static void TestNumericCollation(void)
   2953 {
   2954     UErrorCode status = U_ZERO_ERROR;
   2955 
   2956     const static char *basicTestStrings[]={
   2957     "hello1",
   2958     "hello2",
   2959     "hello2002",
   2960     "hello2003",
   2961     "hello123456",
   2962     "hello1234567",
   2963     "hello10000000",
   2964     "hello100000000",
   2965     "hello1000000000",
   2966     "hello10000000000",
   2967     };
   2968 
   2969     const static char *preZeroTestStrings[]={
   2970     "avery10000",
   2971     "avery010000",
   2972     "avery0010000",
   2973     "avery00010000",
   2974     "avery000010000",
   2975     "avery0000010000",
   2976     "avery00000010000",
   2977     "avery000000010000",
   2978     };
   2979 
   2980     const static char *thirtyTwoBitNumericStrings[]={
   2981     "avery42949672960",
   2982     "avery42949672961",
   2983     "avery42949672962",
   2984     "avery429496729610"
   2985     };
   2986 
   2987      const static char *longNumericStrings[]={
   2988      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   2989         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   2990         are treated as multiple collation elements. */
   2991     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   2992     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   2993     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   2994     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   2995     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   2996     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   2997     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   2998     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   2999     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   3000     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   3001     };
   3002 
   3003     const static char *supplementaryDigits[] = {
   3004       "\\uD835\\uDFCE", /* 0 */
   3005       "\\uD835\\uDFCF", /* 1 */
   3006       "\\uD835\\uDFD0", /* 2 */
   3007       "\\uD835\\uDFD1", /* 3 */
   3008       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   3009       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   3010       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   3011       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   3012       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   3013       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   3014     };
   3015 
   3016     const static char *foreignDigits[] = {
   3017       "\\u0661",
   3018         "\\u0662",
   3019         "\\u0663",
   3020       "\\u0661\\u0660",
   3021       "\\u0661\\u0662",
   3022       "\\u0661\\u0663",
   3023       "\\u0662\\u0660",
   3024       "\\u0662\\u0662",
   3025       "\\u0662\\u0663",
   3026       "\\u0663\\u0660",
   3027       "\\u0663\\u0662",
   3028       "\\u0663\\u0663"
   3029     };
   3030 
   3031     const static char *evenZeroes[] = {
   3032       "2000",
   3033       "2001",
   3034         "2002",
   3035         "2003"
   3036     };
   3037 
   3038     UColAttribute att = UCOL_NUMERIC_COLLATION;
   3039     UColAttributeValue val = UCOL_ON;
   3040 
   3041     /* Open our collator. */
   3042     UCollator* coll = ucol_open("root", &status);
   3043     if (U_FAILURE(status)){
   3044         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   3045               myErrorName(status));
   3046         return;
   3047     }
   3048     genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
   3049     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
   3050     genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
   3051     genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
   3052     genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
   3053     genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
   3054 
   3055     /* Setting up our collator to do digits. */
   3056     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   3057     if (U_FAILURE(status)){
   3058         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   3059               myErrorName(status));
   3060         return;
   3061     }
   3062 
   3063     /*
   3064        Testing that prepended zeroes still yield the correct collation behavior.
   3065        We expect that every element in our strings array will be equal.
   3066     */
   3067     genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
   3068 
   3069     ucol_close(coll);
   3070 }
   3071 
   3072 static void TestTibetanConformance(void)
   3073 {
   3074     const char* test[] = {
   3075         "\\u0FB2\\u0591\\u0F71\\u0061",
   3076         "\\u0FB2\\u0F71\\u0061"
   3077     };
   3078 
   3079     UErrorCode status = U_ZERO_ERROR;
   3080     UCollator *coll = ucol_open("", &status);
   3081     UChar source[100];
   3082     UChar target[100];
   3083     int result;
   3084     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3085     if (U_SUCCESS(status)) {
   3086         u_unescape(test[0], source, 100);
   3087         u_unescape(test[1], target, 100);
   3088         doTest(coll, source, target, UCOL_EQUAL);
   3089         result = ucol_strcoll(coll, source, -1,   target, -1);
   3090         log_verbose("result %d\n", result);
   3091         if (UCOL_EQUAL != result) {
   3092             log_err("Tibetan comparison error\n");
   3093         }
   3094     }
   3095     ucol_close(coll);
   3096 
   3097     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   3098 }
   3099 
   3100 static void TestPinyinProblem(void) {
   3101     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   3102     genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));
   3103 }
   3104 
   3105 /**
   3106  * Iterate through the given iterator, checking to see that all the strings
   3107  * in the expected array are present.
   3108  * @param expected array of strings we expect to see, or NULL
   3109  * @param expectedCount number of elements of expected, or 0
   3110  */
   3111 static int32_t checkUEnumeration(const char* msg,
   3112                                  UEnumeration* iter,
   3113                                  const char** expected,
   3114                                  int32_t expectedCount) {
   3115     UErrorCode ec = U_ZERO_ERROR;
   3116     int32_t i = 0, n, j, bit;
   3117     int32_t seenMask = 0;
   3118 
   3119     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   3120     n = uenum_count(iter, &ec);
   3121     if (!assertSuccess("count", &ec)) return -1;
   3122     log_verbose("%s = [", msg);
   3123     for (;; ++i) {
   3124         const char* s = uenum_next(iter, NULL, &ec);
   3125         if (!assertSuccess("snext", &ec) || s == NULL) break;
   3126         if (i != 0) log_verbose(",");
   3127         log_verbose("%s", s);
   3128         /* check expected list */
   3129         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3130             if ((seenMask&bit) == 0 &&
   3131                 uprv_strcmp(s, expected[j]) == 0) {
   3132                 seenMask |= bit;
   3133                 break;
   3134             }
   3135         }
   3136     }
   3137     log_verbose("] (%d)\n", i);
   3138     assertTrue("count verified", i==n);
   3139     /* did we see all expected strings? */
   3140     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3141         if ((seenMask&bit)!=0) {
   3142             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   3143         } else {
   3144             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   3145         }
   3146     }
   3147     return n;
   3148 }
   3149 
   3150 /**
   3151  * Test new API added for separate collation tree.
   3152  */
   3153 static void TestSeparateTrees(void) {
   3154     UErrorCode ec = U_ZERO_ERROR;
   3155     UEnumeration *e = NULL;
   3156     int32_t n = -1;
   3157     UBool isAvailable;
   3158     char loc[256];
   3159 
   3160     static const char* AVAIL[] = { "en", "de" };
   3161 
   3162     static const char* KW[] = { "collation" };
   3163 
   3164     static const char* KWVAL[] = { "phonebook", "stroke" };
   3165 
   3166 #if !UCONFIG_NO_SERVICE
   3167     e = ucol_openAvailableLocales(&ec);
   3168     if (e != NULL) {
   3169         assertSuccess("ucol_openAvailableLocales", &ec);
   3170         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   3171         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
   3172         (void)n;    /* Suppress set but not used warnings. */
   3173         /* Don't need to check n because we check list */
   3174         uenum_close(e);
   3175     } else {
   3176         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   3177     }
   3178 #endif
   3179 
   3180     e = ucol_getKeywords(&ec);
   3181     if (e != NULL) {
   3182         assertSuccess("ucol_getKeywords", &ec);
   3183         assertTrue("ucol_getKeywords!=0", e!=0);
   3184         n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
   3185         /* Don't need to check n because we check list */
   3186         uenum_close(e);
   3187     } else {
   3188         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   3189     }
   3190 
   3191     e = ucol_getKeywordValues(KW[0], &ec);
   3192     if (e != NULL) {
   3193         assertSuccess("ucol_getKeywordValues", &ec);
   3194         assertTrue("ucol_getKeywordValues!=0", e!=0);
   3195         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
   3196         /* Don't need to check n because we check list */
   3197         uenum_close(e);
   3198     } else {
   3199         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   3200     }
   3201 
   3202     /* Try setting a warning before calling ucol_getKeywordValues */
   3203     ec = U_USING_FALLBACK_WARNING;
   3204     e = ucol_getKeywordValues(KW[0], &ec);
   3205     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   3206         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   3207         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
   3208         /* Don't need to check n because we check list */
   3209         uenum_close(e);
   3210     }
   3211 
   3212     /*
   3213 U_DRAFT int32_t U_EXPORT2
   3214 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   3215                              const char* locale, UBool* isAvailable,
   3216                              UErrorCode* status);
   3217 }
   3218 */
   3219     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   3220                                      &isAvailable, &ec);
   3221     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3222         assertEquals("getFunctionalEquivalent(de)", "root", loc);
   3223         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   3224                    isAvailable == TRUE);
   3225     }
   3226 
   3227     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   3228                                      &isAvailable, &ec);
   3229     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3230         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
   3231         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
   3232                    isAvailable == FALSE);
   3233     }
   3234 }
   3235 
   3236 /* supercedes TestJ784 */
   3237 static void TestBeforePinyin(void) {
   3238     const static char rules[] = {
   3239         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   3240         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   3241         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   3242         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   3243         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   3244         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   3245     };
   3246 
   3247     const static char *test[] = {
   3248         "l\\u0101",
   3249         "la",
   3250         "l\\u0101n",
   3251         "lan ",
   3252         "l\\u0113",
   3253         "le",
   3254         "l\\u0113n",
   3255         "len"
   3256     };
   3257 
   3258     const static char *test2[] = {
   3259         "x\\u0101",
   3260         "x\\u0100",
   3261         "X\\u0101",
   3262         "X\\u0100",
   3263         "x\\u00E1",
   3264         "x\\u00C1",
   3265         "X\\u00E1",
   3266         "X\\u00C1",
   3267         "x\\u01CE",
   3268         "x\\u01CD",
   3269         "X\\u01CE",
   3270         "X\\u01CD",
   3271         "x\\u00E0",
   3272         "x\\u00C0",
   3273         "X\\u00E0",
   3274         "X\\u00C0",
   3275         "xa",
   3276         "xA",
   3277         "Xa",
   3278         "XA",
   3279         "x\\u0101x",
   3280         "x\\u0100x",
   3281         "x\\u00E1x",
   3282         "x\\u00C1x",
   3283         "x\\u01CEx",
   3284         "x\\u01CDx",
   3285         "x\\u00E0x",
   3286         "x\\u00C0x",
   3287         "xax",
   3288         "xAx"
   3289     };
   3290 
   3291     genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
   3292     genericLocaleStarter("zh", test, UPRV_LENGTHOF(test));
   3293     genericRulesStarter(rules, test2, UPRV_LENGTHOF(test2));
   3294     genericLocaleStarter("zh", test2, UPRV_LENGTHOF(test2));
   3295 }
   3296 
   3297 static void TestBeforeTightening(void) {
   3298     static const struct {
   3299         const char *rules;
   3300         UErrorCode expectedStatus;
   3301     } tests[] = {
   3302         { "&[before 1]a<x", U_ZERO_ERROR },
   3303         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   3304         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   3305         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   3306         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   3307         { "&[before 2]a<<x",U_ZERO_ERROR },
   3308         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   3309         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   3310         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   3311         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   3312         { "&[before 3]a<<<x",U_ZERO_ERROR },
   3313         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   3314         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   3315     };
   3316 
   3317     int32_t i = 0;
   3318 
   3319     UErrorCode status = U_ZERO_ERROR;
   3320     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3321     uint32_t rlen = 0;
   3322 
   3323     UCollator *coll = NULL;
   3324 
   3325 
   3326     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
   3327         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   3328         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3329         if(status != tests[i].expectedStatus) {
   3330             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   3331                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   3332         }
   3333         ucol_close(coll);
   3334         status = U_ZERO_ERROR;
   3335     }
   3336 
   3337 }
   3338 
   3339 /*
   3340 &m < a
   3341 &[before 1] a < x <<< X << q <<< Q < z
   3342 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   3343 
   3344 &m < a
   3345 &[before 2] a << x <<< X << q <<< Q < z
   3346 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   3347 
   3348 &m < a
   3349 &[before 3] a <<< x <<< X << q <<< Q < z
   3350 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   3351 
   3352 
   3353 &m << a
   3354 &[before 1] a < x <<< X << q <<< Q < z
   3355 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   3356 
   3357 &m << a
   3358 &[before 2] a << x <<< X << q <<< Q < z
   3359 assert: m <<< M << x <<< X << q <<< Q << a < z < n
   3360 
   3361 &m << a
   3362 &[before 3] a <<< x <<< X << q <<< Q < z
   3363 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
   3364 
   3365 
   3366 &m <<< a
   3367 &[before 1] a < x <<< X << q <<< Q < z
   3368 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
   3369 
   3370 &m <<< a
   3371 &[before 2] a << x <<< X << q <<< Q < z
   3372 assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
   3373 
   3374 &m <<< a
   3375 &[before 3] a <<< x <<< X << q <<< Q < z
   3376 assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
   3377 
   3378 
   3379 &[before 1] s < x <<< X << q <<< Q < z
   3380 assert: r <<< R < x <<< X << q <<< Q < z < s < n
   3381 
   3382 &[before 2] s << x <<< X << q <<< Q < z
   3383 assert: r <<< R < x <<< X << q <<< Q << s < z < n
   3384 
   3385 &[before 3] s <<< x <<< X << q <<< Q < z
   3386 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
   3387 
   3388 
   3389 &[before 1] \u24DC < x <<< X << q <<< Q < z
   3390 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
   3391 
   3392 &[before 2] \u24DC << x <<< X << q <<< Q < z
   3393 assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
   3394 
   3395 &[before 3] \u24DC <<< x <<< X << q <<< Q < z
   3396 assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
   3397 */
   3398 
   3399 
   3400 #if 0
   3401 /* requires features not yet supported */
   3402 static void TestMoreBefore(void) {
   3403     static const struct {
   3404         const char* rules;
   3405         const char* order[16];
   3406         int32_t size;
   3407     } tests[] = {
   3408         { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
   3409         { "m","M","x","X","q","Q","z","a","n" }, 9},
   3410         { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
   3411         { "m","M","x","X","q","Q","a","z","n" }, 9},
   3412         { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
   3413         { "m","M","x","X","a","q","Q","z","n" }, 9},
   3414         { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
   3415         { "x","X","q","Q","z","m","M","a","n" }, 9},
   3416         { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
   3417         { "m","M","x","X","q","Q","a","z","n" }, 9},
   3418         { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
   3419         { "m","M","x","X","a","q","Q","z","n" }, 9},
   3420         { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
   3421         { "x","X","q","Q","z","n","m","a","M" }, 9},
   3422         { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
   3423         { "x","X","q","Q","m","a","M","z","n" }, 9},
   3424         { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
   3425         { "m","x","X","a","M","q","Q","z","n" }, 9},
   3426         { "&[before 1] s < x <<< X << q <<< Q < z",
   3427         { "r","R","x","X","q","Q","z","s","n" }, 9},
   3428         { "&[before 2] s << x <<< X << q <<< Q < z",
   3429         { "r","R","x","X","q","Q","s","z","n" }, 9},
   3430         { "&[before 3] s <<< x <<< X << q <<< Q < z",
   3431         { "r","R","x","X","s","q","Q","z","n" }, 9},
   3432         { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
   3433         { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
   3434         { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
   3435         { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
   3436         { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
   3437         { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
   3438     };
   3439 
   3440     int32_t i = 0;
   3441 
   3442     for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
   3443         genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
   3444     }
   3445 }
   3446 #endif
   3447 
   3448 static void TestTailorNULL( void ) {
   3449     const static char* rule = "&a <<< '\\u0000'";
   3450     UErrorCode status = U_ZERO_ERROR;
   3451     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3452     uint32_t rlen = 0;
   3453     UChar a = 1, null = 0;
   3454     UCollationResult res = UCOL_EQUAL;
   3455 
   3456     UCollator *coll = NULL;
   3457 
   3458 
   3459     rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
   3460     coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3461 
   3462     if(U_FAILURE(status)) {
   3463         log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
   3464     } else {
   3465         res = ucol_strcoll(coll, &a, 1, &null, 1);
   3466 
   3467         if(res != UCOL_LESS) {
   3468             log_err("NULL was not tailored properly!\n");
   3469         }
   3470     }
   3471 
   3472     ucol_close(coll);
   3473 }
   3474 
   3475 static void
   3476 TestUpperFirstQuaternary(void)
   3477 {
   3478   const char* tests[] = { "B", "b", "Bb", "bB" };
   3479   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
   3480   UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
   3481   genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
   3482 }
   3483 
   3484 static void
   3485 TestJ4960(void)
   3486 {
   3487   const char* tests[] = { "\\u00e2T", "aT" };
   3488   UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
   3489   UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
   3490   const char* tests2[] = { "a", "A" };
   3491   const char* rule = "&[first tertiary ignorable]=A=a";
   3492   UColAttribute att2[] = { UCOL_CASE_LEVEL };
   3493   UColAttributeValue attVals2[] = { UCOL_ON };
   3494   /* Test whether we correctly ignore primary ignorables on case level when */
   3495   /* we have only primary & case level */
   3496   genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
   3497   /* Test whether ICU4J will make case level for sortkeys that have primary strength */
   3498   /* and case level */
   3499   genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
   3500   /* Test whether completely ignorable letters have case level info (they shouldn't) */
   3501   genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
   3502 }
   3503 
   3504 static void
   3505 TestJ5223(void)
   3506 {
   3507   static const char *test = "this is a test string";
   3508   UChar ustr[256];
   3509   int32_t ustr_length = u_unescape(test, ustr, 256);
   3510   unsigned char sortkey[256];
   3511   int32_t sortkey_length;
   3512   UErrorCode status = U_ZERO_ERROR;
   3513   static UCollator *coll = NULL;
   3514   coll = ucol_open("root", &status);
   3515   if(U_FAILURE(status)) {
   3516     log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   3517     return;
   3518   }
   3519   ucol_setStrength(coll, UCOL_PRIMARY);
   3520   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   3521   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3522   if (U_FAILURE(status)) {
   3523     log_err("Failed setting atributes\n");
   3524     return;
   3525   }
   3526   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
   3527   if (sortkey_length > 256) return;
   3528 
   3529   /* we mark the position where the null byte should be written in advance */
   3530   sortkey[sortkey_length-1] = 0xAA;
   3531 
   3532   /* we set the buffer size one byte higher than needed */
   3533   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   3534     sortkey_length+1);
   3535 
   3536   /* no error occurs (for me) */
   3537   if (sortkey[sortkey_length-1] == 0xAA) {
   3538     log_err("Hit bug at first try\n");
   3539   }
   3540 
   3541   /* we mark the position where the null byte should be written again */
   3542   sortkey[sortkey_length-1] = 0xAA;
   3543 
   3544   /* this time we set the buffer size to the exact amount needed */
   3545   sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
   3546     sortkey_length);
   3547 
   3548   /* now the trailing null byte is not written */
   3549   if (sortkey[sortkey_length-1] == 0xAA) {
   3550     log_err("Hit bug at second try\n");
   3551   }
   3552 
   3553   ucol_close(coll);
   3554 }
   3555 
   3556 /* Regression test for Thai partial sort key problem */
   3557 static void
   3558 TestJ5232(void)
   3559 {
   3560     const static char *test[] = {
   3561         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
   3562         "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
   3563     };
   3564 
   3565     genericLocaleStarter("th", test, UPRV_LENGTHOF(test));
   3566 }
   3567 
   3568 static void
   3569 TestJ5367(void)
   3570 {
   3571     const static char *test[] = { "a", "y" };
   3572     const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
   3573     genericRulesStarter(rules, test, UPRV_LENGTHOF(test));
   3574 }
   3575 
   3576 static void
   3577 TestVI5913(void)
   3578 {
   3579     UErrorCode status = U_ZERO_ERROR;
   3580     int32_t i, j;
   3581     UCollator *coll =NULL;
   3582     uint8_t  resColl[100], expColl[100];
   3583     int32_t  rLen, tLen, ruleLen, sLen, kLen;
   3584     UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
   3585     UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
   3586     /*
   3587      * Note: Just tailoring &z<ae^ does not work as expected:
   3588      * The UCA spec requires for discontiguous contractions that they
   3589      * extend an *existing match* by one combining mark at a time.
   3590      * Therefore, ae must be a contraction so that the builder finds
   3591      * discontiguous contractions for ae^, for example with an intervening underdot.
   3592      * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
   3593      */
   3594     UChar rule3[256]={
   3595         0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
   3596         0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
   3597         0};
   3598     static const UChar tData[][20]={
   3599         {0x1EAC, 0},
   3600         {0x0041, 0x0323, 0x0302, 0},
   3601         {0x1EA0, 0x0302, 0},
   3602         {0x00C2, 0x0323, 0},
   3603         {0x1ED8, 0},  /* O with dot and circumflex */
   3604         {0x1ECC, 0x0302, 0},
   3605         {0x1EB7, 0},
   3606         {0x1EA1, 0x0306, 0},
   3607     };
   3608     static const UChar tailorData[][20]={
   3609         {0x1FA2, 0},  /* Omega with 3 combining marks */
   3610         {0x03C9, 0x0313, 0x0300, 0x0345, 0},
   3611         {0x1FF3, 0x0313, 0x0300, 0},
   3612         {0x1F60, 0x0300, 0x0345, 0},
   3613         {0x1F62, 0x0345, 0},
   3614         {0x1FA0, 0x0300, 0},
   3615     };
   3616     static const UChar tailorData2[][20]={
   3617         {0x1E63, 0x030C, 0},  /* s with dot below + caron */
   3618         {0x0073, 0x0323, 0x030C, 0},
   3619         {0x0073, 0x030C, 0x0323, 0},
   3620     };
   3621     static const UChar tailorData3[][20]={
   3622         {0x007a, 0},  /*  z */
   3623         {0x0061, 0x0065, 0},  /*  a + e */
   3624         {0x0061, 0x00ea, 0}, /* a + e with circumflex */
   3625         {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
   3626         {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
   3627         {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
   3628         {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
   3629         {0x00EA, 0},  /* e with circumflex  */
   3630     };
   3631 
   3632     /* Test Vietnamese sort. */
   3633     coll = ucol_open("vi", &status);
   3634     if(U_FAILURE(status)) {
   3635         log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
   3636         return;
   3637     }
   3638     log_verbose("\n\nVI collation:");
   3639     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
   3640         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   3641     }
   3642     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
   3643         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   3644     }
   3645     if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
   3646         log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
   3647     }
   3648     if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
   3649         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   3650     }
   3651 
   3652     for (j=0; j<8; j++) {
   3653         tLen = u_strlen(tData[j]);
   3654         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   3655         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   3656         for(i = 0; i<rLen; i++) {
   3657             log_verbose(" %02X", resColl[i]);
   3658         }
   3659     }
   3660 
   3661     ucol_close(coll);
   3662 
   3663     /* Test Romanian sort. */
   3664     coll = ucol_open("ro", &status);
   3665     log_verbose("\n\nRO collation:");
   3666     if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
   3667         log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
   3668     }
   3669     if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
   3670         log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
   3671     }
   3672     if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
   3673         log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
   3674     }
   3675 
   3676     for (j=4; j<8; j++) {
   3677         tLen = u_strlen(tData[j]);
   3678         log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
   3679         rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
   3680         for(i = 0; i<rLen; i++) {
   3681             log_verbose(" %02X", resColl[i]);
   3682         }
   3683     }
   3684     ucol_close(coll);
   3685 
   3686     /* Test the precomposed Greek character with 3 combining marks. */
   3687     log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
   3688     ruleLen = u_strlen(rule);
   3689     coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3690     if (U_FAILURE(status)) {
   3691         log_err("ucol_openRules failed with %s\n", u_errorName(status));
   3692         return;
   3693     }
   3694     sLen = u_strlen(tailorData[0]);
   3695     for (j=1; j<6; j++) {
   3696         tLen = u_strlen(tailorData[j]);
   3697         if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
   3698             log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
   3699         }
   3700     }
   3701     /* Test getSortKey. */
   3702     tLen = u_strlen(tailorData[0]);
   3703     kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
   3704     for (j=0; j<6; j++) {
   3705         tLen = u_strlen(tailorData[j]);
   3706         rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
   3707         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3708             log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   3709             for(i = 0; i<rLen; i++) {
   3710                 log_err(" %02X", resColl[i]);
   3711             }
   3712         }
   3713     }
   3714     ucol_close(coll);
   3715 
   3716     log_verbose("\n\nTailoring test for s with caron:");
   3717     ruleLen = u_strlen(rule2);
   3718     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3719     tLen = u_strlen(tailorData2[0]);
   3720     kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
   3721     for (j=1; j<3; j++) {
   3722         tLen = u_strlen(tailorData2[j]);
   3723         rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
   3724         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3725             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
   3726             for(i = 0; i<rLen; i++) {
   3727                 log_err(" %02X", resColl[i]);
   3728             }
   3729         }
   3730     }
   3731     ucol_close(coll);
   3732 
   3733     log_verbose("\n\nTailoring test for &z< ae with circumflex:");
   3734     ruleLen = u_strlen(rule3);
   3735     coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3736     tLen = u_strlen(tailorData3[3]);
   3737     kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
   3738     log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
   3739     for(i = 0; i<kLen; i++) {
   3740         log_verbose(" %02X", expColl[i]);
   3741     }
   3742     for (j=4; j<6; j++) {
   3743         tLen = u_strlen(tailorData3[j]);
   3744         rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
   3745 
   3746         if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
   3747             log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
   3748             for(i = 0; i<rLen; i++) {
   3749                 log_err(" %02X", resColl[i]);
   3750             }
   3751         }
   3752 
   3753         log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
   3754          for(i = 0; i<rLen; i++) {
   3755              log_verbose(" %02X", resColl[i]);
   3756          }
   3757     }
   3758     ucol_close(coll);
   3759 }
   3760 
   3761 static void
   3762 TestTailor6179(void)
   3763 {
   3764     UErrorCode status = U_ZERO_ERROR;
   3765     int32_t i;
   3766     UCollator *coll =NULL;
   3767     uint8_t  resColl[100];
   3768     int32_t  rLen, tLen, ruleLen;
   3769     /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
   3770     static const UChar rule1[]={
   3771             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
   3772             0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
   3773             0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
   3774             0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
   3775     /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
   3776     static const UChar rule2[]={
   3777             0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
   3778             0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
   3779             0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
   3780             0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
   3781             0x3C,0x3C,0x20,0x62,0};
   3782 
   3783     static const UChar tData1[][4]={
   3784         {0x61, 0},
   3785         {0x62, 0},
   3786         { 0xFDD0,0x009E, 0}
   3787     };
   3788     static const UChar tData2[][4]={
   3789         {0x61, 0},
   3790         {0x62, 0},
   3791         { 0xFDD0,0x009E, 0}
   3792      };
   3793 
   3794     /*
   3795      * These values from FractionalUCA.txt will change,
   3796      * and need to be updated here.
   3797      * TODO: Make this not check for particular sort keys.
   3798      * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
   3799      */
   3800     static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
   3801     static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
   3802     static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
   3803     static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
   3804 
   3805     UParseError parseError;
   3806 
   3807     /* Test [Last Primary ignorable] */
   3808 
   3809     log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
   3810     ruleLen = u_strlen(rule1);
   3811     coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3812     if (U_FAILURE(status)) {
   3813         log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
   3814         return;
   3815     }
   3816     tLen = u_strlen(tData1[0]);
   3817     rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
   3818     if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
   3819         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
   3820         for(i = 0; i<rLen; i++) {
   3821             log_err(" %02X", resColl[i]);
   3822         }
   3823         log_err("\n");
   3824     }
   3825     tLen = u_strlen(tData1[1]);
   3826     rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
   3827     if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
   3828         log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
   3829         for(i = 0; i<rLen; i++) {
   3830             log_err(" %02X", resColl[i]);
   3831         }
   3832         log_err("\n");
   3833     }
   3834     ucol_close(coll);
   3835 
   3836 
   3837     /* Test [Last Secondary ignorable] */
   3838     log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
   3839     ruleLen = u_strlen(rule2);
   3840     coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
   3841     if (U_FAILURE(status)) {
   3842         log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
   3843         log_info("  offset=%d  \"%s\" | \"%s\"\n",
   3844                  parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
   3845         return;
   3846     }
   3847     tLen = u_strlen(tData2[0]);
   3848     rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
   3849     if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
   3850         log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
   3851         for(i = 0; i<rLen; i++) {
   3852             log_err(" %02X", resColl[i]);
   3853         }
   3854         log_err("\n");
   3855     }
   3856     tLen = u_strlen(tData2[1]);
   3857     rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
   3858     if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
   3859       log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
   3860       for(i = 0; i<rLen; i++) {
   3861         log_err(" %02X", resColl[i]);
   3862       }
   3863       log_err("\n");
   3864     }
   3865     ucol_close(coll);
   3866 }
   3867 
   3868 static void
   3869 TestUCAPrecontext(void)
   3870 {
   3871     UErrorCode status = U_ZERO_ERROR;
   3872     int32_t i, j;
   3873     UCollator *coll =NULL;
   3874     uint8_t  resColl[100], prevColl[100];
   3875     int32_t  rLen, tLen, ruleLen;
   3876     UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
   3877     UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
   3878     /* & l middle-dot << a  a is an expansion. */
   3879 
   3880     UChar tData1[][20]={
   3881             { 0xb7, 0},  /* standalone middle dot(0xb7) */
   3882             { 0x387, 0}, /* standalone middle dot(0x387) */
   3883             { 0x61, 0},  /* a */
   3884             { 0x6C, 0},  /* l */
   3885             { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
   3886             { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
   3887             { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
   3888             { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
   3889             { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
   3890             { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
   3891             { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
   3892      };
   3893 
   3894     log_verbose("\n\nEN collation:");
   3895     coll = ucol_open("en", &status);
   3896     if (U_FAILURE(status)) {
   3897         log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
   3898         return;
   3899     }
   3900     for (j=0; j<11; j++) {
   3901         tLen = u_strlen(tData1[j]);
   3902         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3903         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3904             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3905                     j, tData1[j]);
   3906         }
   3907         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3908         for(i = 0; i<rLen; i++) {
   3909             log_verbose(" %02X", resColl[i]);
   3910         }
   3911         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3912      }
   3913      ucol_close(coll);
   3914 
   3915 
   3916      log_verbose("\n\nJA collation:");
   3917      coll = ucol_open("ja", &status);
   3918      if (U_FAILURE(status)) {
   3919          log_err("Tailoring test: &z <<a|- failed!");
   3920          return;
   3921      }
   3922      for (j=0; j<11; j++) {
   3923          tLen = u_strlen(tData1[j]);
   3924          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3925          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3926              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3927                      j, tData1[j]);
   3928          }
   3929          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3930          for(i = 0; i<rLen; i++) {
   3931              log_verbose(" %02X", resColl[i]);
   3932          }
   3933          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3934       }
   3935       ucol_close(coll);
   3936 
   3937 
   3938       log_verbose("\n\nTailoring test: & middle dot < a ");
   3939       ruleLen = u_strlen(rule1);
   3940       coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3941       if (U_FAILURE(status)) {
   3942           log_err("Tailoring test: & middle dot < a failed!");
   3943           return;
   3944       }
   3945       for (j=0; j<11; j++) {
   3946           tLen = u_strlen(tData1[j]);
   3947           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3948           if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3949               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3950                       j, tData1[j]);
   3951           }
   3952           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3953           for(i = 0; i<rLen; i++) {
   3954               log_verbose(" %02X", resColl[i]);
   3955           }
   3956           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3957        }
   3958        ucol_close(coll);
   3959 
   3960 
   3961        log_verbose("\n\nTailoring test: & l middle-dot << a ");
   3962        ruleLen = u_strlen(rule2);
   3963        coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
   3964        if (U_FAILURE(status)) {
   3965            log_err("Tailoring test: & l middle-dot << a failed!");
   3966            return;
   3967        }
   3968        for (j=0; j<11; j++) {
   3969            tLen = u_strlen(tData1[j]);
   3970            rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
   3971            if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
   3972                log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
   3973                        j, tData1[j]);
   3974            }
   3975            if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
   3976                log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
   3977                        j, tData1[j]);
   3978            }
   3979            log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
   3980            for(i = 0; i<rLen; i++) {
   3981                log_verbose(" %02X", resColl[i]);
   3982            }
   3983            uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
   3984         }
   3985         ucol_close(coll);
   3986 }
   3987 
   3988 static void
   3989 TestOutOfBuffer5468(void)
   3990 {
   3991     static const char *test = "\\u4e00";
   3992     UChar ustr[256];
   3993     int32_t ustr_length = u_unescape(test, ustr, 256);
   3994     unsigned char shortKeyBuf[1];
   3995     int32_t sortkey_length;
   3996     UErrorCode status = U_ZERO_ERROR;
   3997     static UCollator *coll = NULL;
   3998 
   3999     coll = ucol_open("root", &status);
   4000     if(U_FAILURE(status)) {
   4001       log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
   4002       return;
   4003     }
   4004     ucol_setStrength(coll, UCOL_PRIMARY);
   4005     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   4006     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4007     if (U_FAILURE(status)) {
   4008       log_err("Failed setting atributes\n");
   4009       return;
   4010     }
   4011 
   4012     sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
   4013     if (sortkey_length != 4) {
   4014         log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
   4015     }
   4016     log_verbose("length of sortKey is %d", sortkey_length);
   4017     ucol_close(coll);
   4018 }
   4019 
   4020 #define TSKC_DATA_SIZE 5
   4021 #define TSKC_BUF_SIZE  50
   4022 static void
   4023 TestSortKeyConsistency(void)
   4024 {
   4025     UErrorCode icuRC = U_ZERO_ERROR;
   4026     UCollator* ucol;
   4027     UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
   4028 
   4029     uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   4030     uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
   4031     int32_t i, j, i2;
   4032 
   4033     ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
   4034     if (U_FAILURE(icuRC))
   4035     {
   4036         log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
   4037         return;
   4038     }
   4039 
   4040     for (i = 0; i < TSKC_DATA_SIZE; i++)
   4041     {
   4042         UCharIterator uiter;
   4043         uint32_t state[2] = { 0, 0 };
   4044         int32_t dataLen = i+1;
   4045         for (j=0; j<TSKC_BUF_SIZE; j++)
   4046             bufFull[i][j] = bufPart[i][j] = 0;
   4047 
   4048         /* Full sort key */
   4049         ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
   4050 
   4051         /* Partial sort key */
   4052         uiter_setString(&uiter, data, dataLen);
   4053         ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
   4054         if (U_FAILURE(icuRC))
   4055         {
   4056             log_err("ucol_nextSortKeyPart failed\n");
   4057             ucol_close(ucol);
   4058             return;
   4059         }
   4060 
   4061         for (i2=0; i2<i; i2++)
   4062         {
   4063             UBool fullMatch = TRUE;
   4064             UBool partMatch = TRUE;
   4065             for (j=0; j<TSKC_BUF_SIZE; j++)
   4066             {
   4067                 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
   4068                 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
   4069             }
   4070             if (fullMatch != partMatch) {
   4071                 log_err(fullMatch ? "full key was consistent, but partial key changed\n"
   4072                                   : "partial key was consistent, but full key changed\n");
   4073                 ucol_close(ucol);
   4074                 return;
   4075             }
   4076         }
   4077     }
   4078 
   4079     /*=============================================*/
   4080    ucol_close(ucol);
   4081 }
   4082 
   4083 /* ticket: 6101 */
   4084 static void TestCroatianSortKey(void) {
   4085     const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
   4086     UErrorCode status = U_ZERO_ERROR;
   4087     UCollator *ucol;
   4088     UCharIterator iter;
   4089 
   4090     static const UChar text[] = { 0x0044, 0xD81A };
   4091 
   4092     size_t length = UPRV_LENGTHOF(text);
   4093 
   4094     uint8_t textSortKey[32];
   4095     size_t lenSortKey = 32;
   4096     size_t actualSortKeyLen;
   4097     uint32_t uStateInfo[2] = { 0, 0 };
   4098 
   4099     ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
   4100     if (U_FAILURE(status)) {
   4101         log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
   4102         return;
   4103     }
   4104 
   4105     uiter_setString(&iter, text, length);
   4106 
   4107     actualSortKeyLen = ucol_nextSortKeyPart(
   4108         ucol, &iter, (uint32_t*)uStateInfo,
   4109         textSortKey, lenSortKey, &status
   4110         );
   4111 
   4112     if (actualSortKeyLen == lenSortKey) {
   4113         log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
   4114     }
   4115 
   4116     ucol_close(ucol);
   4117 }
   4118 
   4119 /* ticket: 6140 */
   4120 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
   4121  * they are both Hiragana and Katakana
   4122  */
   4123 #define SORTKEYLEN 50
   4124 static void TestHiragana(void) {
   4125     UErrorCode status = U_ZERO_ERROR;
   4126     UCollator* ucol;
   4127     UCollationResult strcollresult;
   4128     UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
   4129     UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
   4130     int32_t data1Len = UPRV_LENGTHOF(data1);
   4131     int32_t data2Len = UPRV_LENGTHOF(data2);
   4132     int32_t i, j;
   4133     uint8_t sortKey1[SORTKEYLEN];
   4134     uint8_t sortKey2[SORTKEYLEN];
   4135 
   4136     UCharIterator uiter1;
   4137     UCharIterator uiter2;
   4138     uint32_t state1[2] = { 0, 0 };
   4139     uint32_t state2[2] = { 0, 0 };
   4140     int32_t keySize1;
   4141     int32_t keySize2;
   4142 
   4143     ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
   4144             &status);
   4145     if (U_FAILURE(status)) {
   4146         log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
   4147         return;
   4148     }
   4149 
   4150     /* Start of full sort keys */
   4151     /* Full sort key1 */
   4152     keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
   4153     /* Full sort key2 */
   4154     keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
   4155     if (keySize1 == keySize2) {
   4156         for (i = 0; i < keySize1; i++) {
   4157             if (sortKey1[i] != sortKey2[i]) {
   4158                 log_err("Full sort keys are different. Should be equal.");
   4159             }
   4160         }
   4161     } else {
   4162         log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
   4163     }
   4164     /* End of full sort keys */
   4165 
   4166     /* Start of partial sort keys */
   4167     /* Partial sort key1 */
   4168     uiter_setString(&uiter1, data1, data1Len);
   4169     keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
   4170     /* Partial sort key2 */
   4171     uiter_setString(&uiter2, data2, data2Len);
   4172     keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
   4173     if (U_SUCCESS(status) && keySize1 == keySize2) {
   4174         for (j = 0; j < keySize1; j++) {
   4175             if (sortKey1[j] != sortKey2[j]) {
   4176                 log_err("Partial sort keys are different. Should be equal");
   4177             }
   4178         }
   4179     } else {
   4180         log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
   4181     }
   4182     /* End of partial sort keys */
   4183 
   4184     /* Start of strcoll */
   4185     /* Use ucol_strcoll() to determine ordering */
   4186     strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
   4187     if (strcollresult != UCOL_EQUAL) {
   4188         log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
   4189     }
   4190 
   4191     ucol_close(ucol);
   4192 }
   4193 
/*
 * Convenient struct for running collation tests: one pair of strings plus the
 * comparison result expected for them.
 * Each string is a fixed array of MAX_TOKEN_LEN UChars; elements not named in
 * an initializer are zero, so strings shorter than MAX_TOKEN_LEN code units
 * are NUL-terminated.
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* Expected result of comparing source with target, e.g. UCOL_LESS or UCOL_EQUAL */
} OneTestCase;
   4200 
   4201 /*
   4202  * Utility function to test one collation test case.
   4203  * @param testcases Array of test cases.
   4204  * @param n_testcases Size of the array testcases.
   4205  * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
   4206  * @param n_rules Size of the array str_rules.
   4207  */
   4208 static void doTestOneTestCase(const OneTestCase testcases[],
   4209                               int n_testcases,
   4210                               const char* str_rules[],
   4211                               int n_rules)
   4212 {
   4213   int rule_no, testcase_no;
   4214   UChar rule[500];
   4215   int32_t length = 0;
   4216   UErrorCode status = U_ZERO_ERROR;
   4217   UParseError parse_error;
   4218   UCollator  *myCollation;
   4219 
   4220   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   4221 
   4222     length = u_unescape(str_rules[rule_no], rule, 500);
   4223     if (length == 0) {
   4224         log_err("ERROR: The rule cannot be unescaped: %s\n");
   4225         return;
   4226     }
   4227     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   4228     if(U_FAILURE(status)){
   4229         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   4230         log_info("  offset=%d  \"%s\" | \"%s\"\n",
   4231                  parse_error.offset,
   4232                  aescstrdup(parse_error.preContext, -1),
   4233                  aescstrdup(parse_error.postContext, -1));
   4234         return;
   4235     }
   4236     log_verbose("Testing the <<* syntax\n");
   4237     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   4238     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4239     for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
   4240       doTest(myCollation,
   4241              testcases[testcase_no].source,
   4242              testcases[testcase_no].target,
   4243              testcases[testcase_no].result
   4244              );
   4245     }
   4246     ucol_close(myCollation);
   4247   }
   4248 }
   4249 
/*
 * Expected comparison results shared by the TestSameStrengthList* tests that
 * tailor a, b, c, d, k, l, m, x, y, z, f, g, h, e and the digits 1-3.
 * The per-entry comments show the strings actually stored in the entry.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" << "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
   4277 
/*
 * Expected comparison results for the tailorings that place U+FFFB and the
 * supplementary code points U+10000..U+10002 (stored as surrogate pairs)
 * after U+4E00.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same as previous entry) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
   4289 
/*
 * Expected comparison results for the "qwerty" tailorings
 * (&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d and equivalent spellings).
 * In the comments <, << and <<< denote the first level at which the two
 * strings differ under those rules; every entry expects UCOL_LESS.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" << "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" < "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" <<< "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" <<< "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" << "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
   4310 
   4311 static void TestSameStrengthList(void)
   4312 {
   4313   const char* strRules[] = {
   4314     /* Normal */
   4315     "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
   4316 
   4317     /* Lists */
   4318     "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
   4319   };
   4320   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
   4321 }
   4322 
   4323 static void TestSameStrengthListQuoted(void)
   4324 {
   4325   const char* strRules[] = {
   4326     /* Lists with quoted characters */
   4327     "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
   4328     "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
   4329 
   4330     "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
   4331     "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
   4332 
   4333     "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
   4334     "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
   4335   };
   4336   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
   4337 }
   4338 
   4339 static void TestSameStrengthListSupplemental(void)
   4340 {
   4341   const char* strRules[] = {
   4342     "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
   4343     "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
   4344     "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
   4345     "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
   4346   };
   4347   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
   4348 }
   4349 
   4350 static void TestSameStrengthListQwerty(void)
   4351 {
   4352   const char* strRules[] = {
   4353     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   4354     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   4355     "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
   4356     "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
   4357     "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
   4358 
   4359     /* Quoted characters also will work if two quoted characters are not consecutive.  */
   4360     "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
   4361 
   4362     /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
   4363     /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
   4364 
   4365  };
   4366   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
   4367 }
   4368 
   4369 static void TestSameStrengthListQuotedQwerty(void)
   4370 {
   4371   const char* strRules[] = {
   4372     "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
   4373     "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
   4374     "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
   4375 
   4376     /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
   4377     /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
   4378    };
   4379   doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
   4380 }
   4381 
   4382 static void TestSameStrengthListRanges(void)
   4383 {
   4384   const char* strRules[] = {
   4385     "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
   4386   };
   4387   doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
   4388 }
   4389 
   4390 static void TestSameStrengthListSupplementalRanges(void)
   4391 {
   4392   const char* strRules[] = {
   4393     /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
   4394     "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
   4395   };
   4396   doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
   4397 }
   4398 
   4399 static void TestSpecialCharacters(void)
   4400 {
   4401   const char* strRules[] = {
   4402     /* Normal */
   4403     "&';'<'+'<','<'-'<'&'<'*'",
   4404 
   4405     /* List */
   4406     "&';'<*'+,-&*'",
   4407 
   4408     /* Range */
   4409     "&';'<*'+'-'-&*'",
   4410   };
   4411 
   4412   const static OneTestCase specialCharacterStrings[] = {
   4413     { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
   4414     { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
   4415     { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
   4416     { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
   4417   };
   4418   doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   4419 }
   4420 
   4421 static void TestPrivateUseCharacters(void)
   4422 {
   4423   const char* strRules[] = {
   4424     /* Normal */
   4425     "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
   4426     "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
   4427   };
   4428 
   4429   const static OneTestCase privateUseCharacterStrings[] = {
   4430     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4431     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4432     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4433     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4434     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4435     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4436   };
   4437   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   4438 }
   4439 
   4440 static void TestPrivateUseCharactersInList(void)
   4441 {
   4442   const char* strRules[] = {
   4443     /* List */
   4444     "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
   4445     /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
   4446     "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
   4447   };
   4448 
   4449   const static OneTestCase privateUseCharacterStrings[] = {
   4450     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4451     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4452     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4453     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4454     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4455     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4456   };
   4457   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   4458 }
   4459 
   4460 static void TestPrivateUseCharactersInRange(void)
   4461 {
   4462   const char* strRules[] = {
   4463     /* Range */
   4464     "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
   4465     "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
   4466     /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
   4467   };
   4468 
   4469   const static OneTestCase privateUseCharacterStrings[] = {
   4470     { {0x5ea7}, {0xe2d8}, UCOL_LESS },
   4471     { {0xe2d8}, {0xe2d9}, UCOL_LESS },
   4472     { {0xe2d9}, {0xe2da}, UCOL_LESS },
   4473     { {0xe2da}, {0xe2db}, UCOL_LESS },
   4474     { {0xe2db}, {0xe2dc}, UCOL_LESS },
   4475     { {0xe2dc}, {0x4e8d}, UCOL_LESS },
   4476   };
   4477   doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   4478 }
   4479 
   4480 static void TestInvalidListsAndRanges(void)
   4481 {
   4482   const char* invalidRules[] = {
   4483     /* Range not in starred expression */
   4484     "&\\ufffe<\\uffff-\\U00010002",
   4485 
   4486     /* Range without start */
   4487     "&a<*-c",
   4488 
   4489     /* Range without end */
   4490     "&a<*b-",
   4491 
   4492     /* More than one hyphen */
   4493     "&a<*b-g-l",
   4494 
   4495     /* Range in the wrong order */
   4496     "&a<*k-b",
   4497 
   4498   };
   4499 
   4500   UChar rule[500];
   4501   UErrorCode status = U_ZERO_ERROR;
   4502   UParseError parse_error;
   4503   int n_rules = UPRV_LENGTHOF(invalidRules);
   4504   int rule_no;
   4505   int length;
   4506   UCollator  *myCollation;
   4507 
   4508   for (rule_no = 0; rule_no < n_rules; ++rule_no) {
   4509 
   4510     length = u_unescape(invalidRules[rule_no], rule, 500);
   4511     if (length == 0) {
   4512         log_err("ERROR: The rule cannot be unescaped: %s\n");
   4513         return;
   4514     }
   4515     myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
   4516     (void)myCollation;      /* Suppress set but not used warning. */
   4517     if(!U_FAILURE(status)){
   4518       log_err("ERROR: Could not cause a failure as expected: \n");
   4519     }
   4520     status = U_ZERO_ERROR;
   4521   }
   4522 }
   4523 
   4524 /*
   4525  * This test ensures that characters placed before a character in a different script have the same lead byte
   4526  * in their collation key before and after script reordering.
   4527  */
   4528 static void TestBeforeRuleWithScriptReordering(void)
   4529 {
   4530     UParseError error;
   4531     UErrorCode status = U_ZERO_ERROR;
   4532     UCollator  *myCollation;
   4533     char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
   4534     UChar rules[500];
   4535     uint32_t rulesLength = 0;
   4536     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   4537     UCollationResult collResult;
   4538 
   4539     uint8_t baseKey[256];
   4540     uint32_t baseKeyLength;
   4541     uint8_t beforeKey[256];
   4542     uint32_t beforeKeyLength;
   4543 
   4544     UChar base[] = { 0x03b1 }; /* base */
   4545     int32_t baseLen = UPRV_LENGTHOF(base);
   4546 
   4547     UChar before[] = { 0x0e01 }; /* ko kai */
   4548     int32_t beforeLen = UPRV_LENGTHOF(before);
   4549 
   4550     /*UChar *data[] = { before, base };
   4551     genericRulesStarter(srules, data, 2);*/
   4552 
   4553     log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
   4554 
   4555     (void)beforeKeyLength;   /* Suppress set but not used warnings. */
   4556     (void)baseKeyLength;
   4557 
   4558     /* build collator */
   4559     log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
   4560 
   4561     rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
   4562     myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   4563     if(U_FAILURE(status)) {
   4564         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   4565         return;
   4566     }
   4567 
   4568     /* check collation results - before rule applied but not script reordering */
   4569     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   4570     if (collResult != UCOL_GREATER) {
   4571         log_err("Collation result not correct before script reordering = %d\n", collResult);
   4572     }
   4573 
   4574     /* check the lead byte of the collation keys before script reordering */
   4575     baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   4576     beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   4577     if (baseKey[0] != beforeKey[0]) {
   4578       log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   4579    }
   4580 
   4581     /* reorder the scripts */
   4582     ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
   4583     if(U_FAILURE(status)) {
   4584         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   4585         return;
   4586     }
   4587 
   4588     /* check collation results - before rule applied and after script reordering */
   4589     collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
   4590     if (collResult != UCOL_GREATER) {
   4591         log_err("Collation result not correct after script reordering = %d\n", collResult);
   4592     }
   4593 
   4594     /* check the lead byte of the collation keys after script reordering */
   4595     ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
   4596     ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
   4597     if (baseKey[0] != beforeKey[0]) {
   4598         log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
   4599     }
   4600 
   4601     ucol_close(myCollation);
   4602 }
   4603 
   4604 /*
   4605  * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
   4606  */
   4607 static void TestNonLeadBytesDuringCollationReordering(void)
   4608 {
   4609     UErrorCode status = U_ZERO_ERROR;
   4610     UCollator  *myCollation;
   4611     int32_t reorderCodes[1] = {USCRIPT_GREEK};
   4612 
   4613     uint8_t baseKey[256];
   4614     uint32_t baseKeyLength;
   4615     uint8_t reorderKey[256];
   4616     uint32_t reorderKeyLength;
   4617 
   4618     UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
   4619 
   4620     uint32_t i;
   4621 
   4622 
   4623     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4624 
   4625     /* build collator tertiary */
   4626     myCollation = ucol_open("", &status);
   4627     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4628     if(U_FAILURE(status)) {
   4629         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4630         return;
   4631     }
   4632     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
   4633 
   4634     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
   4635     if(U_FAILURE(status)) {
   4636         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4637         return;
   4638     }
   4639     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
   4640 
   4641     if (baseKeyLength != reorderKeyLength) {
   4642         log_err("Key lengths not the same during reordering.\n");
   4643         return;
   4644     }
   4645 
   4646     for (i = 1; i < baseKeyLength; i++) {
   4647         if (baseKey[i] != reorderKey[i]) {
   4648             log_err("Collation key bytes not the same at position %d.\n", i);
   4649             return;
   4650         }
   4651     }
   4652     ucol_close(myCollation);
   4653 
   4654     /* build collator quaternary */
   4655     myCollation = ucol_open("", &status);
   4656     ucol_setStrength(myCollation, UCOL_QUATERNARY);
   4657     if(U_FAILURE(status)) {
   4658         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4659         return;
   4660     }
   4661     baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
   4662 
   4663     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
   4664     if(U_FAILURE(status)) {
   4665         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4666         return;
   4667     }
   4668     reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
   4669 
   4670     if (baseKeyLength != reorderKeyLength) {
   4671         log_err("Key lengths not the same during reordering.\n");
   4672         return;
   4673     }
   4674 
   4675     for (i = 1; i < baseKeyLength; i++) {
   4676         if (baseKey[i] != reorderKey[i]) {
   4677             log_err("Collation key bytes not the same at position %d.\n", i);
   4678             return;
   4679         }
   4680     }
   4681     ucol_close(myCollation);
   4682 }
   4683 
   4684 /*
   4685  * Test reordering API.
   4686  */
   4687 static void TestReorderingAPI(void)
   4688 {
   4689     UErrorCode status = U_ZERO_ERROR;
   4690     UCollator  *myCollation;
   4691     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4692     int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
   4693     int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4694     int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
   4695     UCollationResult collResult;
   4696     int32_t retrievedReorderCodesLength;
   4697     int32_t retrievedReorderCodes[10];
   4698     UChar greekString[] = { 0x03b1 };
   4699     UChar punctuationString[] = { 0x203e };
   4700     int loopIndex;
   4701 
   4702     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4703 
   4704     /* build collator tertiary */
   4705     myCollation = ucol_open("", &status);
   4706     ucol_setStrength(myCollation, UCOL_TERTIARY);
   4707     if(U_FAILURE(status)) {
   4708         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4709         return;
   4710     }
   4711 
   4712     /* set the reorderding */
   4713     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
   4714     if (U_FAILURE(status)) {
   4715         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4716         return;
   4717     }
   4718 
   4719     /* get the reordering */
   4720     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4721     if (status != U_BUFFER_OVERFLOW_ERROR) {
   4722         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   4723         return;
   4724     }
   4725     status = U_ZERO_ERROR;
   4726     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
   4727         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
   4728         return;
   4729     }
   4730     /* now let's really get it */
   4731     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
   4732     if (U_FAILURE(status)) {
   4733         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4734         return;
   4735     }
   4736     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
   4737         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
   4738         return;
   4739     }
   4740     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4741         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   4742             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4743             return;
   4744         }
   4745     }
   4746     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
   4747     if (collResult != UCOL_LESS) {
   4748         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   4749         return;
   4750     }
   4751 
   4752     /* clear the reordering */
   4753     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   4754     if (U_FAILURE(status)) {
   4755         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   4756         return;
   4757     }
   4758 
   4759     /* get the reordering again */
   4760     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4761     if (retrievedReorderCodesLength != 0) {
   4762         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   4763         return;
   4764     }
   4765 
   4766     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
   4767     if (collResult != UCOL_GREATER) {
   4768         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4769         return;
   4770     }
   4771 
   4772     /* clear the reordering using [NONE] */
   4773     ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
   4774     if (U_FAILURE(status)) {
   4775         log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
   4776         return;
   4777     }
   4778 
   4779     /* get the reordering again */
   4780     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4781     if (retrievedReorderCodesLength != 0) {
   4782         log_err_status(status,
   4783                        "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
   4784                        retrievedReorderCodesLength);
   4785         return;
   4786     }
   4787 
   4788     /* test for error condition on duplicate reorder codes */
   4789     ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
   4790     if (!U_FAILURE(status)) {
   4791         log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
   4792         return;
   4793     }
   4794 
   4795     status = U_ZERO_ERROR;
   4796     /* test for reorder codes after a reset code */
   4797     ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
   4798     if (!U_FAILURE(status)) {
   4799         log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
   4800         return;
   4801     }
   4802 
   4803     ucol_close(myCollation);
   4804 }
   4805 
   4806 /*
   4807  * Test reordering API.
   4808  */
   4809 static void TestReorderingAPIWithRuleCreatedCollator(void)
   4810 {
   4811     UErrorCode status = U_ZERO_ERROR;
   4812     UCollator  *myCollation;
   4813     UChar rules[90];
   4814     static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
   4815     static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   4816     static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
   4817     UCollationResult collResult;
   4818     int32_t retrievedReorderCodesLength;
   4819     int32_t retrievedReorderCodes[10];
   4820     static const UChar greekString[] = { 0x03b1 };
   4821     static const UChar punctuationString[] = { 0x203e };
   4822     static const UChar hanString[] = { 0x65E5, 0x672C };
   4823     int loopIndex;
   4824 
   4825     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   4826 
   4827     /* build collator from rules */
   4828     u_uastrcpy(rules, "[reorder Hani Grek]");
   4829     myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
   4830     if(U_FAILURE(status)) {
   4831         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   4832         return;
   4833     }
   4834 
   4835     /* get the reordering */
   4836     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
   4837     if (U_FAILURE(status)) {
   4838         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4839         return;
   4840     }
   4841     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
   4842         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
   4843         return;
   4844     }
   4845     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4846         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   4847             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4848             return;
   4849         }
   4850     }
   4851     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
   4852     if (collResult != UCOL_GREATER) {
   4853         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4854         return;
   4855     }
   4856 
   4857     /* set the reordering */
   4858     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
   4859     if (U_FAILURE(status)) {
   4860         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   4861         return;
   4862     }
   4863 
   4864     /* get the reordering */
   4865     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4866     if (status != U_BUFFER_OVERFLOW_ERROR) {
   4867         log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
   4868         return;
   4869     }
   4870     status = U_ZERO_ERROR;
   4871     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
   4872         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
   4873         return;
   4874     }
   4875     /* now let's really get it */
   4876     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
   4877     if (U_FAILURE(status)) {
   4878         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4879         return;
   4880     }
   4881     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
   4882         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
   4883         return;
   4884     }
   4885     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4886         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   4887             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4888             return;
   4889         }
   4890     }
   4891     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
   4892     if (collResult != UCOL_LESS) {
   4893         log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
   4894         return;
   4895     }
   4896 
   4897     /* clear the reordering */
   4898     ucol_setReorderCodes(myCollation, NULL, 0, &status);
   4899     if (U_FAILURE(status)) {
   4900         log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
   4901         return;
   4902     }
   4903 
   4904     /* get the reordering again */
   4905     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
   4906     if (retrievedReorderCodesLength != 0) {
   4907         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
   4908         return;
   4909     }
   4910 
   4911     collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
   4912     if (collResult != UCOL_GREATER) {
   4913         log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
   4914         return;
   4915     }
   4916 
   4917     /* reset the reordering */
   4918     ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
   4919     if (U_FAILURE(status)) {
   4920         log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
   4921         return;
   4922     }
   4923     retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
   4924     if (U_FAILURE(status)) {
   4925         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   4926         return;
   4927     }
   4928     if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
   4929         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
   4930         return;
   4931     }
   4932     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   4933         if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
   4934             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   4935             return;
   4936         }
   4937     }
   4938 
   4939     ucol_close(myCollation);
   4940 }
   4941 
   4942 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
   4943     int32_t i;
   4944     for (i = 0; i < length; ++i) {
   4945         if (expectedScript == scripts[i]) { return TRUE; }
   4946     }
   4947     return FALSE;
   4948 }
   4949 
   4950 static void TestEquivalentReorderingScripts(void) {
   4951     // Beginning with ICU 55, collation reordering moves single scripts
   4952     // rather than groups of scripts,
   4953     // except where scripts share a range and sort primary-equal.
   4954     UErrorCode status = U_ZERO_ERROR;
   4955     int32_t equivalentScripts[100];
   4956     int32_t length;
   4957     int i;
   4958     int32_t prevScript;
   4959     /* These scripts are expected to be equivalent. */
   4960     static const int32_t expectedScripts[] = {
   4961         USCRIPT_HIRAGANA,
   4962         USCRIPT_KATAKANA,
   4963         USCRIPT_KATAKANA_OR_HIRAGANA
   4964     };
   4965 
   4966     equivalentScripts[0] = 0;
   4967     length = ucol_getEquivalentReorderCodes(
   4968             USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   4969     if (U_FAILURE(status)) {
   4970         log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   4971         return;
   4972     }
   4973     if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
   4974         log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
   4975                 "length expected 1, was = %d; expected [%d] was [%d]\n",
   4976                 length, USCRIPT_GOTHIC, equivalentScripts[0]);
   4977     }
   4978 
   4979     length = ucol_getEquivalentReorderCodes(
   4980             USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   4981     if (U_FAILURE(status)) {
   4982         log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   4983         return;
   4984     }
   4985     if (length != UPRV_LENGTHOF(expectedScripts)) {
   4986         log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
   4987                 "expected %d, was = %d\n",
   4988                 UPRV_LENGTHOF(expectedScripts), length);
   4989     }
   4990     prevScript = -1;
   4991     for (i = 0; i < length; ++i) {
   4992         int32_t script = equivalentScripts[i];
   4993         if (script <= prevScript) {
   4994             log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
   4995         }
   4996         prevScript = script;
   4997     }
   4998     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
   4999         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
   5000             log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
   5001                     expectedScripts[i]);
   5002         }
   5003     }
   5004 
   5005     length = ucol_getEquivalentReorderCodes(
   5006             USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5007     if (U_FAILURE(status)) {
   5008         log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
   5009         return;
   5010     }
   5011     if (length != UPRV_LENGTHOF(expectedScripts)) {
   5012         log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
   5013                 "expected %d, was = %d\n",
   5014                 UPRV_LENGTHOF(expectedScripts), length);
   5015     }
   5016     for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
   5017         if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
   5018             log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
   5019                     expectedScripts[i]);
   5020         }
   5021     }
   5022 
   5023     length = ucol_getEquivalentReorderCodes(
   5024             USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5025     if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
   5026         log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
   5027                 "expected %d, was = %d\n",
   5028                 UPRV_LENGTHOF(expectedScripts), length);
   5029     }
   5030 
   5031     length = ucol_getEquivalentReorderCodes(
   5032             USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5033     if (U_FAILURE(status) || length != 3) {
   5034         log_err("ERROR/Hani: retrieved equivalent script length wrong: "
   5035                 "expected 3, was = %d\n", length);
   5036     }
   5037     length = ucol_getEquivalentReorderCodes(
   5038             USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5039     if (U_FAILURE(status) || length != 3) {
   5040         log_err("ERROR/Hans: retrieved equivalent script length wrong: "
   5041                 "expected 3, was = %d\n", length);
   5042     }
   5043     length = ucol_getEquivalentReorderCodes(
   5044             USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5045     if (U_FAILURE(status) || length != 3) {
   5046         log_err("ERROR/Hant: retrieved equivalent script length wrong: "
   5047                 "expected 3, was = %d\n", length);
   5048     }
   5049 
   5050     length = ucol_getEquivalentReorderCodes(
   5051             USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5052     if (U_FAILURE(status) || length != 2) {
   5053         log_err("ERROR/Merc: retrieved equivalent script length wrong: "
   5054                 "expected 2, was = %d\n", length);
   5055     }
   5056     length = ucol_getEquivalentReorderCodes(
   5057             USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
   5058     if (U_FAILURE(status) || length != 2) {
   5059         log_err("ERROR/Mero: retrieved equivalent script length wrong: "
   5060                 "expected 2, was = %d\n", length);
   5061     }
   5062 }
   5063 
   5064 static void TestReorderingAcrossCloning(void)
   5065 {
   5066     UErrorCode status = U_ZERO_ERROR;
   5067     UCollator  *myCollation;
   5068     int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
   5069     UCollator *clonedCollation;
   5070     int32_t retrievedReorderCodesLength;
   5071     int32_t retrievedReorderCodes[10];
   5072     int loopIndex;
   5073 
   5074     log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
   5075 
   5076     /* build collator tertiary */
   5077     myCollation = ucol_open("", &status);
   5078     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5079     if(U_FAILURE(status)) {
   5080         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5081         return;
   5082     }
   5083 
   5084     /* set the reorderding */
   5085     ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
   5086     if (U_FAILURE(status)) {
   5087         log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
   5088         return;
   5089     }
   5090 
   5091     /* clone the collator */
   5092     clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
   5093     if (U_FAILURE(status)) {
   5094         log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
   5095         return;
   5096     }
   5097 
   5098     /* get the reordering */
   5099     retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
   5100     if (U_FAILURE(status)) {
   5101         log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
   5102         return;
   5103     }
   5104     if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
   5105         log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
   5106         return;
   5107     }
   5108     for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
   5109         if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
   5110             log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
   5111             return;
   5112         }
   5113     }
   5114 
   5115     /*uprv_free(buffer);*/
   5116     ucol_close(myCollation);
   5117     ucol_close(clonedCollation);
   5118 }
   5119 
   5120 /*
   5121  * Utility function to test one collation reordering test case set.
   5122  * @param testcases Array of test cases.
   5123  * @param n_testcases Size of the array testcases.
   5124  * @param reorderTokens Array of reordering codes.
   5125  * @param reorderTokensLen Size of the array reorderTokens.
   5126  */
   5127 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
   5128 {
   5129     uint32_t testCaseNum;
   5130     UErrorCode status = U_ZERO_ERROR;
   5131     UCollator  *myCollation;
   5132 
   5133     myCollation = ucol_open("", &status);
   5134     if (U_FAILURE(status)) {
   5135         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5136         return;
   5137     }
   5138     ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
   5139     if(U_FAILURE(status)) {
   5140         log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
   5141         return;
   5142     }
   5143 
   5144     for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
   5145         doTest(myCollation,
   5146             testCases[testCaseNum].source,
   5147             testCases[testCaseNum].target,
   5148             testCases[testCaseNum].result
   5149         );
   5150     }
   5151     ucol_close(myCollation);
   5152 }
   5153 
   5154 static void TestGreekFirstReorder(void)
   5155 {
   5156     const char* strRules[] = {
   5157         "[reorder Grek]"
   5158     };
   5159 
   5160     const int32_t apiRules[] = {
   5161         USCRIPT_GREEK
   5162     };
   5163 
   5164     const static OneTestCase privateUseCharacterStrings[] = {
   5165         { {0x0391}, {0x0391}, UCOL_EQUAL },
   5166         { {0x0041}, {0x0391}, UCOL_GREATER },
   5167         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
   5168         { {0x0060}, {0x0391}, UCOL_LESS },
   5169         { {0x0391}, {0xe2dc}, UCOL_LESS },
   5170         { {0x0391}, {0x0060}, UCOL_GREATER },
   5171     };
   5172 
   5173     /* Test rules creation */
   5174     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   5175 
   5176     /* Test collation reordering API */
   5177     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
   5178 }
   5179 
   5180 static void TestGreekLastReorder(void)
   5181 {
   5182     const char* strRules[] = {
   5183         "[reorder Zzzz Grek]"
   5184     };
   5185 
   5186     const int32_t apiRules[] = {
   5187         USCRIPT_UNKNOWN, USCRIPT_GREEK
   5188     };
   5189 
   5190     const static OneTestCase privateUseCharacterStrings[] = {
   5191         { {0x0391}, {0x0391}, UCOL_EQUAL },
   5192         { {0x0041}, {0x0391}, UCOL_LESS },
   5193         { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
   5194         { {0x0060}, {0x0391}, UCOL_LESS },
   5195         { {0x0391}, {0xe2dc}, UCOL_GREATER },
   5196     };
   5197 
   5198     /* Test rules creation */
   5199     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   5200 
   5201     /* Test collation reordering API */
   5202     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
   5203 }
   5204 
   5205 static void TestNonScriptReorder(void)
   5206 {
   5207     const char* strRules[] = {
   5208         "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
   5209     };
   5210 
   5211     const int32_t apiRules[] = {
   5212         USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
   5213         UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
   5214         UCOL_REORDER_CODE_CURRENCY
   5215     };
   5216 
   5217     const static OneTestCase privateUseCharacterStrings[] = {
   5218         { {0x0391}, {0x0041}, UCOL_LESS },
   5219         { {0x0041}, {0x0391}, UCOL_GREATER },
   5220         { {0x0060}, {0x0041}, UCOL_LESS },
   5221         { {0x0060}, {0x0391}, UCOL_GREATER },
   5222         { {0x0024}, {0x0041}, UCOL_GREATER },
   5223     };
   5224 
   5225     /* Test rules creation */
   5226     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   5227 
   5228     /* Test collation reordering API */
   5229     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
   5230 }
   5231 
   5232 static void TestHaniReorder(void)
   5233 {
   5234     const char* strRules[] = {
   5235         "[reorder Hani]"
   5236     };
   5237     const int32_t apiRules[] = {
   5238         USCRIPT_HAN
   5239     };
   5240 
   5241     const static OneTestCase privateUseCharacterStrings[] = {
   5242         { {0x4e00}, {0x0041}, UCOL_LESS },
   5243         { {0x4e00}, {0x0060}, UCOL_GREATER },
   5244         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   5245         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   5246         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   5247         { {0xfa27}, {0x0041}, UCOL_LESS },
   5248         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   5249     };
   5250 
   5251     /* Test rules creation */
   5252     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   5253 
   5254     /* Test collation reordering API */
   5255     doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
   5256 }
   5257 
   5258 static void TestHaniReorderWithOtherRules(void)
   5259 {
   5260     const char* strRules[] = {
   5261         "[reorder Hani] &b<a"
   5262     };
   5263     /*const int32_t apiRules[] = {
   5264         USCRIPT_HAN
   5265     };*/
   5266 
   5267     const static OneTestCase privateUseCharacterStrings[] = {
   5268         { {0x4e00}, {0x0041}, UCOL_LESS },
   5269         { {0x4e00}, {0x0060}, UCOL_GREATER },
   5270         { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
   5271         { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
   5272         { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
   5273         { {0xfa27}, {0x0041}, UCOL_LESS },
   5274         { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
   5275         { {0x0062}, {0x0061}, UCOL_LESS },
   5276     };
   5277 
   5278     /* Test rules creation */
   5279     doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
   5280 }
   5281 
   5282 static void TestMultipleReorder(void)
   5283 {
   5284     const char* strRules[] = {
   5285         "[reorder Grek Zzzz DIGIT Latn Hani]"
   5286     };
   5287 
   5288     const int32_t apiRules[] = {
   5289         USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
   5290     };
   5291 
   5292     const static OneTestCase collationTestCases[] = {
   5293         { {0x0391}, {0x0041}, UCOL_LESS},
   5294         { {0x0031}, {0x0041}, UCOL_LESS},
   5295         { {0x0041}, {0x4e00}, UCOL_LESS},
   5296     };
   5297 
   5298     /* Test rules creation */
   5299     doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
   5300 
   5301     /* Test collation reordering API */
   5302     doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
   5303 }
   5304 
   5305 /*
   5306  * Test that covers issue reported in ticket 8814
   5307  */
   5308 static void TestReorderWithNumericCollation(void)
   5309 {
   5310     UErrorCode status = U_ZERO_ERROR;
   5311     UCollator  *myCollation;
   5312     UCollator  *myReorderCollation;
   5313     int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
   5314     /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
   5315     UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
   5316     UChar fortyS[] = { 0x0053 };
   5317     UChar fortyThreeP[] = { 0x0050 };
   5318     uint8_t fortyS_sortKey[128];
   5319     int32_t fortyS_sortKey_Length;
   5320     uint8_t fortyThreeP_sortKey[128];
   5321     int32_t fortyThreeP_sortKey_Length;
   5322     uint8_t fortyS_sortKey_reorder[128];
   5323     int32_t fortyS_sortKey_reorder_Length;
   5324     uint8_t fortyThreeP_sortKey_reorder[128];
   5325     int32_t fortyThreeP_sortKey_reorder_Length;
   5326     UCollationResult collResult;
   5327     UCollationResult collResultReorder;
   5328 
   5329     log_verbose("Testing reordering with and without numeric collation\n");
   5330 
   5331     /* build collator tertiary with numeric */
   5332     myCollation = ucol_open("", &status);
   5333     /*
   5334     ucol_setStrength(myCollation, UCOL_TERTIARY);
   5335     */
   5336     ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   5337     if(U_FAILURE(status)) {
   5338         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5339         return;
   5340     }
   5341 
   5342     /* build collator tertiary with numeric and reordering */
   5343     myReorderCollation = ucol_open("", &status);
   5344     /*
   5345     ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
   5346     */
   5347     ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   5348     ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
   5349     if(U_FAILURE(status)) {
   5350         log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
   5351         return;
   5352     }
   5353 
   5354     fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
   5355     fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
   5356     fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
   5357     fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
   5358 
   5359     if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
   5360         log_err_status(status, "ERROR: couldn't generate sort keys\n");
   5361         return;
   5362     }
   5363     collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
   5364     collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
   5365     /*
   5366     fprintf(stderr, "\tcollResult = %x\n", collResult);
   5367     fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
   5368     fprintf(stderr, "\nfortyS\n");
   5369     for (i = 0; i < fortyS_sortKey_Length; i++) {
   5370         fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
   5371     }
   5372     fprintf(stderr, "\nfortyThreeP\n");
   5373     for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
   5374         fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
   5375     }
   5376     */
   5377     if (collResult != collResultReorder) {
   5378         log_err_status(status, "ERROR: collation results should have been the same.\n");
   5379         return;
   5380     }
   5381 
   5382     ucol_close(myCollation);
   5383     ucol_close(myReorderCollation);
   5384 }
   5385 
   5386 static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
   5387 {
   5388   for (; *a == *b; ++a, ++b) {
   5389     if (*a == 0) {
   5390       return 0;
   5391     }
   5392   }
   5393   return (*a < *b ? -1 : 1);
   5394 }
   5395 
   5396 static void TestImportRulesDeWithPhonebook(void)
   5397 {
   5398   const char* normalRules[] = {
   5399     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
   5400     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
   5401     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
   5402   };
   5403   const OneTestCase normalTests[] = {
   5404     { {0x00e6}, {0x00c6}, UCOL_LESS},
   5405     { {0x00fc}, {0x00dc}, UCOL_GREATER},
   5406   };
   5407 
   5408   const char* importRules[] = {
   5409     "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
   5410     "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   5411     "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
   5412   };
   5413   const OneTestCase importTests[] = {
   5414     { {0x00e6}, {0x00c6}, UCOL_LESS},
   5415     { {0x00fc}, {0x00dc}, UCOL_LESS},
   5416   };
   5417 
   5418   doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
   5419   doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
   5420 }
   5421 
   5422 #if 0
   5423 static void TestImportRulesFiWithEor(void)
   5424 {
   5425   /* DUCET. */
   5426   const char* defaultRules[] = {
   5427     "&a<b",                                    /* Dummy rule. */
   5428   };
   5429 
   5430   const OneTestCase defaultTests[] = {
   5431     { {0x0110}, {0x00F0}, UCOL_LESS},
   5432     { {0x00a3}, {0x00a5}, UCOL_LESS},
   5433     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   5434   };
   5435 
   5436   /* European Ordering rules: ignore currency characters. */
   5437   const char* eorRules[] = {
   5438     "[import root-u-co-eor]",
   5439   };
   5440 
   5441   const OneTestCase eorTests[] = {
   5442     { {0x0110}, {0x00F0}, UCOL_LESS},
   5443     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   5444     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   5445   };
   5446 
   5447   const char* fiStdRules[] = {
   5448     "[import fi-u-co-standard]",
   5449   };
   5450 
   5451   const OneTestCase fiStdTests[] = {
   5452     { {0x0110}, {0x00F0}, UCOL_GREATER},
   5453     { {0x00a3}, {0x00a5}, UCOL_LESS},
   5454     { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
   5455   };
   5456 
   5457   /* Both European Ordering Rules and Fi Standard Rules. */
   5458   const char* eorFiStdRules[] = {
   5459     "[import root-u-co-eor][import fi-u-co-standard]",
   5460   };
   5461 
   5462   /* This is essentially same as the one before once fi.txt is updated with import. */
   5463   const char* fiEorRules[] = {
   5464     "[import fi-u-co-eor]",
   5465   };
   5466 
   5467   const OneTestCase fiEorTests[] = {
   5468     { {0x0110}, {0x00F0}, UCOL_GREATER},
   5469     { {0x00a3}, {0x00a5}, UCOL_EQUAL},
   5470     { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
   5471   };
   5472 
   5473   doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
   5474   doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
   5475   doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
   5476   doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));
   5477 
   5478   log_knownIssue("8962", NULL);
   5479   /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
   5480         eor{
   5481             Sequence{
   5482                 "[import root-u-co-eor][import fi-u-co-standard]"
   5483             }
   5484             Version{"21.0"}
   5485         }
   5486   */
   5487   /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */
   5488 
   5489 }
   5490 #endif
   5491 
   5492 #if 0
   5493 /*
   5494  * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
   5495  * the resource files are built with -includeUnihanColl option.
   5496  * TODO: Uncomment this function and make it work when unihan rules are built by default.
   5497  */
   5498 static void TestImportRulesCJKWithUnihan(void)
   5499 {
   5500   /* DUCET. */
   5501   const char* defaultRules[] = {
   5502     "&a<b",                                    /* Dummy rule. */
   5503   };
   5504 
   5505   const OneTestCase defaultTests[] = {
   5506     { {0x3402}, {0x4e1e}, UCOL_GREATER},
   5507   };
   5508 
   5509   /* European Ordering rules: ignore currency characters. */
   5510   const char* unihanRules[] = {
   5511     "[import ko-u-co-unihan]",
   5512   };
   5513 
   5514   const OneTestCase unihanTests[] = {
   5515     { {0x3402}, {0x4e1e}, UCOL_LESS},
   5516   };
   5517 
   5518   doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
   5519   doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));
   5520 
   5521 }
   5522 #endif
   5523 
   5524 static void TestImport(void)
   5525 {
   5526     UCollator* vicoll;
   5527     UCollator* escoll;
   5528     UCollator* viescoll;
   5529     UCollator* importviescoll;
   5530     UParseError error;
   5531     UErrorCode status = U_ZERO_ERROR;
   5532     UChar* virules;
   5533     int32_t viruleslength;
   5534     UChar* esrules;
   5535     int32_t esruleslength;
   5536     UChar* viesrules;
   5537     int32_t viesruleslength;
   5538     char srules[500] = "[import vi][import es]";
   5539     UChar rules[500];
   5540     uint32_t length = 0;
   5541     int32_t itemCount;
   5542     int32_t i, k;
   5543     UChar32 start;
   5544     UChar32 end;
   5545     UChar str[500];
   5546     int32_t strLength;
   5547 
   5548     uint8_t sk1[500];
   5549     uint8_t sk2[500];
   5550 
   5551     UBool b;
   5552     USet* tailoredSet;
   5553     USet* importTailoredSet;
   5554 
   5555 
   5556     vicoll = ucol_open("vi", &status);
   5557     if(U_FAILURE(status)){
   5558         log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
   5559         return;
   5560     }
   5561 
   5562     virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
   5563     if(viruleslength == 0) {
   5564         log_data_err("missing vi tailoring rule string\n");
   5565         ucol_close(vicoll);
   5566         return;
   5567     }
   5568     escoll = ucol_open("es", &status);
   5569     esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
   5570     viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
   5571     viesrules[0] = 0;
   5572     u_strcat(viesrules, virules);
   5573     u_strcat(viesrules, esrules);
   5574     viesruleslength = viruleslength + esruleslength;
   5575     viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5576 
   5577     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   5578     length = u_unescape(srules, rules, 500);
   5579     importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5580     if(U_FAILURE(status)){
   5581         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5582         return;
   5583     }
   5584 
   5585     tailoredSet = ucol_getTailoredSet(viescoll, &status);
   5586     importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
   5587 
   5588     if(!uset_equals(tailoredSet, importTailoredSet)){
   5589         log_err("Tailored sets not equal");
   5590     }
   5591 
   5592     uset_close(importTailoredSet);
   5593 
   5594     itemCount = uset_getItemCount(tailoredSet);
   5595 
   5596     for( i = 0; i < itemCount; i++){
   5597         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   5598         if(strLength < 2){
   5599             for (; start <= end; start++){
   5600                 k = 0;
   5601                 U16_APPEND(str, k, 500, start, b);
   5602                 (void)b;    /* Suppress set but not used warning. */
   5603                 ucol_getSortKey(viescoll, str, 1, sk1, 500);
   5604                 ucol_getSortKey(importviescoll, str, 1, sk2, 500);
   5605                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5606                     log_err("Sort key for %s not equal\n", str);
   5607                     break;
   5608                 }
   5609             }
   5610         }else{
   5611             ucol_getSortKey(viescoll, str, strLength, sk1, 500);
   5612             ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
   5613             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5614                 log_err("ZZSort key for %s not equal\n", str);
   5615                 break;
   5616             }
   5617 
   5618         }
   5619     }
   5620 
   5621     uset_close(tailoredSet);
   5622 
   5623     uprv_free(viesrules);
   5624 
   5625     ucol_close(vicoll);
   5626     ucol_close(escoll);
   5627     ucol_close(viescoll);
   5628     ucol_close(importviescoll);
   5629 }
   5630 
   5631 static void TestImportWithType(void)
   5632 {
   5633     UCollator* vicoll;
   5634     UCollator* decoll;
   5635     UCollator* videcoll;
   5636     UCollator* importvidecoll;
   5637     UParseError error;
   5638     UErrorCode status = U_ZERO_ERROR;
   5639     const UChar* virules;
   5640     int32_t viruleslength;
   5641     const UChar* derules;
   5642     int32_t deruleslength;
   5643     UChar* viderules;
   5644     int32_t videruleslength;
   5645     const char srules[500] = "[import vi][import de-u-co-phonebk]";
   5646     UChar rules[500];
   5647     uint32_t length = 0;
   5648     int32_t itemCount;
   5649     int32_t i, k;
   5650     UChar32 start;
   5651     UChar32 end;
   5652     UChar str[500];
   5653     int32_t strLength;
   5654 
   5655     uint8_t sk1[500];
   5656     uint8_t sk2[500];
   5657 
   5658     USet* tailoredSet;
   5659     USet* importTailoredSet;
   5660 
   5661     vicoll = ucol_open("vi", &status);
   5662     if(U_FAILURE(status)){
   5663         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5664         return;
   5665     }
   5666     virules = ucol_getRules(vicoll, &viruleslength);
   5667     if(viruleslength == 0) {
   5668         log_data_err("missing vi tailoring rule string\n");
   5669         ucol_close(vicoll);
   5670         return;
   5671     }
   5672     /* decoll = ucol_open("de@collation=phonebook", &status); */
   5673     decoll = ucol_open("de-u-co-phonebk", &status);
   5674     if(U_FAILURE(status)){
   5675         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5676         return;
   5677     }
   5678 
   5679 
   5680     derules = ucol_getRules(decoll, &deruleslength);
   5681     viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
   5682     viderules[0] = 0;
   5683     u_strcat(viderules, virules);
   5684     u_strcat(viderules, derules);
   5685     videruleslength = viruleslength + deruleslength;
   5686     videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5687 
   5688     /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
   5689     length = u_unescape(srules, rules, 500);
   5690     importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
   5691     if(U_FAILURE(status)){
   5692         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
   5693         return;
   5694     }
   5695 
   5696     tailoredSet = ucol_getTailoredSet(videcoll, &status);
   5697     importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
   5698 
   5699     if(!uset_equals(tailoredSet, importTailoredSet)){
   5700         log_err("Tailored sets not equal");
   5701     }
   5702 
   5703     uset_close(importTailoredSet);
   5704 
   5705     itemCount = uset_getItemCount(tailoredSet);
   5706 
   5707     for( i = 0; i < itemCount; i++){
   5708         strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
   5709         if(strLength < 2){
   5710             for (; start <= end; start++){
   5711                 k = 0;
   5712                 U16_APPEND_UNSAFE(str, k, start);
   5713                 ucol_getSortKey(videcoll, str, 1, sk1, 500);
   5714                 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
   5715                 if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5716                     log_err("Sort key for %s not equal\n", str);
   5717                     break;
   5718                 }
   5719             }
   5720         }else{
   5721             ucol_getSortKey(videcoll, str, strLength, sk1, 500);
   5722             ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
   5723             if(compare_uint8_t_arrays(sk1, sk2) != 0){
   5724                 log_err("Sort key for %s not equal\n", str);
   5725                 break;
   5726             }
   5727 
   5728         }
   5729     }
   5730 
   5731     uset_close(tailoredSet);
   5732 
   5733     uprv_free(viderules);
   5734 
   5735     ucol_close(videcoll);
   5736     ucol_close(importvidecoll);
   5737     ucol_close(vicoll);
   5738     ucol_close(decoll);
   5739 }
   5740 
   5741 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
   5742 static const UChar longUpperStr1[]= { /* 155 chars */
   5743     0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
   5744     0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
   5745     0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
   5746     0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
   5747     0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
   5748     0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
   5749     0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
   5750     0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
   5751     0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
   5752     0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
   5753 };
   5754 
   5755 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
   5756 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
   5757     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5758     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5759     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5760     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
   5761     0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
   5762 };
   5763 
   5764 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
   5765 static const UChar longUpperStr3[]= { /* 324 chars */
   5766     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5767     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5768     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5769     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5770     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5771     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5772     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5773     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5774     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5775     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5776     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
   5777     0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
   5778 };
   5779 
   5780 typedef struct {
   5781     const UChar * longUpperStrPtr;
   5782     int32_t       longUpperStrLen;
   5783 } LongUpperStrItem;
   5784 
   5785 /* String pointers must be in reverse collation order of the corresponding strings */
   5786 static const LongUpperStrItem longUpperStrItems[] = {
   5787     { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
   5788     { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
   5789     { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
   5790     { NULL,          0                           }
   5791 };
   5792 
   5793 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
   5794 
   5795 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
   5796 static void TestCaseLevelBufferOverflow(void)
   5797 {
   5798     UErrorCode status = U_ZERO_ERROR;
   5799     UCollator * ucol = ucol_open("root", &status);
   5800     if ( U_SUCCESS(status) ) {
   5801         ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
   5802         if ( U_SUCCESS(status) ) {
   5803             const LongUpperStrItem * itemPtr;
   5804             uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
   5805             for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
   5806                 int32_t sortKeyLen;
   5807                 if (itemPtr > longUpperStrItems) {
   5808                     uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
   5809                 }
   5810                 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
   5811                 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
   5812                     log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
   5813                     break;
   5814                 }
   5815                 if ( itemPtr > longUpperStrItems ) {
   5816                     int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
   5817                     if (compareResult >= 0) {
   5818                         log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
   5819                     }
   5820                 }
   5821             }
   5822         } else {
   5823             log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
   5824         }
   5825         ucol_close(ucol);
   5826     } else {
   5827         log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
   5828     }
   5829 }
   5830 
/* Test for #10595 */
/*
 * NUL-terminated UTF-16 input for TestNextSortKeyPartJaIdentical:
 * five code units (U+4F50 U+3005 U+6728 U+002C U+6B66) followed by the
 * terminating 0.
 */
static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
/* Size in bytes of the buffer passed to each ucol_nextSortKeyPart call. */
#define KEY_PART_SIZE 16
   5834 
   5835 static void TestNextSortKeyPartJaIdentical(void)
   5836 {
   5837     UErrorCode status = U_ZERO_ERROR;
   5838     UCollator *coll;
   5839     uint8_t keyPart[KEY_PART_SIZE];
   5840     UCharIterator iter;
   5841     uint32_t state[2] = {0, 0};
   5842     int32_t keyPartLen;
   5843 
   5844     coll = ucol_open("ja", &status);
   5845     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   5846     if (U_FAILURE(status)) {
   5847         log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
   5848         return;
   5849     }
   5850 
   5851     uiter_setString(&iter, testJapaneseName, 5);
   5852     keyPartLen = KEY_PART_SIZE;
   5853     while (keyPartLen == KEY_PART_SIZE) {
   5854         keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
   5855         if (U_FAILURE(status)) {
   5856             log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
   5857             break;
   5858         }
   5859     }
   5860 
   5861     ucol_close(coll);
   5862 }
   5863 
/*
 * Registers test function x by calling addTest() with the local variable
 * `root`, the function's address, and the path "tscoll/cmsccoll/<x>"
 * (the function name is stringized and appended to the prefix).
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Registers the tests of this file under "tscoll/cmsccoll/".
 *
 * @param root test tree handle that is forwarded to addTest() by the TEST macro.
 *
 * Entries that are commented out are intentionally not registered; the
 * adjacent comments give the reason where one was recorded.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    TEST(TestUpperCaseFirst);
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
   5970 
   5971 #endif /* #if !UCONFIG_NO_COLLATION */
   5972