Home | History | Annotate | Download | only in cintltst
      1 
      2 /********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 2001-2015, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************/
      7 /*******************************************************************************
      8 *
      9 * File cmsccoll.C
     10 *
     11 *******************************************************************************/
     12 /**
     13  * These are the tests specific to ICU 1.8 and above, that I didn't know where
     14  * to fit.
     15  */
     16 
     17 #include <stdio.h>
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/ucol.h"
     24 #include "unicode/ucoleitr.h"
     25 #include "unicode/uloc.h"
     26 #include "cintltst.h"
     27 #include "ccolltst.h"
     28 #include "callcoll.h"
     29 #include "unicode/ustring.h"
     30 #include "string.h"
     31 #include "ucol_imp.h"
     32 #include "cmemory.h"
     33 #include "cstring.h"
     34 #include "uassert.h"
     35 #include "unicode/parseerr.h"
     36 #include "unicode/ucnv.h"
     37 #include "unicode/ures.h"
     38 #include "unicode/uscript.h"
     39 #include "unicode/utf16.h"
     40 #include "uparse.h"
     41 #include "putilimp.h"
     42 
     43 
     44 #define LEN(a) (sizeof(a)/sizeof(a[0]))
     45 
     46 #define MAX_TOKEN_LEN 16
     47 
/*
 * Function type for a string-comparison routine: it receives an opaque
 * collator pointer, an integer `object`, and two UChar strings with their
 * lengths, and returns a UCollationResult.
 * NOTE(review): the meaning of `object` is not visible in this part of the
 * file - confirm against the implementations of this type.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
     51 
     52 
     53 
     54 const static char cnt1[][10] = {
     55 
     56   "AA",
     57   "AC",
     58   "AZ",
     59   "AQ",
     60   "AB",
     61   "ABZ",
     62   "ABQ",
     63   "Z",
     64   "ABC",
     65   "Q",
     66   "B"
     67 };
     68 
     69 const static char cnt2[][10] = {
     70   "DA",
     71   "DAD",
     72   "DAZ",
     73   "MAR",
     74   "Z",
     75   "DAVIS",
     76   "MARK",
     77   "DAV",
     78   "DAVI"
     79 };
     80 
     81 static void IncompleteCntTest(void)
     82 {
     83   UErrorCode status = U_ZERO_ERROR;
     84   UChar temp[90];
     85   UChar t1[90];
     86   UChar t2[90];
     87 
     88   UCollator *coll =  NULL;
     89   uint32_t i = 0, j = 0;
     90   uint32_t size = 0;
     91 
     92   u_uastrcpy(temp, " & Z < ABC < Q < B");
     93 
     94   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
     95 
     96   if(U_SUCCESS(status)) {
     97     size = sizeof(cnt1)/sizeof(cnt1[0]);
     98     for(i = 0; i < size-1; i++) {
     99       for(j = i+1; j < size; j++) {
    100         UCollationElements *iter;
    101         u_uastrcpy(t1, cnt1[i]);
    102         u_uastrcpy(t2, cnt1[j]);
    103         doTest(coll, t1, t2, UCOL_LESS);
    104         /* synwee : added collation element iterator test */
    105         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    106         if (U_FAILURE(status)) {
    107           log_err("Creation of iterator failed\n");
    108           break;
    109         }
    110         backAndForth(iter);
    111         ucol_closeElements(iter);
    112       }
    113     }
    114   }
    115 
    116   ucol_close(coll);
    117 
    118 
    119   u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
    120   coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    121 
    122   if(U_SUCCESS(status)) {
    123     size = sizeof(cnt2)/sizeof(cnt2[0]);
    124     for(i = 0; i < size-1; i++) {
    125       for(j = i+1; j < size; j++) {
    126         UCollationElements *iter;
    127         u_uastrcpy(t1, cnt2[i]);
    128         u_uastrcpy(t2, cnt2[j]);
    129         doTest(coll, t1, t2, UCOL_LESS);
    130 
    131         /* synwee : added collation element iterator test */
    132         iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
    133         if (U_FAILURE(status)) {
    134           log_err("Creation of iterator failed\n");
    135           break;
    136         }
    137         backAndForth(iter);
    138         ucol_closeElements(iter);
    139       }
    140     }
    141   }
    142 
    143   ucol_close(coll);
    144 
    145 
    146 }
    147 
    148 const static char shifted[][20] = {
    149   "black bird",
    150   "black-bird",
    151   "blackbird",
    152   "black Bird",
    153   "black-Bird",
    154   "blackBird",
    155   "black birds",
    156   "black-birds",
    157   "blackbirds"
    158 };
    159 
    160 const static UCollationResult shiftedTert[] = {
    161   UCOL_EQUAL,
    162   UCOL_EQUAL,
    163   UCOL_EQUAL,
    164   UCOL_LESS,
    165   UCOL_EQUAL,
    166   UCOL_EQUAL,
    167   UCOL_LESS,
    168   UCOL_EQUAL,
    169   UCOL_EQUAL
    170 };
    171 
    172 const static char nonignorable[][20] = {
    173   "black bird",
    174   "black Bird",
    175   "black birds",
    176   "black-bird",
    177   "black-Bird",
    178   "black-birds",
    179   "blackbird",
    180   "blackBird",
    181   "blackbirds"
    182 };
    183 
    184 static void BlackBirdTest(void) {
    185   UErrorCode status = U_ZERO_ERROR;
    186   UChar t1[90];
    187   UChar t2[90];
    188 
    189   uint32_t i = 0, j = 0;
    190   uint32_t size = 0;
    191   UCollator *coll = ucol_open("en_US", &status);
    192 
    193   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    194   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
    195 
    196   if(U_SUCCESS(status)) {
    197     size = sizeof(nonignorable)/sizeof(nonignorable[0]);
    198     for(i = 0; i < size-1; i++) {
    199       for(j = i+1; j < size; j++) {
    200         u_uastrcpy(t1, nonignorable[i]);
    201         u_uastrcpy(t2, nonignorable[j]);
    202         doTest(coll, t1, t2, UCOL_LESS);
    203       }
    204     }
    205   }
    206 
    207   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    208   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
    209 
    210   if(U_SUCCESS(status)) {
    211     size = sizeof(shifted)/sizeof(shifted[0]);
    212     for(i = 0; i < size-1; i++) {
    213       for(j = i+1; j < size; j++) {
    214         u_uastrcpy(t1, shifted[i]);
    215         u_uastrcpy(t2, shifted[j]);
    216         doTest(coll, t1, t2, UCOL_LESS);
    217       }
    218     }
    219   }
    220 
    221   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    222   if(U_SUCCESS(status)) {
    223     size = sizeof(shifted)/sizeof(shifted[0]);
    224     for(i = 1; i < size; i++) {
    225       u_uastrcpy(t1, shifted[i-1]);
    226       u_uastrcpy(t2, shifted[i]);
    227       doTest(coll, t1, t2, shiftedTert[i]);
    228     }
    229   }
    230 
    231   ucol_close(coll);
    232 }
    233 
    234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    235     {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    236     {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    237     {0x0041/*'A'*/, 0x0300, 0x0000},
    238     {0x00C0, 0x0301, 0x0000},
    239     /* this would work with forced normalization */
    240     {0x00C0, 0x0316, 0x0000}
    241 };
    242 
    243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    244     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    245     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    246     {0x00C0, 0},
    247     {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    248     /* this would work with forced normalization */
    249     {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
    250 };
    251 
    252 const static UCollationResult results[] = {
    253     UCOL_GREATER,
    254     UCOL_EQUAL,
    255     UCOL_EQUAL,
    256     UCOL_GREATER,
    257     UCOL_EQUAL
    258 };
    259 
    260 static void FunkyATest(void)
    261 {
    262 
    263     int32_t i;
    264     UErrorCode status = U_ZERO_ERROR;
    265     UCollator  *myCollation;
    266     myCollation = ucol_open("en_US", &status);
    267     if(U_FAILURE(status)){
    268         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    269         return;
    270     }
    271     log_verbose("Testing some A letters, for some reason\n");
    272     ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    273     ucol_setStrength(myCollation, UCOL_TERTIARY);
    274     for (i = 0; i < 4 ; i++)
    275     {
    276         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    277     }
    278     ucol_close(myCollation);
    279 }
    280 
    281 UColAttributeValue caseFirst[] = {
    282     UCOL_OFF,
    283     UCOL_LOWER_FIRST,
    284     UCOL_UPPER_FIRST
    285 };
    286 
    287 
    288 UColAttributeValue alternateHandling[] = {
    289     UCOL_NON_IGNORABLE,
    290     UCOL_SHIFTED
    291 };
    292 
    293 UColAttributeValue caseLevel[] = {
    294     UCOL_OFF,
    295     UCOL_ON
    296 };
    297 
    298 UColAttributeValue strengths[] = {
    299     UCOL_PRIMARY,
    300     UCOL_SECONDARY,
    301     UCOL_TERTIARY,
    302     UCOL_QUATERNARY,
    303     UCOL_IDENTICAL
    304 };
    305 
    306 #if 0
    307 static const char * strengthsC[] = {
    308     "UCOL_PRIMARY",
    309     "UCOL_SECONDARY",
    310     "UCOL_TERTIARY",
    311     "UCOL_QUATERNARY",
    312     "UCOL_IDENTICAL"
    313 };
    314 
    315 static const char * caseFirstC[] = {
    316     "UCOL_OFF",
    317     "UCOL_LOWER_FIRST",
    318     "UCOL_UPPER_FIRST"
    319 };
    320 
    321 
    322 static const char * alternateHandlingC[] = {
    323     "UCOL_NON_IGNORABLE",
    324     "UCOL_SHIFTED"
    325 };
    326 
    327 static const char * caseLevelC[] = {
    328     "UCOL_OFF",
    329     "UCOL_ON"
    330 };
    331 
    332 /* not used currently - does not test only prints */
    333 static void PrintMarkDavis(void)
    334 {
    335   UErrorCode status = U_ZERO_ERROR;
    336   UChar m[256];
    337   uint8_t sortkey[256];
    338   UCollator *coll = ucol_open("en_US", &status);
    339   uint32_t h,i,j,k, sortkeysize;
    340   uint32_t sizem = 0;
    341   char buffer[512];
    342   uint32_t len = 512;
    343 
    344   log_verbose("PrintMarkDavis");
    345 
    346   u_uastrcpy(m, "Mark Davis");
    347   sizem = u_strlen(m);
    348 
    349 
    350   m[1] = 0xe4;
    351 
    352   for(i = 0; i<sizem; i++) {
    353     fprintf(stderr, "\\u%04X ", m[i]);
    354   }
    355   fprintf(stderr, "\n");
    356 
    357   for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    358     ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
    359     fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
    360 
    361     for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
    362       ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
    363       fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
    364 
    365       for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
    366         ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
    367         fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
    368 
    369         for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
    370           ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
    371           sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
    372           fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
    373           fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
    374         }
    375 
    376       }
    377 
    378     }
    379 
    380   }
    381 }
    382 #endif
    383 
    384 static void BillFairmanTest(void) {
    385 /*
    386 ** check for actual locale via ICU resource bundles
    387 **
    388 ** lp points to the original locale ("fr_FR_....")
    389 */
    390 
    391     UResourceBundle *lr,*cr;
    392     UErrorCode              lec = U_ZERO_ERROR;
    393     const char *lp = "fr_FR_you_ll_never_find_this_locale";
    394 
    395     log_verbose("BillFairmanTest\n");
    396 
    397     lr = ures_open(NULL,lp,&lec);
    398     if (lr) {
    399         cr = ures_getByKey(lr,"collations",0,&lec);
    400         if (cr) {
    401             lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
    402             if (lp) {
    403                 if (U_SUCCESS(lec)) {
    404                     if(strcmp(lp, "fr") != 0) {
    405                         log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
    406                     }
    407                 }
    408             }
    409             ures_close(cr);
    410         }
    411         ures_close(lr);
    412     }
    413 }
    414 
    415 const static char chTest[][20] = {
    416   "c",
    417   "C",
    418   "ca", "cb", "cx", "cy", "CZ",
    419   "c\\u030C", "C\\u030C",
    420   "h",
    421   "H",
    422   "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    423   "ch", "cH", "Ch", "CH",
    424   "cha", "charly", "che", "chh", "chch", "chr",
    425   "i", "I", "iarly",
    426   "r", "R",
    427   "r\\u030C", "R\\u030C",
    428   "s",
    429   "S",
    430   "s\\u030C", "S\\u030C",
    431   "z", "Z",
    432   "z\\u030C", "Z\\u030C"
    433 };
    434 
    435 static void TestChMove(void) {
    436     UChar t1[256] = {0};
    437     UChar t2[256] = {0};
    438 
    439     uint32_t i = 0, j = 0;
    440     uint32_t size = 0;
    441     UErrorCode status = U_ZERO_ERROR;
    442 
    443     UCollator *coll = ucol_open("cs", &status);
    444 
    445     if(U_SUCCESS(status)) {
    446         size = sizeof(chTest)/sizeof(chTest[0]);
    447         for(i = 0; i < size-1; i++) {
    448             for(j = i+1; j < size; j++) {
    449                 u_unescape(chTest[i], t1, 256);
    450                 u_unescape(chTest[j], t2, 256);
    451                 doTest(coll, t1, t2, UCOL_LESS);
    452             }
    453         }
    454     }
    455     else {
    456         log_data_err("Can't open collator");
    457     }
    458     ucol_close(coll);
    459 }
    460 
    461 
    462 
    463 
    464 /*
    465 const static char impTest[][20] = {
    466   "\\u4e00",
    467     "a",
    468     "A",
    469     "b",
    470     "B",
    471     "\\u4e01"
    472 };
    473 */
    474 
    475 
    476 static void TestImplicitTailoring(void) {
    477   static const struct {
    478     const char *rules;
    479     const char *data[10];
    480     const uint32_t len;
    481   } tests[] = {
    482       {
    483         /* Tailor b and c before U+4E00. */
    484         "&[before 1]\\u4e00 < b < c "
    485         /* Now, before U+4E00 is c; put d and e after that. */
    486         "&[before 1]\\u4e00 < d < e",
    487         { "b", "c", "d", "e", "\\u4e00"}, 5 },
    488       { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
    489       { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
    490       { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
    491   };
    492 
    493   int32_t i = 0;
    494 
    495   for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
    496       genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
    497   }
    498 
    499 /*
    500   UChar t1[256] = {0};
    501   UChar t2[256] = {0};
    502 
    503   const char *rule = "&\\u4e00 < a <<< A < b <<< B";
    504 
    505   uint32_t i = 0, j = 0;
    506   uint32_t size = 0;
    507   uint32_t ruleLen = 0;
    508   UErrorCode status = U_ZERO_ERROR;
    509   UCollator *coll = NULL;
    510   ruleLen = u_unescape(rule, t1, 256);
    511 
    512   coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    513 
    514   if(U_SUCCESS(status)) {
    515     size = sizeof(impTest)/sizeof(impTest[0]);
    516     for(i = 0; i < size-1; i++) {
    517       for(j = i+1; j < size; j++) {
    518         u_unescape(impTest[i], t1, 256);
    519         u_unescape(impTest[j], t2, 256);
    520         doTest(coll, t1, t2, UCOL_LESS);
    521       }
    522     }
    523   }
    524   else {
    525     log_err("Can't open collator");
    526   }
    527   ucol_close(coll);
    528   */
    529 }
    530 
    531 static void TestFCDProblem(void) {
    532   UChar t1[256] = {0};
    533   UChar t2[256] = {0};
    534 
    535   const char *s1 = "\\u0430\\u0306\\u0325";
    536   const char *s2 = "\\u04D1\\u0325";
    537 
    538   UErrorCode status = U_ZERO_ERROR;
    539   UCollator *coll = ucol_open("", &status);
    540   u_unescape(s1, t1, 256);
    541   u_unescape(s2, t2, 256);
    542 
    543   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
    544   doTest(coll, t1, t2, UCOL_EQUAL);
    545 
    546   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    547   doTest(coll, t1, t2, UCOL_EQUAL);
    548 
    549   ucol_close(coll);
    550 }
    551 
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
/* Capacity, in UChars, of each normalization buffer in `tester`. */
#define NORM_BUFFER_TEST_LEN 18
/* One TestComposeDecompose case: a code point plus the two strings that the
   test compares for collation equality. */
typedef struct {
  UChar32 u;                        /* code point the case was built from */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* NFC form; TestComposeDecompose may overwrite it with the raw code point */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* NFD form of the code point */
} tester;
    562 
    563 static void TestComposeDecompose(void) {
    564     /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
    565     static const UChar UNICODESET_STR[] = {
    566         0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
    567         0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
    568         0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
    569     };
    570     int32_t noOfLoc;
    571     int32_t i = 0, j = 0;
    572 
    573     UErrorCode status = U_ZERO_ERROR;
    574     const char *locName = NULL;
    575     uint32_t nfcSize;
    576     uint32_t nfdSize;
    577     tester **t;
    578     uint32_t noCases = 0;
    579     UCollator *coll = NULL;
    580     UChar32 u = 0;
    581     UChar comp[NORM_BUFFER_TEST_LEN];
    582     uint32_t len = 0;
    583     UCollationElements *iter;
    584     USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
    585     int32_t charsToTestSize;
    586 
    587     noOfLoc = uloc_countAvailable();
    588 
    589     coll = ucol_open("", &status);
    590     if (U_FAILURE(status)) {
    591         log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
    592         return;
    593     }
    594     charsToTestSize = uset_size(charsToTest);
    595     if (charsToTestSize <= 0) {
    596         log_err("Set was zero. Missing data?\n");
    597         return;
    598     }
    599     t = (tester **)malloc(charsToTestSize * sizeof(tester *));
    600     t[0] = (tester *)malloc(sizeof(tester));
    601     log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
    602 
    603     for(u = 0; u < charsToTestSize; u++) {
    604         UChar32 ch = uset_charAt(charsToTest, u);
    605         len = 0;
    606         U16_APPEND_UNSAFE(comp, len, ch);
    607         nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    608         nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    609 
    610         if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
    611           || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
    612             t[noCases]->u = ch;
    613             if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
    614                 u_strncpy(t[noCases]->NFC, comp, len);
    615                 t[noCases]->NFC[len] = 0;
    616             }
    617             noCases++;
    618             t[noCases] = (tester *)malloc(sizeof(tester));
    619             uprv_memset(t[noCases], 0, sizeof(tester));
    620         }
    621     }
    622     log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
    623     uset_close(charsToTest);
    624     charsToTest = NULL;
    625 
    626     for(u=0; u<(UChar32)noCases; u++) {
    627         if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    628             log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
    629             doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    630         }
    631     }
    632     /*
    633     for(u = 0; u < charsToTestSize; u++) {
    634       if(!(u&0xFFFF)) {
    635         log_verbose("%08X ", u);
    636       }
    637       uprv_memset(t[noCases], 0, sizeof(tester));
    638       t[noCases]->u = u;
    639       len = 0;
    640       U16_APPEND_UNSAFE(comp, len, u);
    641       comp[len] = 0;
    642       nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
    643       nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
    644       doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
    645       doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
    646     }
    647     */
    648 
    649     ucol_close(coll);
    650 
    651     log_verbose("Testing locales, number of cases = %i\n", noCases);
    652     for(i = 0; i<noOfLoc; i++) {
    653         status = U_ZERO_ERROR;
    654         locName = uloc_getAvailable(i);
    655         if(hasCollationElements(locName)) {
    656             char cName[256];
    657             UChar name[256];
    658             int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
    659 
    660             for(j = 0; j<nameSize; j++) {
    661                 cName[j] = (char)name[j];
    662             }
    663             cName[nameSize] = 0;
    664             log_verbose("\nTesting locale %s (%s)\n", locName, cName);
    665 
    666             coll = ucol_open(locName, &status);
    667             ucol_setStrength(coll, UCOL_IDENTICAL);
    668             iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    669 
    670             for(u=0; u<(UChar32)noCases; u++) {
    671                 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
    672                     log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
    673                     doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
    674                     log_verbose("Testing NFC\n");
    675                     ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
    676                     backAndForth(iter);
    677                     log_verbose("Testing NFD\n");
    678                     ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
    679                     backAndForth(iter);
    680                 }
    681             }
    682             ucol_closeElements(iter);
    683             ucol_close(coll);
    684         }
    685     }
    686     for(u = 0; u <= (UChar32)noCases; u++) {
    687         free(t[u]);
    688     }
    689     free(t);
    690 }
    691 
    692 static void TestEmptyRule(void) {
    693   UErrorCode status = U_ZERO_ERROR;
    694   UChar rulez[] = { 0 };
    695   UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    696 
    697   ucol_close(coll);
    698 }
    699 
    700 static void TestUCARules(void) {
    701   UErrorCode status = U_ZERO_ERROR;
    702   UChar b[256];
    703   UChar *rules = b;
    704   uint32_t ruleLen = 0;
    705   UCollator *UCAfromRules = NULL;
    706   UCollator *coll = ucol_open("", &status);
    707   if(status == U_FILE_ACCESS_ERROR) {
    708     log_data_err("Is your data around?\n");
    709     return;
    710   } else if(U_FAILURE(status)) {
    711     log_err("Error opening collator\n");
    712     return;
    713   }
    714   ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
    715 
    716   log_verbose("TestUCARules\n");
    717   if(ruleLen > 256) {
    718     rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
    719     ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
    720   }
    721   log_verbose("Rules length is %d\n", ruleLen);
    722   UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
    723   if(U_SUCCESS(status)) {
    724     ucol_close(UCAfromRules);
    725   } else {
    726     log_verbose("Unable to create a collator from UCARules!\n");
    727   }
    728 /*
    729   u_unescape(blah, b, 256);
    730   ucol_getSortKey(coll, b, 1, res, 256);
    731 */
    732   ucol_close(coll);
    733   if(rules != b) {
    734     free(rules);
    735   }
    736 }
    737 
    738 
    739 /* Pinyin tonal order */
    740 /*
    741     A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
    742           (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
    743     E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
    744     I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
    745     O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
    746     U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
    747       < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
    748 .. (\u00fc)
    749 
    750 However, in testing we got the following order:
    751     A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
    752           (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
    753     E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
    754 .. (\u0113)
    755     I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
    756     O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
    757     U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
    758 .. (\u01d8)
    759       < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
    760 */
    761 
    762 static void TestBefore(void) {
    763   const static char *data[] = {
    764       "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
    765       "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
    766       "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
    767       "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
    768       "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
    769       "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    770   };
    771   genericRulesStarter(
    772     "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    773     "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    774     "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    775     "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    776     "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    777     "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
    778     data, sizeof(data)/sizeof(data[0]));
    779 }
    780 
#if 0
/* superseded by TestBeforePinyin */
/* Disabled: passed the strings below to genericLocaleStarter() for the "zh"
   locale. */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
    796 
#if 0
/* superseded by the changes to the lv locale */
/* Disabled: passed the strings below to genericLocaleStarter() for the "lv"
   locale. */
static void TestJ831(void) {
  const static char *data[] = {
    "I",
      "i",
      "Y",
      "y"
  };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
    809 
    810 static void TestJ815(void) {
    811   const static char *data[] = {
    812     "aa",
    813       "Aa",
    814       "ab",
    815       "Ab",
    816       "ad",
    817       "Ad",
    818       "ae",
    819       "Ae",
    820       "\\u00e6",
    821       "\\u00c6",
    822       "af",
    823       "Af",
    824       "b",
    825       "B"
    826   };
    827   genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
    828   genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
    829 }
    830 
    831 
    832 static void TestCase(void)
    833 {
    834     const static UChar gRules[MAX_TOKEN_LEN] =
    835     /*" & 0 < 1,\u2461<a,A"*/
    836     { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
    837 
    838     const static UChar testCase[][MAX_TOKEN_LEN] =
    839     {
    840         /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
    841         /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
    842         /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
    843         /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
    844     };
    845 
    846     const static UCollationResult caseTestResults[][9] =
    847     {
    848         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    849         { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
    850         { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
    851         { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
    852     };
    853 
    854     const static UColAttributeValue caseTestAttributes[][2] =
    855     {
    856         { UCOL_LOWER_FIRST, UCOL_OFF},
    857         { UCOL_UPPER_FIRST, UCOL_OFF},
    858         { UCOL_LOWER_FIRST, UCOL_ON},
    859         { UCOL_UPPER_FIRST, UCOL_ON}
    860     };
    861     int32_t i,j,k;
    862     UErrorCode status = U_ZERO_ERROR;
    863     UCollationElements *iter;
    864     UCollator  *myCollation;
    865     myCollation = ucol_open("en_US", &status);
    866 
    867     if(U_FAILURE(status)){
    868         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    869         return;
    870     }
    871     log_verbose("Testing different case settings\n");
    872     ucol_setStrength(myCollation, UCOL_TERTIARY);
    873 
    874     for(k = 0; k<4; k++) {
    875       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    876       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    877       log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
    878       for (i = 0; i < 3 ; i++) {
    879         for(j = i+1; j<4; j++) {
    880           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    881         }
    882       }
    883     }
    884     ucol_close(myCollation);
    885 
    886     myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    887     if(U_FAILURE(status)){
    888         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    889         return;
    890     }
    891     log_verbose("Testing different case settings with custom rules\n");
    892     ucol_setStrength(myCollation, UCOL_TERTIARY);
    893 
    894     for(k = 0; k<4; k++) {
    895       ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
    896       ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
    897       for (i = 0; i < 3 ; i++) {
    898         for(j = i+1; j<4; j++) {
    899           log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
    900           doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
    901           iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
    902           backAndForth(iter);
    903           ucol_closeElements(iter);
    904           iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
    905           backAndForth(iter);
    906           ucol_closeElements(iter);
    907         }
    908       }
    909     }
    910     ucol_close(myCollation);
    911     {
    912       const static char *lowerFirst[] = {
    913         "h",
    914         "H",
    915         "ch",
    916         "Ch",
    917         "CH",
    918         "cha",
    919         "chA",
    920         "Cha",
    921         "ChA",
    922         "CHa",
    923         "CHA",
    924         "i",
    925         "I"
    926       };
    927 
    928       const static char *upperFirst[] = {
    929         "H",
    930         "h",
    931         "CH",
    932         "Ch",
    933         "ch",
    934         "CHA",
    935         "CHa",
    936         "ChA",
    937         "Cha",
    938         "chA",
    939         "cha",
    940         "I",
    941         "i"
    942       };
    943       log_verbose("mixed case test\n");
    944       log_verbose("lower first, case level off\n");
    945       genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
    946       log_verbose("upper first, case level off\n");
    947       genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
    948       log_verbose("lower first, case level on\n");
    949       genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
    950       log_verbose("upper first, case level on\n");
    951       genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
    952     }
    953 
    954 }
    955 
    956 static void TestIncrementalNormalize(void) {
    957 
    958     /*UChar baseA     =0x61;*/
    959     UChar baseA     =0x41;
    960 /*    UChar baseB     = 0x42;*/
    961     static const UChar ccMix[]   = {0x316, 0x321, 0x300};
    962     /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
    963     /*
    964         0x316 is combining grave accent below, cc=220
    965         0x321 is combining palatalized hook below, cc=202
    966         0x300 is combining grave accent, cc=230
    967     */
    968 
    969 #define MAXSLEN 2000
    970     /*int          maxSLen   = 64000;*/
    971     int          sLen;
    972     int          i;
    973 
    974     UCollator        *coll;
    975     UErrorCode       status = U_ZERO_ERROR;
    976     UCollationResult result;
    977 
    978     int32_t myQ = getTestOption(QUICK_OPTION);
    979 
    980     if(getTestOption(QUICK_OPTION) < 0) {
    981         setTestOption(QUICK_OPTION, 1);
    982     }
    983 
    984     {
    985         /* Test 1.  Run very long unnormalized strings, to force overflow of*/
    986         /*          most buffers along the way.*/
    987         UChar            strA[MAXSLEN+1];
    988         UChar            strB[MAXSLEN+1];
    989 
    990         coll = ucol_open("en_US", &status);
    991         if(status == U_FILE_ACCESS_ERROR) {
    992           log_data_err("Is your data around?\n");
    993           return;
    994         } else if(U_FAILURE(status)) {
    995           log_err("Error opening collator\n");
    996           return;
    997         }
    998         ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    999 
   1000         /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
   1001         /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
   1002         /*for (sLen = 1000; sLen<1001; sLen++) {*/
   1003         for (sLen = 500; sLen<501; sLen++) {
   1004         /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
   1005             strA[0] = baseA;
   1006             strB[0] = baseA;
   1007             for (i=1; i<=sLen-1; i++) {
   1008                 strA[i] = ccMix[i % 3];
   1009                 strB[sLen-i] = ccMix[i % 3];
   1010             }
   1011             strA[sLen]   = 0;
   1012             strB[sLen]   = 0;
   1013 
   1014             ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
   1015             doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
   1016             ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
   1017             doTest(coll, strA, strB, UCOL_EQUAL);
   1018         }
   1019     }
   1020 
   1021     setTestOption(QUICK_OPTION, myQ);
   1022 
   1023 
   1024     /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
   1025     /*         of the string.  Checks a couple of edge cases.*/
   1026 
   1027     {
   1028         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
   1029         static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
   1030         ucol_setStrength(coll, UCOL_TERTIARY);
   1031         doTest(coll, strA, strB, UCOL_EQUAL);
   1032     }
   1033 
   1034     /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
   1035 
   1036     {
   1037       /* New UCA  3.1.1.
   1038        * test below used a code point from Desseret, which sorts differently
   1039        * than d800 dc00
   1040        */
   1041         /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
   1042         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
   1043         static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
   1044         ucol_setStrength(coll, UCOL_TERTIARY);
   1045         doTest(coll, strA, strB, UCOL_GREATER);
   1046     }
   1047 
   1048     /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
   1049 
   1050     {
   1051         static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
   1052         static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
   1053         char  sortKeyA[50];
   1054         char  sortKeyAz[50];
   1055         char  sortKeyB[50];
   1056         char  sortKeyBz[50];
   1057         int   r;
   1058 
   1059         /* there used to be -3 here. Hmmmm.... */
   1060         /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
   1061         result = ucol_strcoll(coll, strA, 3, strB, 3);
   1062         if (result != UCOL_GREATER) {
   1063             log_err("ERROR 1 in test 4\n");
   1064         }
   1065         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1066         if (result != UCOL_EQUAL) {
   1067             log_err("ERROR 2 in test 4\n");
   1068         }
   1069 
   1070         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1071         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1072         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1073         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1074 
   1075         r = strcmp(sortKeyA, sortKeyAz);
   1076         if (r <= 0) {
   1077             log_err("Error 3 in test 4\n");
   1078         }
   1079         r = strcmp(sortKeyA, sortKeyB);
   1080         if (r <= 0) {
   1081             log_err("Error 4 in test 4\n");
   1082         }
   1083         r = strcmp(sortKeyAz, sortKeyBz);
   1084         if (r != 0) {
   1085             log_err("Error 5 in test 4\n");
   1086         }
   1087 
   1088         ucol_setStrength(coll, UCOL_IDENTICAL);
   1089         ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1090         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1091         ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1092         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1093 
   1094         r = strcmp(sortKeyA, sortKeyAz);
   1095         if (r <= 0) {
   1096             log_err("Error 6 in test 4\n");
   1097         }
   1098         r = strcmp(sortKeyA, sortKeyB);
   1099         if (r <= 0) {
   1100             log_err("Error 7 in test 4\n");
   1101         }
   1102         r = strcmp(sortKeyAz, sortKeyBz);
   1103         if (r != 0) {
   1104             log_err("Error 8 in test 4\n");
   1105         }
   1106         ucol_setStrength(coll, UCOL_TERTIARY);
   1107     }
   1108 
   1109 
   1110     /*  Test 5:  Null characters in non-normal source strings.*/
   1111 
   1112     {
   1113         static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
   1114         static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
   1115         char  sortKeyA[50];
   1116         char  sortKeyAz[50];
   1117         char  sortKeyB[50];
   1118         char  sortKeyBz[50];
   1119         int   r;
   1120 
   1121         result = ucol_strcoll(coll, strA, 6, strB, 6);
   1122         if (result != UCOL_GREATER) {
   1123             log_err("ERROR 1 in test 5\n");
   1124         }
   1125         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1126         if (result != UCOL_EQUAL) {
   1127             log_err("ERROR 2 in test 5\n");
   1128         }
   1129 
   1130         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1131         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1132         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1133         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1134 
   1135         r = strcmp(sortKeyA, sortKeyAz);
   1136         if (r <= 0) {
   1137             log_err("Error 3 in test 5\n");
   1138         }
   1139         r = strcmp(sortKeyA, sortKeyB);
   1140         if (r <= 0) {
   1141             log_err("Error 4 in test 5\n");
   1142         }
   1143         r = strcmp(sortKeyAz, sortKeyBz);
   1144         if (r != 0) {
   1145             log_err("Error 5 in test 5\n");
   1146         }
   1147 
   1148         ucol_setStrength(coll, UCOL_IDENTICAL);
   1149         ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
   1150         ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
   1151         ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
   1152         ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
   1153 
   1154         r = strcmp(sortKeyA, sortKeyAz);
   1155         if (r <= 0) {
   1156             log_err("Error 6 in test 5\n");
   1157         }
   1158         r = strcmp(sortKeyA, sortKeyB);
   1159         if (r <= 0) {
   1160             log_err("Error 7 in test 5\n");
   1161         }
   1162         r = strcmp(sortKeyAz, sortKeyBz);
   1163         if (r != 0) {
   1164             log_err("Error 8 in test 5\n");
   1165         }
   1166         ucol_setStrength(coll, UCOL_TERTIARY);
   1167     }
   1168 
   1169 
   1170     /*  Test 6:  Null character as base of a non-normal combining sequence.*/
   1171 
   1172     {
   1173         static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
   1174         static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
   1175 
   1176         result = ucol_strcoll(coll, strA, 5, strB, 5);
   1177         if (result != UCOL_LESS) {
   1178             log_err("Error 1 in test 6\n");
   1179         }
   1180         result = ucol_strcoll(coll, strA, -1, strB, -1);
   1181         if (result != UCOL_EQUAL) {
   1182             log_err("Error 2 in test 6\n");
   1183         }
   1184     }
   1185 
   1186     ucol_close(coll);
   1187 }
   1188 
   1189 
   1190 
   1191 #if 0
   1192 static void TestGetCaseBit(void) {
   1193   static const char *caseBitData[] = {
   1194     "a", "A", "ch", "Ch", "CH",
   1195       "\\uFF9E", "\\u0009"
   1196   };
   1197 
   1198   static const uint8_t results[] = {
   1199     UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
   1200       UCOL_UPPER_CASE, UCOL_LOWER_CASE
   1201   };
   1202 
   1203   uint32_t i, blen = 0;
   1204   UChar b[256] = {0};
   1205   UErrorCode status = U_ZERO_ERROR;
   1206   UCollator *UCA = ucol_open("", &status);
   1207   uint8_t res = 0;
   1208 
   1209   for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
   1210     blen = u_unescape(caseBitData[i], b, 256);
   1211     res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
   1212     if(results[i] != res) {
   1213       log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
   1214     }
   1215   }
   1216 }
   1217 #endif
   1218 
   1219 static void TestHangulTailoring(void) {
   1220     static const char *koreanData[] = {
   1221         "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
   1222             "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
   1223             "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
   1224             "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
   1225             "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
   1226             "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
   1227     };
   1228 
   1229     const char *rules =
   1230         "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
   1231         "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
   1232         "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
   1233         "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
   1234         "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
   1235         "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
   1236 
   1237 
   1238   UErrorCode status = U_ZERO_ERROR;
   1239   UChar rlz[2048] = { 0 };
   1240   uint32_t rlen = u_unescape(rules, rlz, 2048);
   1241 
   1242   UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   1243   if(status == U_FILE_ACCESS_ERROR) {
   1244     log_data_err("Is your data around?\n");
   1245     return;
   1246   } else if(U_FAILURE(status)) {
   1247     log_err("Error opening collator\n");
   1248     return;
   1249   }
   1250 
   1251   log_verbose("Using start of korean rules\n");
   1252 
   1253   if(U_SUCCESS(status)) {
   1254     genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   1255   } else {
   1256     log_err("Unable to open collator with rules %s\n", rules);
   1257   }
   1258 
   1259   ucol_close(coll);
   1260 
   1261   log_verbose("Using ko__LOTUS locale\n");
   1262   genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
   1263 }
   1264 
/*
 * The secondary/tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * TestCompressOverlap() uses these values as thresholds: bytes produced by
 * top-down compression must not be below the middle byte, and bytes produced
 * by bottom-up compression must not be above it.
 */
enum {
    SEC_COMMON_MIDDLE = 0x25,  /* secondary level; range 05..45 */
    TER_ONLY_COMMON_MIDDLE = 0x65  /* tertiary level; range 05..C5 */
};
   1275 
   1276 static void TestCompressOverlap(void) {
   1277     UChar       secstr[150];
   1278     UChar       tertstr[150];
   1279     UErrorCode  status = U_ZERO_ERROR;
   1280     UCollator  *coll;
   1281     uint8_t     result[500];
   1282     uint32_t    resultlen;
   1283     int         count = 0;
   1284     uint8_t    *tempptr;
   1285 
   1286     coll = ucol_open("", &status);
   1287 
   1288     if (U_FAILURE(status)) {
   1289         log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
   1290         return;
   1291     }
   1292     while (count < 149) {
   1293         secstr[count] = 0x0020; /* [06, 05, 05] */
   1294         tertstr[count] = 0x0020;
   1295         count ++;
   1296     }
   1297 
   1298     /* top down compression ----------------------------------- */
   1299     secstr[count] = 0x0332; /* [, 87, 05] */
   1300     tertstr[count] = 0x3000; /* [06, 05, 07] */
   1301 
   1302     /* no compression secstr should have 150 secondary bytes, tertstr should
   1303     have 150 tertiary bytes.
   1304     with correct compression, secstr should have 6 secondary
   1305     bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
   1306     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
   1307     (void)resultlen;    /* Suppress set but not used warning. */
   1308     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1309     while (*(tempptr + 1) != 1) {
   1310         /* the last secondary collation element is not checked since it is not
   1311         part of the compression */
   1312         if (*tempptr < SEC_COMMON_MIDDLE) {
   1313             log_err("Secondary top down compression overlapped\n");
   1314         }
   1315         tempptr ++;
   1316     }
   1317 
   1318     /* tertiary top/bottom/common for en_US is similar to the secondary
   1319     top/bottom/common */
   1320     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
   1321     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1322     while (*(tempptr + 1) != 0) {
   1323         /* the last secondary collation element is not checked since it is not
   1324         part of the compression */
   1325         if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
   1326             log_err("Tertiary top down compression overlapped\n");
   1327         }
   1328         tempptr ++;
   1329     }
   1330 
   1331     /* bottom up compression ------------------------------------- */
   1332     secstr[count] = 0;
   1333     tertstr[count] = 0;
   1334     resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
   1335     tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
   1336     while (*(tempptr + 1) != 1) {
   1337         /* the last secondary collation element is not checked since it is not
   1338         part of the compression */
   1339         if (*tempptr > SEC_COMMON_MIDDLE) {
   1340             log_err("Secondary bottom up compression overlapped\n");
   1341         }
   1342         tempptr ++;
   1343     }
   1344 
   1345     /* tertiary top/bottom/common for en_US is similar to the secondary
   1346     top/bottom/common */
   1347     resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
   1348     tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
   1349     while (*(tempptr + 1) != 0) {
   1350         /* the last secondary collation element is not checked since it is not
   1351         part of the compression */
   1352         if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
   1353             log_err("Tertiary bottom up compression overlapped\n");
   1354         }
   1355         tempptr ++;
   1356     }
   1357 
   1358     ucol_close(coll);
   1359 }
   1360 
   1361 static void TestCyrillicTailoring(void) {
   1362   static const char *test[] = {
   1363     "\\u0410b",
   1364       "\\u0410\\u0306a",
   1365       "\\u04d0A"
   1366   };
   1367 
   1368     /* Russian overrides contractions, so this test is not valid anymore */
   1369     /*genericLocaleStarter("ru", test, 3);*/
   1370 
   1371     // Most of the following are commented out because UCA 8.0
   1372     // drops most of the Cyrillic contractions from the default order.
   1373     // See CLDR ticket #7246 "root collation: remove Cyrillic contractions".
   1374 
   1375     // genericLocaleStarter("root", test, 3);
   1376     // genericRulesStarter("&\\u0410 = \\u0410", test, 3);
   1377     // genericRulesStarter("&Z < \\u0410", test, 3);
   1378     genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
   1379     genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
   1380     // genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
   1381     // genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
   1382 }
   1383 
   1384 static void TestSuppressContractions(void) {
   1385 
   1386   static const char *testNoCont2[] = {
   1387       "\\u0410\\u0302a",
   1388       "\\u0410\\u0306b",
   1389       "\\u0410c"
   1390   };
   1391   static const char *testNoCont[] = {
   1392       "a\\u0410",
   1393       "A\\u0410\\u0306",
   1394       "\\uFF21\\u0410\\u0302"
   1395   };
   1396 
   1397   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3);
   1398   genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3);
   1399 }
   1400 
   1401 static void TestContraction(void) {
   1402     const static char *testrules[] = {
   1403         "&A = AB / B",
   1404         "&A = A\\u0306/\\u0306",
   1405         "&c = ch / h"
   1406     };
   1407     const static UChar testdata[][2] = {
   1408         {0x0041 /* 'A' */, 0x0042 /* 'B' */},
   1409         {0x0041 /* 'A' */, 0x0306 /* combining breve */},
   1410         {0x0063 /* 'c' */, 0x0068 /* 'h' */}
   1411     };
   1412     const static UChar testdata2[][2] = {
   1413         {0x0063 /* 'c' */, 0x0067 /* 'g' */},
   1414         {0x0063 /* 'c' */, 0x0068 /* 'h' */},
   1415         {0x0063 /* 'c' */, 0x006C /* 'l' */}
   1416     };
   1417 #if 0
   1418     /*
   1419      * These pairs of rule strings are not guaranteed to yield the very same mappings.
   1420      * In fact, LDML 24 recommends an improved way of creating mappings
   1421      * which always yields different mappings for such pairs. See
   1422      * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
   1423      */
   1424     const static char *testrules3[] = {
   1425         "&z < xyz &xyzw << B",
   1426         "&z < xyz &xyz << B / w",
   1427         "&z < ch &achm << B",
   1428         "&z < ch &a << B / chm",
   1429         "&\\ud800\\udc00w << B",
   1430         "&\\ud800\\udc00 << B / w",
   1431         "&a\\ud800\\udc00m << B",
   1432         "&a << B / \\ud800\\udc00m",
   1433     };
   1434 #endif
   1435 
   1436     UErrorCode  status   = U_ZERO_ERROR;
   1437     UCollator  *coll;
   1438     UChar       rule[256] = {0};
   1439     uint32_t    rlen     = 0;
   1440     int         i;
   1441 
   1442     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   1443         UCollationElements *iter1;
   1444         int j = 0;
   1445         log_verbose("Rule %s for testing\n", testrules[i]);
   1446         rlen = u_unescape(testrules[i], rule, 32);
   1447         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1448         if (U_FAILURE(status)) {
   1449             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1450             return;
   1451         }
   1452         iter1 = ucol_openElements(coll, testdata[i], 2, &status);
   1453         if (U_FAILURE(status)) {
   1454             log_err("Collation iterator creation failed\n");
   1455             return;
   1456         }
   1457         while (j < 2) {
   1458             UCollationElements *iter2 = ucol_openElements(coll,
   1459                                                          &(testdata[i][j]),
   1460                                                          1, &status);
   1461             uint32_t ce;
   1462             if (U_FAILURE(status)) {
   1463                 log_err("Collation iterator creation failed\n");
   1464                 return;
   1465             }
   1466             ce = ucol_next(iter2, &status);
   1467             while (ce != UCOL_NULLORDER) {
   1468                 if ((uint32_t)ucol_next(iter1, &status) != ce) {
   1469                     log_err("Collation elements in contraction split does not match\n");
   1470                     return;
   1471                 }
   1472                 ce = ucol_next(iter2, &status);
   1473             }
   1474             j ++;
   1475             ucol_closeElements(iter2);
   1476         }
   1477         if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
   1478             log_err("Collation elements not exhausted\n");
   1479             return;
   1480         }
   1481         ucol_closeElements(iter1);
   1482         ucol_close(coll);
   1483     }
   1484 
   1485     rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
   1486     coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1487     if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
   1488         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1489                 testdata2[0][0], testdata2[0][1], testdata2[1][0],
   1490                 testdata2[1][1]);
   1491         return;
   1492     }
   1493     if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
   1494         log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
   1495                 testdata2[1][0], testdata2[1][1], testdata2[2][0],
   1496                 testdata2[2][1]);
   1497         return;
   1498     }
   1499     ucol_close(coll);
   1500 #if 0  /* see above */
   1501     for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
   1502         log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
   1503         UCollator          *coll1,
   1504                            *coll2;
   1505         UCollationElements *iter1,
   1506                            *iter2;
   1507         UChar               ch = 0x0042 /* 'B' */;
   1508         uint32_t            ce;
   1509         rlen = u_unescape(testrules3[i], rule, 32);
   1510         coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1511         rlen = u_unescape(testrules3[i + 1], rule, 32);
   1512         coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1513         if (U_FAILURE(status)) {
   1514             log_err("Collator creation failed %s\n", testrules[i]);
   1515             return;
   1516         }
   1517         iter1 = ucol_openElements(coll1, &ch, 1, &status);
   1518         iter2 = ucol_openElements(coll2, &ch, 1, &status);
   1519         if (U_FAILURE(status)) {
   1520             log_err("Collation iterator creation failed\n");
   1521             return;
   1522         }
   1523         ce = ucol_next(iter1, &status);
   1524         if (U_FAILURE(status)) {
   1525             log_err("Retrieving ces failed\n");
   1526             return;
   1527         }
   1528         while (ce != UCOL_NULLORDER) {
   1529             uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
   1530             if (ce == ce2) {
   1531                 log_verbose("CEs match: %08x\n", ce);
   1532             } else {
   1533                 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
   1534                 return;
   1535             }
   1536             ce = ucol_next(iter1, &status);
   1537             if (U_FAILURE(status)) {
   1538                 log_err("Retrieving ces failed\n");
   1539                 return;
   1540             }
   1541         }
   1542         if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
   1543             log_err("CEs not exhausted\n");
   1544             return;
   1545         }
   1546         ucol_closeElements(iter1);
   1547         ucol_closeElements(iter2);
   1548         ucol_close(coll1);
   1549         ucol_close(coll2);
   1550     }
   1551 #endif
   1552 }
   1553 
   1554 static void TestExpansion(void) {
   1555     const static char *testrules[] = {
   1556 #if 0
   1557         /*
   1558          * This seems to have tested that M was not mapped to an expansion.
   1559          * I believe the old builder just did that because it computed the extension CEs
   1560          * at the very end, which was a bug.
   1561          * Among other problems, it violated the core tailoring principle
   1562          * by making an earlier rule depend on a later one.
   1563          * And, of course, if M did not get an expansion, then it was primary different from K,
   1564          * unlike what the rule &K<<M says.
   1565          */
   1566         "&J << K / B & K << M",
   1567 #endif
   1568         "&J << K / B << M"
   1569     };
   1570     const static UChar testdata[][3] = {
   1571         {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
   1572         {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
   1573         {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
   1574         {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
   1575         {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
   1576         {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
   1577     };
   1578 
   1579     UErrorCode  status   = U_ZERO_ERROR;
   1580     UCollator  *coll;
   1581     UChar       rule[256] = {0};
   1582     uint32_t    rlen     = 0;
   1583     int         i;
   1584 
   1585     for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
   1586         int j = 0;
   1587         log_verbose("Rule %s for testing\n", testrules[i]);
   1588         rlen = u_unescape(testrules[i], rule, 32);
   1589         coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
   1590         if (U_FAILURE(status)) {
   1591             log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
   1592             return;
   1593         }
   1594 
   1595         for (j = 0; j < 5; j ++) {
   1596             doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
   1597         }
   1598         ucol_close(coll);
   1599     }
   1600 }
   1601 
#if 0
/*
 * This test exercises the current limitations of the collation engine.
 * It always fails, so it is compiled out by default.
 *
 * Each section below defines a tailoring rule plus one or more expected
 * orderings and hands them to genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  /* Both orderings are tried with decomposition mode on and off. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): the rule and data here are identical to the previous
   * section; only the log message differs - confirm whether different
   * input was intended. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  /* Orderings are tried with shifted/quaternary and with */
  /* non-ignorable/tertiary attribute settings. */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  /* Only the upper-first setting is exercised; the "off" runs are commented out. */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
   1693 
   1694 static void TestBocsuCoverage(void) {
   1695   UErrorCode status = U_ZERO_ERROR;
   1696   const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
   1697   UChar       test[256] = {0};
   1698   uint32_t    tlen     = u_unescape(testString, test, 32);
   1699   uint8_t key[256]     = {0};
   1700   uint32_t klen         = 0;
   1701 
   1702   UCollator *coll = ucol_open("", &status);
   1703   if(U_SUCCESS(status)) {
   1704   ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
   1705 
   1706   klen = ucol_getSortKey(coll, test, tlen, key, 256);
   1707   (void)klen;    /* Suppress set but not used warning. */
   1708 
   1709   ucol_close(coll);
   1710   } else {
   1711     log_data_err("Couldn't open UCA\n");
   1712   }
   1713 }
   1714 
   1715 static void TestVariableTopSetting(void) {
   1716   UErrorCode status = U_ZERO_ERROR;
   1717   uint32_t varTopOriginal = 0, varTop1, varTop2;
   1718   UCollator *coll = ucol_open("", &status);
   1719   if(U_SUCCESS(status)) {
   1720 
   1721   static const UChar nul = 0;
   1722   static const UChar space = 0x20;
   1723   static const UChar dot = 0x2e;  /* punctuation */
   1724   static const UChar degree = 0xb0;  /* symbol */
   1725   static const UChar dollar = 0x24;  /* currency symbol */
   1726   static const UChar zero = 0x30;  /* digit */
   1727 
   1728   varTopOriginal = ucol_getVariableTop(coll, &status);
   1729   log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
   1730   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1731 
   1732   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1733   varTop2 = ucol_getVariableTop(coll, &status);
   1734   log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
   1735   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1736       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1737       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1738       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1739       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1740       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1741       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1742     log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
   1743   }
   1744 
   1745   varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
   1746   varTop2 = ucol_getVariableTop(coll, &status);
   1747   log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
   1748   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1749       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1750       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1751       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1752       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1753       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1754       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1755     log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
   1756   }
   1757 
   1758   varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
   1759   varTop2 = ucol_getVariableTop(coll, &status);
   1760   log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
   1761   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1762       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1763       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1764       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1765       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1766       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1767       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1768     log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
   1769   }
   1770 
   1771   varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
   1772   varTop2 = ucol_getVariableTop(coll, &status);
   1773   log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
   1774   if(U_FAILURE(status) || varTop1 != varTop2 ||
   1775       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1776       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1777       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1778       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1779       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1780       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1781     log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
   1782   }
   1783 
   1784   log_verbose("Testing setting variable top to contractions\n");
   1785   {
   1786     UChar first[4] = { 0 };
   1787     first[0] = 0x0040;
   1788     first[1] = 0x0050;
   1789     first[2] = 0x0000;
   1790 
   1791     status = U_ZERO_ERROR;
   1792     ucol_setVariableTop(coll, first, -1, &status);
   1793 
   1794     if(U_SUCCESS(status)) {
   1795       log_err("Invalid contraction succeded in setting variable top!\n");
   1796     }
   1797 
   1798   }
   1799 
   1800   log_verbose("Test restoring variable top\n");
   1801 
   1802   status = U_ZERO_ERROR;
   1803   ucol_restoreVariableTop(coll, varTopOriginal, &status);
   1804   if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
   1805     log_err("Couldn't restore old variable top\n");
   1806   }
   1807 
   1808   log_verbose("Testing calling with error set\n");
   1809 
   1810   status = U_INTERNAL_PROGRAM_ERROR;
   1811   varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
   1812   varTop2 = ucol_getVariableTop(coll, &status);
   1813   ucol_restoreVariableTop(coll, varTop2, &status);
   1814   varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
   1815   varTop2 = ucol_getVariableTop(NULL, &status);
   1816   ucol_restoreVariableTop(NULL, varTop2, &status);
   1817   if(status != U_INTERNAL_PROGRAM_ERROR) {
   1818     log_err("Bad reaction to passed error!\n");
   1819   }
   1820   ucol_close(coll);
   1821   } else {
   1822     log_data_err("Couldn't open UCA collator\n");
   1823   }
   1824 }
   1825 
   1826 static void TestMaxVariable() {
   1827   UErrorCode status = U_ZERO_ERROR;
   1828   UColReorderCode oldMax, max;
   1829   UCollator *coll;
   1830 
   1831   static const UChar nul = 0;
   1832   static const UChar space = 0x20;
   1833   static const UChar dot = 0x2e;  /* punctuation */
   1834   static const UChar degree = 0xb0;  /* symbol */
   1835   static const UChar dollar = 0x24;  /* currency symbol */
   1836   static const UChar zero = 0x30;  /* digit */
   1837 
   1838   coll = ucol_open("", &status);
   1839   if(U_FAILURE(status)) {
   1840     log_data_err("Couldn't open root collator\n");
   1841     return;
   1842   }
   1843 
   1844   oldMax = ucol_getMaxVariable(coll);
   1845   log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
   1846   ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   1847 
   1848   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1849   max = ucol_getMaxVariable(coll);
   1850   log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
   1851   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
   1852       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1853       ucol_equal(coll, &nul, 0, &dot, 1) ||
   1854       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1855       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1856       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1857       ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
   1858     log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
   1859   }
   1860 
   1861   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
   1862   max = ucol_getMaxVariable(coll);
   1863   log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
   1864   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
   1865       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1866       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1867       ucol_equal(coll, &nul, 0, &degree, 1) ||
   1868       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1869       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1870       ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
   1871     log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
   1872   }
   1873 
   1874   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
   1875   max = ucol_getMaxVariable(coll);
   1876   log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
   1877   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
   1878       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1879       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1880       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1881       ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1882       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1883       ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
   1884     log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
   1885   }
   1886 
   1887   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
   1888   max = ucol_getMaxVariable(coll);
   1889   log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
   1890   if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
   1891       !ucol_equal(coll, &nul, 0, &space, 1) ||
   1892       !ucol_equal(coll, &nul, 0, &dot, 1) ||
   1893       !ucol_equal(coll, &nul, 0, &degree, 1) ||
   1894       !ucol_equal(coll, &nul, 0, &dollar, 1) ||
   1895       ucol_equal(coll, &nul, 0, &zero, 1) ||
   1896       ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
   1897     log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
   1898   }
   1899 
   1900   log_verbose("Test restoring maxVariable\n");
   1901   status = U_ZERO_ERROR;
   1902   ucol_setMaxVariable(coll, oldMax, &status);
   1903   if(oldMax != ucol_getMaxVariable(coll)) {
   1904     log_err("Couldn't restore old maxVariable\n");
   1905   }
   1906 
   1907   log_verbose("Testing calling with error set\n");
   1908   status = U_INTERNAL_PROGRAM_ERROR;
   1909   ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
   1910   max = ucol_getMaxVariable(coll);
   1911   if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
   1912     log_err("Bad reaction to passed error!\n");
   1913   }
   1914   ucol_close(coll);
   1915 }
   1916 
   1917 static void TestNonChars(void) {
   1918   static const char *test[] = {
   1919       "\\u0000",  /* ignorable */
   1920       "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
   1921       "\\uFDD0", "\\uFDEF",
   1922       "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
   1923       "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
   1924       "\\U0003FFFE", "\\U0003FFFF",
   1925       "\\U0004FFFE", "\\U0004FFFF",
   1926       "\\U0005FFFE", "\\U0005FFFF",
   1927       "\\U0006FFFE", "\\U0006FFFF",
   1928       "\\U0007FFFE", "\\U0007FFFF",
   1929       "\\U0008FFFE", "\\U0008FFFF",
   1930       "\\U0009FFFE", "\\U0009FFFF",
   1931       "\\U000AFFFE", "\\U000AFFFF",
   1932       "\\U000BFFFE", "\\U000BFFFF",
   1933       "\\U000CFFFE", "\\U000CFFFF",
   1934       "\\U000DFFFE", "\\U000DFFFF",
   1935       "\\U000EFFFE", "\\U000EFFFF",
   1936       "\\U000FFFFE", "\\U000FFFFF",
   1937       "\\U0010FFFE", "\\U0010FFFF",
   1938       "\\uFFFF"  /* special character with maximum primary weight */
   1939   };
   1940   UErrorCode status = U_ZERO_ERROR;
   1941   UCollator *coll = ucol_open("en_US", &status);
   1942 
   1943   log_verbose("Test non characters\n");
   1944 
   1945   if(U_SUCCESS(status)) {
   1946     genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
   1947   } else {
   1948     log_err_status(status, "Unable to open collator\n");
   1949   }
   1950 
   1951   ucol_close(coll);
   1952 }
   1953 
   1954 static void TestExtremeCompression(void) {
   1955   static char *test[4];
   1956   int32_t j = 0, i = 0;
   1957 
   1958   for(i = 0; i<4; i++) {
   1959     test[i] = (char *)malloc(2048*sizeof(char));
   1960   }
   1961 
   1962   for(j = 20; j < 500; j++) {
   1963     for(i = 0; i<4; i++) {
   1964       uprv_memset(test[i], 'a', (j-1)*sizeof(char));
   1965       test[i][j-1] = (char)('a'+i);
   1966       test[i][j] = 0;
   1967     }
   1968     genericLocaleStarter("en_US", (const char **)test, 4);
   1969   }
   1970 
   1971 
   1972   for(i = 0; i<4; i++) {
   1973     free(test[i]);
   1974   }
   1975 }
   1976 
#if 0
/*
 * Disabled alternative version of TestExtremeCompression, kept for reference.
 * It fills four heap buffers for lengths 10..2047 and calls
 * genericLocaleStarter() once, after the fill loop has finished.
 *
 * NOTE(review): before re-enabling, check the following:
 *  - ucol_open() is passed `status` rather than `&status`;
 *  - `coll` is never used or closed;
 *  - uprv_memset() fills j-2 bytes but the letter is stored at index j-1,
 *    so index j-2 keeps whatever an earlier iteration (or malloc) left there.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Second fill pass touches only test[0]; its result is not used afterwards. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
   2006 
   2007 static void TestSurrogates(void) {
   2008   static const char *test[] = {
   2009     "z","\\ud900\\udc25",  "\\ud805\\udc50",
   2010        "\\ud800\\udc00y",  "\\ud800\\udc00r",
   2011        "\\ud800\\udc00f",  "\\ud800\\udc00",
   2012        "\\ud800\\udc00c", "\\ud800\\udc00b",
   2013        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
   2014        "\\ud800\\udc00a",
   2015        "c", "b"
   2016   };
   2017 
   2018   static const char *rule =
   2019     "&z < \\ud900\\udc25   < \\ud805\\udc50"
   2020        "< \\ud800\\udc00y  < \\ud800\\udc00r"
   2021        "< \\ud800\\udc00f  << \\ud800\\udc00"
   2022        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
   2023        "< \\ud800\\udc00a  < c < b" ;
   2024 
   2025   genericRulesStarter(rule, test, 14);
   2026 }
   2027 
   2028 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
   2029 static void TestPrefix(void) {
   2030   uint32_t i;
   2031 
   2032   static const struct {
   2033     const char *rules;
   2034     const char *data[50];
   2035     const uint32_t len;
   2036   } tests[] = {
   2037     { "&z <<< z|a",
   2038       {"zz", "za"}, 2 },
   2039 
   2040     { "&z <<< z|   a",
   2041       {"zz", "za"}, 2 },
   2042     { "[strength I]"
   2043       "&a=\\ud900\\udc25"
   2044       "&z<<<\\ud900\\udc25|a",
   2045       {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
   2046   };
   2047 
   2048 
   2049   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2050     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2051   }
   2052 }
   2053 
/* This test uses data supplied by Masashiko Maedera to test the              */
/* JIS X 4061 collation order implementation.                                  */
/*                                                                             */
/* Two tables of escaped kana strings, each listed in the expected order, are  */
/* run against the "ja" locale twice: once with quaternary strength only, and  */
/* once with quaternary strength plus shifted alternate handling.              */
static void TestNewJapanese(void) {

  /* First ordering table (escaped hiragana/katakana strings). */
  static const char * const test1[] = {
      "\\u30b7\\u30e3\\u30fc\\u30ec",
      "\\u30b7\\u30e3\\u30a4",
      "\\u30b7\\u30e4\\u30a3",
      "\\u30b7\\u30e3\\u30ec",
      "\\u3061\\u3087\\u3053",
      "\\u3061\\u3088\\u3053",
      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
      "\\u3066\\u30fc\\u305f",
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
      "\\u3066\\u3048\\u305f",
      "\\u3067\\u30fc\\u305f",
      "\\u30c7\\u30fc\\u30bf",
      "\\u30c7\\u30a7\\u30bf",
      "\\u3067\\u3048\\u305f",
      "\\u3066\\u30fc\\u305f\\u30fc",
      "\\u30c6\\u30fc\\u30bf\\u30a1",
      "\\u30c6\\u30a7\\u30bf\\u30fc",
      "\\u3066\\u3047\\u305f\\u3041",
      "\\u3066\\u3048\\u305f\\u30fc",
      "\\u3067\\u30fc\\u305f\\u30fc",
      "\\u30c7\\u30fc\\u30bf\\u30a1",
      "\\u3067\\u30a7\\u305f\\u30a1",
      "\\u30c7\\u3047\\u30bf\\u3041",
      "\\u30c7\\u30a8\\u30bf\\u30a2",
      "\\u3072\\u3086",
      "\\u3073\\u3085\\u3042",
      "\\u3074\\u3085\\u3042",
      "\\u3073\\u3085\\u3042\\u30fc",
      "\\u30d3\\u30e5\\u30a2\\u30fc",
      "\\u3074\\u3085\\u3042\\u30fc",
      "\\u30d4\\u30e5\\u30a2\\u30fc",
      "\\u30d2\\u30e5\\u30a6",
      "\\u30d2\\u30e6\\u30a6",
      "\\u30d4\\u30e5\\u30a6\\u30a2",
      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
      "\\u3072\\u3085\\u3093",
      "\\u3074\\u3085\\u3093",
      "\\u3075\\u30fc\\u308a",
      "\\u30d5\\u30fc\\u30ea",
      "\\u3075\\u3045\\u308a",
      "\\u3075\\u30a5\\u308a",
      "\\u3075\\u30a5\\u30ea",
      "\\u30d5\\u30a6\\u30ea",
      "\\u3076\\u30fc\\u308a",
      "\\u30d6\\u30fc\\u30ea",
      "\\u3076\\u3045\\u308a",
      "\\u30d6\\u30a5\\u308a",
      "\\u3077\\u3046\\u308a",
      "\\u30d7\\u30a6\\u30ea",
      "\\u3075\\u30fc\\u308a\\u30fc",
      "\\u30d5\\u30a5\\u30ea\\u30fc",
      "\\u3075\\u30a5\\u308a\\u30a3",
      "\\u30d5\\u3045\\u308a\\u3043",
      "\\u30d5\\u30a6\\u30ea\\u30fc",
      "\\u3075\\u3046\\u308a\\u3043",
      "\\u30d6\\u30a6\\u30ea\\u30a4",
      "\\u3077\\u30fc\\u308a\\u30fc",
      "\\u3077\\u30a5\\u308a\\u30a4",
      "\\u3077\\u3046\\u308a\\u30fc",
      "\\u30d7\\u30a6\\u30ea\\u30a4",
      "\\u30d5\\u30fd",
      "\\u3075\\u309e",
      "\\u3076\\u309d",
      "\\u3076\\u3075",
      "\\u3076\\u30d5",
      "\\u30d6\\u3075",
      "\\u30d6\\u30d5",
      "\\u3076\\u309e",
      "\\u3076\\u3077",
      "\\u30d6\\u3077",
      "\\u3077\\u309d",
      "\\u30d7\\u30fd",
      "\\u3077\\u3075",
};

  /* Second ordering table; the trailing comments are the original author's */
  /* shorthand for each entry. */
  static const char *test2[] = {
    "\\u306f\\u309d", /* H\\u309d */
    "\\u30cf\\u30fd", /* K\\u30fd */
    "\\u306f\\u306f", /* HH */
    "\\u306f\\u30cf", /* HK */
    "\\u30cf\\u30cf", /* KK */
    "\\u306f\\u309e", /* H\\u309e */
    "\\u30cf\\u30fe", /* K\\u30fe */
    "\\u306f\\u3070", /* HH\\u309b */
    "\\u30cf\\u30d0", /* KK\\u309b */
    "\\u306f\\u3071", /* HH\\u309c */
    "\\u30cf\\u3071", /* KH\\u309c */
    "\\u30cf\\u30d1", /* KK\\u309c */
    "\\u3070\\u309d", /* H\\u309b\\u309d */
    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
    "\\u3070\\u306f", /* H\\u309bH */
    "\\u30d0\\u30cf", /* K\\u309bK */
    "\\u3070\\u309e", /* H\\u309b\\u309e */
    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
    "\\u3070\\u3070", /* H\\u309bH\\u309b */
    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
    "\\u3070\\u3071", /* H\\u309bH\\u309c */
    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
    "\\u3071\\u309d", /* H\\u309c\\u309d */
    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
    "\\u3071\\u306f", /* H\\u309cH */
    "\\u30d1\\u30cf", /* K\\u309cK */
    "\\u3071\\u3070", /* H\\u309cH\\u309b */
    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
    "\\u3071\\u3071", /* H\\u309cH\\u309c */
    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
  };
  /*
  static const char *test3[] = {
    "\\u221er\\u221e",
    "\\u221eR#",
    "\\u221et\\u221e",
    "#r\\u221e",
    "#R#",
    "#t%",
    "#T%",
    "8t\\u221e",
    "8T\\u221e",
    "8t#",
    "8T#",
    "8t%",
    "8T%",
    "8t8",
    "8T8",
    "\\u03c9r\\u221e",
    "\\u03a9R%",
    "rr\\u221e",
    "rR\\u221e",
    "Rr\\u221e",
    "RR\\u221e",
    "RT%",
    "rt8",
    "tr\\u221e",
    "tr8",
    "TR8",
    "tt8",
    "\\u30b7\\u30e3\\u30fc\\u30ec",
  };
  */
  /* Attribute set 1: quaternary strength only. */
  static const UColAttribute att[] = { UCOL_STRENGTH };
  static const UColAttributeValue val[] = { UCOL_QUATERNARY };

  /* Attribute set 2: quaternary strength with shifted alternate handling. */
  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };

  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
}
   2215 
   2216 static void TestStrCollIdenticalPrefix(void) {
   2217   const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
   2218   const char* test[] = {
   2219     "ab\\ud9b0\\udc70",
   2220     "ab\\ud9b0\\udc71"
   2221   };
   2222   genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
   2223 }
   2224 /* Contractions should have all their canonically equivalent */
   2225 /* strings included */
   2226 static void TestContractionClosure(void) {
   2227   static const struct {
   2228     const char *rules;
   2229     const char *data[10];
   2230     const uint32_t len;
   2231   } tests[] = {
   2232     {   "&b=\\u00e4\\u00e4",
   2233       { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
   2234     {   "&b=\\u00C5",
   2235       { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
   2236   };
   2237   uint32_t i;
   2238 
   2239 
   2240   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2241     genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
   2242   }
   2243 }
   2244 
/* Per the original author's note, this test also fails. */
/* It runs three tailorings that use "[before 3]" through genericRulesStarter(); */
/* each entry expects its first string to sort before its second. */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    /* The next two entries contain the same three rule fragments; */
    /* only the position of the [before 3] fragment differs. */
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  uint32_t i;


  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
  }

  /* Disabled: prefix variants of the rules above, plus a diagnostic dump of */
  /* the collation elements produced for each test string. */
#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
/* This piece of code should be in some sort of verbose mode.    */
/* It gets the collation elements for the test strings and       */
/* prints them, which is useful when diagnosing a failure.       */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
   2322 
   2323 static void TestPrefixCompose(void) {
   2324   const char* rule1 =
   2325         "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
   2326   /*
   2327   const char* test[] = {
   2328       "\\u30c6\\u30fc\\u30bf",
   2329       "\\u30c6\\u30a7\\u30bf",
   2330   };
   2331   */
   2332   {
   2333     UErrorCode status = U_ZERO_ERROR;
   2334     /*uint32_t i = 0;*/
   2335     /*UCollationElements *it = NULL;*/
   2336 /*    uint32_t CE;*/
   2337     UChar string[256];
   2338     uint32_t uStringLen;
   2339     UCollator *coll = NULL;
   2340 
   2341     uStringLen = u_unescape(rule1, string, 256);
   2342 
   2343     coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2344     ucol_close(coll);
   2345   }
   2346 
   2347 
   2348 }
   2349 
   2350 /*
   2351 [last variable] last variable value
   2352 [last primary ignorable] largest CE for primary ignorable
   2353 [last secondary ignorable] largest CE for secondary ignorable
   2354 [last tertiary ignorable] largest CE for tertiary ignorable
   2355 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
   2356 */
   2357 
   2358 static void TestRuleOptions(void) {
   2359   /* values here are hardcoded and are correct for the current UCA
   2360    * when the UCA changes, one might be forced to change these
   2361    * values.
   2362    */
   2363 
   2364   /*
   2365    * These strings contain the last character before [variable top]
   2366    * and the first and second characters (by primary weights) after it.
   2367    * See FractionalUCA.txt. For example:
   2368       [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
   2369       [variable top = 0C FE]
   2370       [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
   2371      and
   2372       00B4; [0D 0C, 05, 05]
   2373    *
   2374    * Note: Starting with UCA 6.0, the [variable top] collation element
   2375    * is not the weight of any character or string,
   2376    * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   2377    */
   2378 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
   2379 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
   2380 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
   2381 
   2382   /*
   2383    * This string has to match the character that has the [last regular] weight
   2384    * which changes with each UCA version.
   2385    * See the bottom of FractionalUCA.txt which says something like
   2386       [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   2387    *
   2388    * Note: Starting with UCA 6.0, the [last regular] collation element
   2389    * is not the weight of any character or string,
   2390    * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   2391    */
   2392 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
   2393 
   2394   static const struct {
   2395     const char *rules;
   2396     const char *data[10];
   2397     const uint32_t len;
   2398   } tests[] = {
   2399 #if 0
   2400     /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
   2401     /* - all befores here amount to zero */
   2402     { "&[before 3][first tertiary ignorable]<<<a",
   2403         { "\\u0000", "a"}, 2
   2404     }, /* you cannot go before first tertiary ignorable */
   2405 
   2406     { "&[before 3][last tertiary ignorable]<<<a",
   2407         { "\\u0000", "a"}, 2
   2408     }, /* you cannot go before last tertiary ignorable */
   2409 #endif
   2410     /*
   2411      * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
   2412      * and it *is* possible to "go before" that.
   2413      */
   2414     { "&[before 3][first secondary ignorable]<<<a",
   2415         { "\\u0000", "a"}, 2
   2416     },
   2417 
   2418     { "&[before 3][last secondary ignorable]<<<a",
   2419         { "\\u0000", "a"}, 2
   2420     },
   2421 
   2422     /* 'normal' befores */
   2423 
   2424     /*
   2425      * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
   2426      * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
   2427      * because there is no tailoring space before that boundary.
   2428      * Made the tests work by tailoring to a space instead.
   2429      */
   2430     { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
   2431         {  "c", "b", "\\u0332", "a" }, 4
   2432     },
   2433 
   2434     /* we don't have a code point that corresponds to
   2435      * the last primary ignorable
   2436      */
   2437     { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
   2438         {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
   2439     },
   2440 
   2441     { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
   2442         {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
   2443     },
   2444 
   2445     { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
   2446         { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
   2447     },
   2448 
   2449     { "&[first regular]<a"
   2450       "&[before 1][first regular]<b",
   2451       { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
   2452     },
   2453 
   2454     { "&[before 1][last regular]<b"
   2455       "&[last regular]<a",
   2456         { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
   2457     },
   2458 
   2459     { "&[before 1][first implicit]<b"
   2460       "&[first implicit]<a",
   2461         { "b", "\\u4e00", "a", "\\u4e01"}, 4
   2462     },
   2463 #if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
   2464     { "&[before 1][last implicit]<b"
   2465       "&[last implicit]<a",
   2466         { "b", "\\U0010FFFD", "a" }, 3
   2467     },
   2468 #endif
   2469     { "&[last variable]<z"
   2470       "&' '<x"  /* was &[last primary ignorable]<x, see above */
   2471       "&[last secondary ignorable]<<y"
   2472       "&[last tertiary ignorable]<<<w"
   2473       "&[top]<u",
   2474       {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
   2475     }
   2476 
   2477   };
   2478   uint32_t i;
   2479 
   2480   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2481     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2482   }
   2483 }
   2484 
   2485 
   2486 static void TestOptimize(void) {
   2487   /* this is not really a test - just trying out
   2488    * whether copying of UCA contents will fail
   2489    * Cannot really test, since the functionality
   2490    * remains the same.
   2491    */
   2492   static const struct {
   2493     const char *rules;
   2494     const char *data[10];
   2495     const uint32_t len;
   2496   } tests[] = {
   2497     /* - all befores here amount to zero */
   2498     { "[optimize [\\uAC00-\\uD7FF]]",
   2499     { "a", "b"}, 2}
   2500   };
   2501   uint32_t i;
   2502 
   2503   for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
   2504     genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
   2505   }
   2506 }
   2507 
   2508 /*
   2509 cycheng (at) ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
   2510 weiv    ucol_strcollIter?
   2511 cycheng (at) ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
   2512 weiv    these are the input strings?
   2513 cycheng (at) ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
   2514 weiv    will check - could be a problem with utf-8 iterator
   2515 cycheng (at) ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
   2516 weiv    hmmm
   2517 cycheng (at) ca.ibm.c... note that we have a standalone high surrogate
   2518 weiv    that doesn't sound right
   2519 cycheng (at) ca.ibm.c... we got the same inconsistent results on AIX and Win2000
   2520 weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
   2521 cycheng (at) ca.ibm.c... yes
   2522 weiv    and then do the comparison
   2523 cycheng (at) ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
   2524 weiv    utf-16 strings look like a little endian ones in the example you sent me
   2525 weiv    It could be a bug - let me try to test it out
   2526 cycheng (at) ca.ibm.c... ok
   2527 cycheng (at) ca.ibm.c... we can wait till the conf. call
   2528 cycheng (at) ca.ibm.c... next weke
   2529 weiv    that would be great
   2530 weiv    hmmm
   2531 weiv    I might be wrong
   2532 weiv    let me play with it some more
   2533 cycheng (at) ca.ibm.c... ok
   2534 cycheng (at) ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
   2535 cycheng (at) ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
   2536 cycheng (at) ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
   2537 weiv    ok
   2538 cycheng (at) ca.ibm.c... i ask sherman to send you more inconsistent data
   2539 weiv    thanks
   2540 cycheng (at) ca.ibm.c... the 4 strings we sent are just samples
   2541 */
   2542 #if 0
   2543 static void Alexis(void) {
   2544   UErrorCode status = U_ZERO_ERROR;
   2545   UCollator *coll = ucol_open("", &status);
   2546 
   2547 
   2548   const char utf16be[2][4] = {
   2549     { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
   2550     { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
   2551   };
   2552 
   2553   const char utf8[2][4] = {
   2554     { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
   2555     { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
   2556   };
   2557 
   2558   UCharIterator iterU161, iterU162;
   2559   UCharIterator iterU81, iterU82;
   2560 
   2561   UCollationResult resU16, resU8;
   2562 
   2563   uiter_setUTF16BE(&iterU161, utf16be[0], 4);
   2564   uiter_setUTF16BE(&iterU162, utf16be[1], 4);
   2565 
   2566   uiter_setUTF8(&iterU81, utf8[0], 4);
   2567   uiter_setUTF8(&iterU82, utf8[1], 4);
   2568 
   2569   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2570 
   2571   resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
   2572   resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
   2573 
   2574 
   2575   if(resU16 != resU8) {
   2576     log_err("different results\n");
   2577   }
   2578 
   2579   ucol_close(coll);
   2580 }
   2581 #endif
   2582 
   2583 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
   2584 static void Alexis2(void) {
   2585   UErrorCode status = U_ZERO_ERROR;
   2586   UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2587   char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2588   char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
   2589   int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
   2590 
   2591   UConverter *conv = NULL;
   2592 
   2593   UCharIterator U16BEItS, U16BEItT;
   2594   UCharIterator U8ItS, U8ItT;
   2595 
   2596   UCollationResult resU16, resU16BE, resU8;
   2597 
   2598   static const char* const pairs[][2] = {
   2599     { "\\ud800\\u0021", "\\uFFFC\\u0062"},
   2600     { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
   2601     { "\\u0E40\\u0021", "\\u00A1\\u0021"},
   2602     { "\\u0E40\\u0021", "\\uFE57\\u0062"},
   2603     { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
   2604     { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
   2605     { "\\u0020", "\\u0020\\u0000"}
   2606 /*
   2607 5F20 (my result here)
   2608 5F204E008E3F
   2609 5F20 (your result here)
   2610 */
   2611   };
   2612 
   2613   int32_t i = 0;
   2614 
   2615   UCollator *coll = ucol_open("", &status);
   2616   if(status == U_FILE_ACCESS_ERROR) {
   2617     log_data_err("Is your data around?\n");
   2618     return;
   2619   } else if(U_FAILURE(status)) {
   2620     log_err("Error opening collator\n");
   2621     return;
   2622   }
   2623   ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   2624   conv = ucnv_open("UTF16BE", &status);
   2625   for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
   2626     U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2627     U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
   2628 
   2629     resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
   2630 
   2631     log_verbose("Result of strcoll is %i\n", resU16);
   2632 
   2633     U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
   2634     U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
   2635     (void)U16BELenS;    /* Suppress set but not used warnings. */
   2636     (void)U16BELenT;
   2637 
   2638     /* use the original sizes, as the result from converter is in bytes */
   2639     uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
   2640     uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
   2641 
   2642     resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
   2643 
   2644     log_verbose("Result of U16BE is %i\n", resU16BE);
   2645 
   2646     if(resU16 != resU16BE) {
   2647       log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
   2648     }
   2649 
   2650     u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
   2651     u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
   2652 
   2653     uiter_setUTF8(&U8ItS, U8Source, U8LenS);
   2654     uiter_setUTF8(&U8ItT, U8Target, U8LenT);
   2655 
   2656     resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
   2657 
   2658     if(resU16 != resU8) {
   2659       log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
   2660     }
   2661 
   2662   }
   2663 
   2664   ucol_close(coll);
   2665   ucnv_close(conv);
   2666 }
   2667 
   2668 static void TestHebrewUCA(void) {
   2669   UErrorCode status = U_ZERO_ERROR;
   2670   static const char *first[] = {
   2671     "d790d6b8d79cd795d6bcd7a9",
   2672     "d790d79cd79ed7a7d799d799d7a1",
   2673     "d790d6b4d79ed795d6bcd7a9",
   2674   };
   2675 
   2676   char utf8String[3][256];
   2677   UChar utf16String[3][256];
   2678 
   2679   int32_t i = 0, j = 0;
   2680   int32_t sizeUTF8[3];
   2681   int32_t sizeUTF16[3];
   2682 
   2683   UCollator *coll = ucol_open("", &status);
   2684   if (U_FAILURE(status)) {
   2685       log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
   2686       return;
   2687   }
   2688   /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
   2689 
   2690   for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
   2691     sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
   2692     u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
   2693     log_verbose("%i: ");
   2694     for(j = 0; j < sizeUTF16[i]; j++) {
   2695       /*log_verbose("\\u%04X", utf16String[i][j]);*/
   2696       log_verbose("%04X", utf16String[i][j]);
   2697     }
   2698     log_verbose("\n");
   2699   }
   2700   for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
   2701     for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
   2702       doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
   2703     }
   2704   }
   2705 
   2706   ucol_close(coll);
   2707 
   2708 }
   2709 
   2710 static void TestPartialSortKeyTermination(void) {
   2711   static const char* cases[] = {
   2712     "\\u1234\\u1234\\udc00",
   2713     "\\udc00\\ud800\\ud800"
   2714   };
   2715 
   2716   int32_t i;
   2717 
   2718   UErrorCode status = U_ZERO_ERROR;
   2719 
   2720   UCollator *coll = ucol_open("", &status);
   2721 
   2722   UCharIterator iter;
   2723 
   2724   UChar currCase[256];
   2725   int32_t length = 0;
   2726   int32_t pKeyLen = 0;
   2727 
   2728   uint8_t key[256];
   2729 
   2730   for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
   2731     uint32_t state[2] = {0, 0};
   2732     length = u_unescape(cases[i], currCase, 256);
   2733     uiter_setString(&iter, currCase, length);
   2734     pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
   2735     (void)pKeyLen;   /* Suppress set but not used warning. */
   2736 
   2737     log_verbose("Done\n");
   2738 
   2739   }
   2740   ucol_close(coll);
   2741 }
   2742 
   2743 static void TestSettings(void) {
   2744   static const char* cases[] = {
   2745     "apple",
   2746       "Apple"
   2747   };
   2748 
   2749   static const char* locales[] = {
   2750     "",
   2751       "en"
   2752   };
   2753 
   2754   UErrorCode status = U_ZERO_ERROR;
   2755 
   2756   int32_t i = 0, j = 0;
   2757 
   2758   UChar source[256], target[256];
   2759   int32_t sLen = 0, tLen = 0;
   2760 
   2761   UCollator *collateObject = NULL;
   2762   for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
   2763     collateObject = ucol_open(locales[i], &status);
   2764     ucol_setStrength(collateObject, UCOL_PRIMARY);
   2765     ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
   2766     for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
   2767       sLen = u_unescape(cases[j-1], source, 256);
   2768       source[sLen] = 0;
   2769       tLen = u_unescape(cases[j], target, 256);
   2770       source[tLen] = 0;
   2771       doTest(collateObject, source, target, UCOL_EQUAL);
   2772     }
   2773     ucol_close(collateObject);
   2774   }
   2775 }
   2776 
   2777 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
   2778     UErrorCode status = U_ZERO_ERROR;
   2779     int32_t errorNo = 0;
   2780     const UChar *sourceRules = NULL;
   2781     int32_t sourceRulesLen = 0;
   2782     UParseError parseError;
   2783     UColAttributeValue french = UCOL_OFF;
   2784 
   2785     if(!ucol_equals(source, target)) {
   2786         log_err("Same collators, different address not equal\n");
   2787         errorNo++;
   2788     }
   2789     ucol_close(target);
   2790     if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
   2791         target = ucol_safeClone(source, NULL, NULL, &status);
   2792         if(U_FAILURE(status)) {
   2793             log_err("Error creating clone\n");
   2794             errorNo++;
   2795             return errorNo;
   2796         }
   2797         if(!ucol_equals(source, target)) {
   2798             log_err("Collator different from it's clone\n");
   2799             errorNo++;
   2800         }
   2801         french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
   2802         if(french == UCOL_ON) {
   2803             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
   2804         } else {
   2805             ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
   2806         }
   2807         if(U_FAILURE(status)) {
   2808             log_err("Error setting attributes\n");
   2809             errorNo++;
   2810             return errorNo;
   2811         }
   2812         if(ucol_equals(source, target)) {
   2813             log_err("Collators same even when options changed\n");
   2814             errorNo++;
   2815         }
   2816         ucol_close(target);
   2817 
   2818         sourceRules = ucol_getRules(source, &sourceRulesLen);
   2819         target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2820         if(U_FAILURE(status)) {
   2821             log_err("Error instantiating target from rules - %s\n", u_errorName(status));
   2822             errorNo++;
   2823             return errorNo;
   2824         }
   2825         /* Note: The tailoring rule string is an optional data item. */
   2826         if(!ucol_equals(source, target) && sourceRulesLen != 0) {
   2827             log_err("Collator different from collator that was created from the same rules\n");
   2828             errorNo++;
   2829         }
   2830         ucol_close(target);
   2831     }
   2832     return errorNo;
   2833 }
   2834 
   2835 
   2836 static void TestEquals(void) {
   2837     /* ucol_equals is not currently a public API. There is a chance that it will become
   2838     * something like this.
   2839     */
   2840     /* test whether the two collators instantiated from the same locale are equal */
   2841     UErrorCode status = U_ZERO_ERROR;
   2842     UParseError parseError;
   2843     int32_t noOfLoc = uloc_countAvailable();
   2844     const char *locName = NULL;
   2845     UCollator *source = NULL, *target = NULL;
   2846     int32_t i = 0;
   2847 
   2848     const char* rules[] = {
   2849         "&l < lj <<< Lj <<< LJ",
   2850         "&n < nj <<< Nj <<< NJ",
   2851         "&ae <<< \\u00e4",
   2852         "&AE <<< \\u00c4"
   2853     };
   2854     /*
   2855     const char* badRules[] = {
   2856     "&l <<< Lj",
   2857     "&n < nj <<< nJ <<< NJ",
   2858     "&a <<< \\u00e4",
   2859     "&AE <<< \\u00c4 <<< x"
   2860     };
   2861     */
   2862 
   2863     UChar sourceRules[1024], targetRules[1024];
   2864     int32_t sourceRulesSize = 0, targetRulesSize = 0;
   2865     int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
   2866 
   2867     for(i = 0; i < rulesSize; i++) {
   2868         sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
   2869         targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
   2870     }
   2871 
   2872     source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2873     if(status == U_FILE_ACCESS_ERROR) {
   2874         log_data_err("Is your data around?\n");
   2875         return;
   2876     } else if(U_FAILURE(status)) {
   2877         log_err("Error opening collator\n");
   2878         return;
   2879     }
   2880     target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
   2881     if(!ucol_equals(source, target)) {
   2882         log_err("Equivalent collators not equal!\n");
   2883     }
   2884     ucol_close(source);
   2885     ucol_close(target);
   2886 
   2887     source = ucol_open("root", &status);
   2888     target = ucol_open("root", &status);
   2889     log_verbose("Testing root\n");
   2890     if(!ucol_equals(source, source)) {
   2891         log_err("Same collator not equal\n");
   2892     }
   2893     if(TestEqualsForCollator("root", source, target)) {
   2894         log_err("Errors for root\n");
   2895     }
   2896     ucol_close(source);
   2897 
   2898     for(i = 0; i<noOfLoc; i++) {
   2899         status = U_ZERO_ERROR;
   2900         locName = uloc_getAvailable(i);
   2901         /*if(hasCollationElements(locName)) {*/
   2902         log_verbose("Testing equality for locale %s\n", locName);
   2903         source = ucol_open(locName, &status);
   2904         target = ucol_open(locName, &status);
   2905         if (U_FAILURE(status)) {
   2906             log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
   2907             continue;
   2908         }
   2909         if(TestEqualsForCollator(locName, source, target)) {
   2910             log_err("Errors for locale %s\n", locName);
   2911         }
   2912         ucol_close(source);
   2913         /*}*/
   2914     }
   2915 }
   2916 
   2917 static void TestJ2726(void) {
   2918     UChar a[2] = { 0x61, 0x00 }; /*"a"*/
   2919     UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
   2920     UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
   2921     UErrorCode status = U_ZERO_ERROR;
   2922     UCollator *coll = ucol_open("en", &status);
   2923     ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
   2924     ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
   2925     doTest(coll, a, aSpace, UCOL_EQUAL);
   2926     doTest(coll, aSpace, a, UCOL_EQUAL);
   2927     doTest(coll, a, spaceA, UCOL_EQUAL);
   2928     doTest(coll, spaceA, a, UCOL_EQUAL);
   2929     doTest(coll, spaceA, aSpace, UCOL_EQUAL);
   2930     doTest(coll, aSpace, spaceA, UCOL_EQUAL);
   2931     ucol_close(coll);
   2932 }
   2933 
   2934 static void NullRule(void) {
   2935     UChar r[3] = {0};
   2936     UErrorCode status = U_ZERO_ERROR;
   2937     UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2938     if(U_SUCCESS(status)) {
   2939         log_err("This should have been an error!\n");
   2940         ucol_close(coll);
   2941     } else {
   2942         status = U_ZERO_ERROR;
   2943     }
   2944     coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
   2945     if(U_FAILURE(status)) {
   2946         log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
   2947     } else {
   2948         ucol_close(coll);
   2949     }
   2950 }
   2951 
   2952 /**
   2953  * Test for CollationElementIterator previous and next for the whole set of
   2954  * unicode characters with normalization on.
   2955  */
   2956 static void TestNumericCollation(void)
   2957 {
   2958     UErrorCode status = U_ZERO_ERROR;
   2959 
   2960     const static char *basicTestStrings[]={
   2961     "hello1",
   2962     "hello2",
   2963     "hello2002",
   2964     "hello2003",
   2965     "hello123456",
   2966     "hello1234567",
   2967     "hello10000000",
   2968     "hello100000000",
   2969     "hello1000000000",
   2970     "hello10000000000",
   2971     };
   2972 
   2973     const static char *preZeroTestStrings[]={
   2974     "avery10000",
   2975     "avery010000",
   2976     "avery0010000",
   2977     "avery00010000",
   2978     "avery000010000",
   2979     "avery0000010000",
   2980     "avery00000010000",
   2981     "avery000000010000",
   2982     };
   2983 
   2984     const static char *thirtyTwoBitNumericStrings[]={
   2985     "avery42949672960",
   2986     "avery42949672961",
   2987     "avery42949672962",
   2988     "avery429496729610"
   2989     };
   2990 
   2991      const static char *longNumericStrings[]={
   2992      /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
   2993         In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
   2994         are treated as multiple collation elements. */
   2995     "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
   2996     "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
   2997     "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
   2998     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
   2999     "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
   3000     "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
   3001     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
   3002     "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
   3003     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
   3004     "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
   3005     };
   3006 
   3007     const static char *supplementaryDigits[] = {
   3008       "\\uD835\\uDFCE", /* 0 */
   3009       "\\uD835\\uDFCF", /* 1 */
   3010       "\\uD835\\uDFD0", /* 2 */
   3011       "\\uD835\\uDFD1", /* 3 */
   3012       "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
   3013       "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
   3014       "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
   3015       "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
   3016       "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
   3017       "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
   3018     };
   3019 
   3020     const static char *foreignDigits[] = {
   3021       "\\u0661",
   3022         "\\u0662",
   3023         "\\u0663",
   3024       "\\u0661\\u0660",
   3025       "\\u0661\\u0662",
   3026       "\\u0661\\u0663",
   3027       "\\u0662\\u0660",
   3028       "\\u0662\\u0662",
   3029       "\\u0662\\u0663",
   3030       "\\u0663\\u0660",
   3031       "\\u0663\\u0662",
   3032       "\\u0663\\u0663"
   3033     };
   3034 
   3035     const static char *evenZeroes[] = {
   3036       "2000",
   3037       "2001",
   3038         "2002",
   3039         "2003"
   3040     };
   3041 
   3042     UColAttribute att = UCOL_NUMERIC_COLLATION;
   3043     UColAttributeValue val = UCOL_ON;
   3044 
   3045     /* Open our collator. */
   3046     UCollator* coll = ucol_open("root", &status);
   3047     if (U_FAILURE(status)){
   3048         log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
   3049               myErrorName(status));
   3050         return;
   3051     }
   3052     genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
   3053     genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
   3054     genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
   3055     genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
   3056     genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
   3057     genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
   3058 
   3059     /* Setting up our collator to do digits. */
   3060     ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
   3061     if (U_FAILURE(status)){
   3062         log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
   3063               myErrorName(status));
   3064         return;
   3065     }
   3066 
   3067     /*
   3068        Testing that prepended zeroes still yield the correct collation behavior.
   3069        We expect that every element in our strings array will be equal.
   3070     */
   3071     genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
   3072 
   3073     ucol_close(coll);
   3074 }
   3075 
   3076 static void TestTibetanConformance(void)
   3077 {
   3078     const char* test[] = {
   3079         "\\u0FB2\\u0591\\u0F71\\u0061",
   3080         "\\u0FB2\\u0F71\\u0061"
   3081     };
   3082 
   3083     UErrorCode status = U_ZERO_ERROR;
   3084     UCollator *coll = ucol_open("", &status);
   3085     UChar source[100];
   3086     UChar target[100];
   3087     int result;
   3088     ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
   3089     if (U_SUCCESS(status)) {
   3090         u_unescape(test[0], source, 100);
   3091         u_unescape(test[1], target, 100);
   3092         doTest(coll, source, target, UCOL_EQUAL);
   3093         result = ucol_strcoll(coll, source, -1,   target, -1);
   3094         log_verbose("result %d\n", result);
   3095         if (UCOL_EQUAL != result) {
   3096             log_err("Tibetan comparison error\n");
   3097         }
   3098     }
   3099     ucol_close(coll);
   3100 
   3101     genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
   3102 }
   3103 
   3104 static void TestPinyinProblem(void) {
   3105     static const char *test[] = { "\\u4E56\\u4E56\\u7761", "\\u4E56\\u5B69\\u5B50" };
   3106     genericLocaleStarter("zh__PINYIN", test, sizeof(test)/sizeof(test[0]));
   3107 }
   3108 
   3109 /**
   3110  * Iterate through the given iterator, checking to see that all the strings
   3111  * in the expected array are present.
   3112  * @param expected array of strings we expect to see, or NULL
   3113  * @param expectedCount number of elements of expected, or 0
   3114  */
   3115 static int32_t checkUEnumeration(const char* msg,
   3116                                  UEnumeration* iter,
   3117                                  const char** expected,
   3118                                  int32_t expectedCount) {
   3119     UErrorCode ec = U_ZERO_ERROR;
   3120     int32_t i = 0, n, j, bit;
   3121     int32_t seenMask = 0;
   3122 
   3123     U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
   3124     n = uenum_count(iter, &ec);
   3125     if (!assertSuccess("count", &ec)) return -1;
   3126     log_verbose("%s = [", msg);
   3127     for (;; ++i) {
   3128         const char* s = uenum_next(iter, NULL, &ec);
   3129         if (!assertSuccess("snext", &ec) || s == NULL) break;
   3130         if (i != 0) log_verbose(",");
   3131         log_verbose("%s", s);
   3132         /* check expected list */
   3133         for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3134             if ((seenMask&bit) == 0 &&
   3135                 uprv_strcmp(s, expected[j]) == 0) {
   3136                 seenMask |= bit;
   3137                 break;
   3138             }
   3139         }
   3140     }
   3141     log_verbose("] (%d)\n", i);
   3142     assertTrue("count verified", i==n);
   3143     /* did we see all expected strings? */
   3144     for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
   3145         if ((seenMask&bit)!=0) {
   3146             log_verbose("Ok: \"%s\" seen\n", expected[j]);
   3147         } else {
   3148             log_err("FAIL: \"%s\" not seen\n", expected[j]);
   3149         }
   3150     }
   3151     return n;
   3152 }
   3153 
   3154 /**
   3155  * Test new API added for separate collation tree.
   3156  */
   3157 static void TestSeparateTrees(void) {
   3158     UErrorCode ec = U_ZERO_ERROR;
   3159     UEnumeration *e = NULL;
   3160     int32_t n = -1;
   3161     UBool isAvailable;
   3162     char loc[256];
   3163 
   3164     static const char* AVAIL[] = { "en", "de" };
   3165 
   3166     static const char* KW[] = { "collation" };
   3167 
   3168     static const char* KWVAL[] = { "phonebook", "stroke" };
   3169 
   3170 #if !UCONFIG_NO_SERVICE
   3171     e = ucol_openAvailableLocales(&ec);
   3172     if (e != NULL) {
   3173         assertSuccess("ucol_openAvailableLocales", &ec);
   3174         assertTrue("ucol_openAvailableLocales!=0", e!=0);
   3175         n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
   3176         (void)n;    /* Suppress set but not used warnings. */
   3177         /* Don't need to check n because we check list */
   3178         uenum_close(e);
   3179     } else {
   3180         log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
   3181     }
   3182 #endif
   3183 
   3184     e = ucol_getKeywords(&ec);
   3185     if (e != NULL) {
   3186         assertSuccess("ucol_getKeywords", &ec);
   3187         assertTrue("ucol_getKeywords!=0", e!=0);
   3188         n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
   3189         /* Don't need to check n because we check list */
   3190         uenum_close(e);
   3191     } else {
   3192         log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
   3193     }
   3194 
   3195     e = ucol_getKeywordValues(KW[0], &ec);
   3196     if (e != NULL) {
   3197         assertSuccess("ucol_getKeywordValues", &ec);
   3198         assertTrue("ucol_getKeywordValues!=0", e!=0);
   3199         n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
   3200         /* Don't need to check n because we check list */
   3201         uenum_close(e);
   3202     } else {
   3203         log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
   3204     }
   3205 
   3206     /* Try setting a warning before calling ucol_getKeywordValues */
   3207     ec = U_USING_FALLBACK_WARNING;
   3208     e = ucol_getKeywordValues(KW[0], &ec);
   3209     if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
   3210         assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
   3211         n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
   3212         /* Don't need to check n because we check list */
   3213         uenum_close(e);
   3214     }
   3215 
   3216     /*
   3217 U_DRAFT int32_t U_EXPORT2
   3218 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
   3219                              const char* locale, UBool* isAvailable,
   3220                              UErrorCode* status);
   3221 }
   3222 */
   3223     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
   3224                                      &isAvailable, &ec);
   3225     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3226         assertEquals("getFunctionalEquivalent(de)", "root", loc);
   3227         assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
   3228                    isAvailable == TRUE);
   3229     }
   3230 
   3231     n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
   3232                                      &isAvailable, &ec);
   3233     if (assertSuccess("getFunctionalEquivalent", &ec)) {
   3234         assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
   3235         assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
   3236                    isAvailable == FALSE);
   3237     }
   3238 }
   3239 
   3240 /* supercedes TestJ784 */
   3241 static void TestBeforePinyin(void) {
   3242     const static char rules[] = {
   3243         "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
   3244         "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
   3245         "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
   3246         "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
   3247         "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
   3248         "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
   3249     };
   3250 
   3251     const static char *test[] = {
   3252         "l\\u0101",
   3253         "la",
   3254         "l\\u0101n",
   3255         "lan ",
   3256         "l\\u0113",
   3257         "le",
   3258         "l\\u0113n",
   3259         "len"
   3260     };
   3261 
   3262     const static char *test2[] = {
   3263         "x\\u0101",
   3264         "x\\u0100",
   3265         "X\\u0101",
   3266         "X\\u0100",
   3267         "x\\u00E1",
   3268         "x\\u00C1",
   3269         "X\\u00E1",
   3270         "X\\u00C1",
   3271         "x\\u01CE",
   3272         "x\\u01CD",
   3273         "X\\u01CE",
   3274         "X\\u01CD",
   3275         "x\\u00E0",
   3276         "x\\u00C0",
   3277         "X\\u00E0",
   3278         "X\\u00C0",
   3279         "xa",
   3280         "xA",
   3281         "Xa",
   3282         "XA",
   3283         "x\\u0101x",
   3284         "x\\u0100x",
   3285         "x\\u00E1x",
   3286         "x\\u00C1x",
   3287         "x\\u01CEx",
   3288         "x\\u01CDx",
   3289         "x\\u00E0x",
   3290         "x\\u00C0x",
   3291         "xax",
   3292         "xAx"
   3293     };
   3294 
   3295     genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
   3296     genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));
   3297     genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
   3298     genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
   3299 }
   3300 
   3301 static void TestBeforeTightening(void) {
   3302     static const struct {
   3303         const char *rules;
   3304         UErrorCode expectedStatus;
   3305     } tests[] = {
   3306         { "&[before 1]a<x", U_ZERO_ERROR },
   3307         { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
   3308         { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
   3309         { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
   3310         { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
   3311         { "&[before 2]a<<x",U_ZERO_ERROR },
   3312         { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
   3313         { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
   3314         { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
   3315         { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
   3316         { "&[before 3]a<<<x",U_ZERO_ERROR },
   3317         { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
   3318         { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
   3319     };
   3320 
   3321     int32_t i = 0;
   3322 
   3323     UErrorCode status = U_ZERO_ERROR;
   3324     UChar rlz[RULE_BUFFER_LEN] = { 0 };
   3325     uint32_t rlen = 0;
   3326 
   3327     UCollator *coll = NULL;
   3328 
   3329 
   3330     for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
   3331         rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
   3332         coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
   3333         if(status != tests[i].expectedStatus) {
   3334             log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
   3335                 tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
   3336         }
   3337         ucol_close(coll);
   3338         status = U_ZERO_ERROR;
   3339     }
   3340 
   3341 }
   3342 
   3343 /*
   3344 &m < a
   3345 &[before 1] a < x <<< X << q <<< Q < z
   3346 assert: m <<< M < x <<< X << q <<< Q < z < a < n
   3347 
   3348 &m < a
   3349 &[before 2] a << x <<< X << q <<< Q < z
   3350 assert: m <<< M < x <<< X << q <<< Q << a < z < n
   3351 
   3352 &m < a
   3353 &[before 3] a <<< x <<< X << q <<< Q < z
   3354 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
   3355 
   3356 
   3357 &m << a
   3358 &[before 1] a < x <<< X << q <<< Q < z
   3359 assert: x <<< X << q <<< Q < z < m <<< M << a < n
   3360 
   3361 &m << a
   3362