Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 #include "unicode/utypes.h"
      8 
      9 #if !UCONFIG_NO_COLLATION
     10 
     11 #include "unicode/coll.h"
     12 #include "unicode/localpointer.h"
     13 #include "unicode/tblcoll.h"
     14 #include "unicode/unistr.h"
     15 #include "unicode/sortkey.h"
     16 #include "regcoll.h"
     17 #include "sfwdchit.h"
     18 #include "testutil.h"
     19 #include "cmemory.h"
     20 
     21 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
     22 
     23 CollationRegressionTest::CollationRegressionTest()
     24 {
     25     UErrorCode status = U_ZERO_ERROR;
     26 
     27     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
     28     if(U_FAILURE(status)) {
     29       delete en_us;
     30       en_us = 0;
     31       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
     32       return;
     33     }
     34 }
     35 
     36 CollationRegressionTest::~CollationRegressionTest()
     37 {
     38     delete en_us;
     39 }
     40 
     41 
     42     // @bug 4048446
     43 //
     44 // CollationElementIterator.reset() doesn't work
     45 //
     46 void CollationRegressionTest::Test4048446(/* char* par */)
     47 {
     48     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
     49     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
     50     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
     51     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
     52     UErrorCode status = U_ZERO_ERROR;
     53 
     54     if (i1 == NULL|| i2 == NULL)
     55     {
     56         errln("Could not create CollationElementIterator's");
     57         delete i1;
     58         delete i2;
     59         return;
     60     }
     61 
     62     while (i1->next(status) != CollationElementIterator::NULLORDER)
     63     {
     64         if (U_FAILURE(status))
     65         {
     66             errln("error calling next()");
     67 
     68             delete i1;
     69             delete i2;
     70             return;
     71         }
     72     }
     73 
     74     i1->reset();
     75 
     76     assertEqual(*i1, *i2);
     77 
     78     delete i1;
     79     delete i2;
     80 }
     81 
     82 // @bug 4051866
     83 //
     84 // Collator -> rules -> Collator round-trip broken for expanding characters
     85 //
     86 void CollationRegressionTest::Test4051866(/* char* par */)
     87 {
     88     UnicodeString rules;
     89     UErrorCode status = U_ZERO_ERROR;
     90 
     91     rules += "&n < o ";
     92     rules += "& oe ,o";
     93     rules += (UChar)0x3080;
     94     rules += "& oe ,";
     95     rules += (UChar)0x1530;
     96     rules += " ,O";
     97     rules += "& OE ,O";
     98     rules += (UChar)0x3080;
     99     rules += "& OE ,";
    100     rules += (UChar)0x1520;
    101     rules += "< p ,P";
    102 
    103     // Build a collator containing expanding characters
    104     LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status));
    105 
    106     // Build another using the rules from  the first
    107     LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status));
    108     if (U_FAILURE(status)) {
    109         errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
    110         return;
    111     }
    112 
    113     // Make sure they're the same
    114     if (!(c1->getRules() == c2->getRules()))
    115     {
    116         errln("Rules are not equal");
    117     }
    118 }
    119 
    120 // @bug 4053636
    121 //
    122 // Collator thinks "black-bird" == "black"
    123 //
    124 void CollationRegressionTest::Test4053636(/* char* par */)
    125 {
    126     if (en_us->equals("black_bird", "black"))
    127     {
    128         errln("black-bird == black");
    129     }
    130 }
    131 
    132 // @bug 4054238
    133 //
    134 // CollationElementIterator will not work correctly if the associated
    135 // Collator object's mode is changed
    136 //
    137 void CollationRegressionTest::Test4054238(/* char* par */)
    138 {
    139     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
    140     const UnicodeString test3(chars3);
    141     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    142 
    143     // NOTE: The Java code uses en_us to create the CollationElementIterators
    144     // but I'm pretty sure that's wrong, so I've changed this to use c.
    145     UErrorCode status = U_ZERO_ERROR;
    146     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    147     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
    148     delete i1;
    149     delete c;
    150 }
    151 
    152 // @bug 4054734
    153 //
    154 // Collator::IDENTICAL documented but not implemented
    155 //
    156 void CollationRegressionTest::Test4054734(/* char* par */)
    157 {
    158     /*
    159         Here's the original Java:
    160 
    161         String[] decomp = {
    162             "\u0001",   "<",    "\u0002",
    163             "\u0001",   "=",    "\u0001",
    164             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
    165             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
    166         };
    167 
    168         String[] nodecomp = {
    169             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
    170         };
    171     */
    172 
    173     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
    174     {
    175         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
    176         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
    177         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
    178         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
    179     };
    180 
    181 
    182     UErrorCode status = U_ZERO_ERROR;
    183     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    184 
    185     c->setStrength(Collator::IDENTICAL);
    186 
    187     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    188     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
    189 
    190     delete c;
    191 }
    192 
    193 // @bug 4054736
    194 //
    195 // Full Decomposition mode not implemented
    196 //
    197 void CollationRegressionTest::Test4054736(/* char* par */)
    198 {
    199     UErrorCode status = U_ZERO_ERROR;
    200     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    201 
    202     c->setStrength(Collator::SECONDARY);
    203     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    204 
    205     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    206     {
    207         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
    208     };
    209 
    210     compareArray(*c, tests, ARRAY_LENGTH(tests));
    211 
    212     delete c;
    213 }
    214 
    215 // @bug 4058613
    216 //
    217 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
    218 //
    219 void CollationRegressionTest::Test4058613(/* char* par */)
    220 {
    221     // Creating a default collator doesn't work when Korean is the default
    222     // locale
    223 
    224     Locale oldDefault = Locale::getDefault();
    225     UErrorCode status = U_ZERO_ERROR;
    226 
    227     Locale::setDefault(Locale::getKorean(), status);
    228 
    229     if (U_FAILURE(status))
    230     {
    231         errln("Could not set default locale to Locale::KOREAN");
    232         return;
    233     }
    234 
    235     Collator *c = NULL;
    236 
    237     c = Collator::createInstance("en_US", status);
    238 
    239     if (c == NULL || U_FAILURE(status))
    240     {
    241         errln("Could not create a Korean collator");
    242         Locale::setDefault(oldDefault, status);
    243         delete c;
    244         return;
    245     }
    246 
    247     // Since the fix to this bug was to turn off decomposition for Korean collators,
    248     // ensure that's what we got
    249     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
    250     {
    251       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
    252     }
    253 
    254     delete c;
    255 
    256     Locale::setDefault(oldDefault, status);
    257 }
    258 
    259 // @bug 4059820
    260 //
    261 // RuleBasedCollator.getRules does not return the exact pattern as input
    262 // for expanding character sequences
    263 //
    264 void CollationRegressionTest::Test4059820(/* char* par */)
    265 {
    266     UErrorCode status = U_ZERO_ERROR;
    267 
    268     RuleBasedCollator *c = NULL;
    269     UnicodeString rules = "&9 < a < b , c/a < d < z";
    270 
    271     c = new RuleBasedCollator(rules, status);
    272 
    273     if (c == NULL || U_FAILURE(status))
    274     {
    275         errln("Failure building a collator.");
    276         delete c;
    277         return;
    278     }
    279 
    280     if ( c->getRules().indexOf("c/a") == -1)
    281     {
    282         errln("returned rules do not contain 'c/a'");
    283     }
    284 
    285     delete c;
    286 }
    287 
    288 // @bug 4060154
    289 //
    290 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
    291 //
    292 void CollationRegressionTest::Test4060154(/* char* par */)
    293 {
    294     UErrorCode status = U_ZERO_ERROR;
    295     UnicodeString rules;
    296 
    297     rules += "&f < g, G < h, H < i, I < j, J";
    298     rules +=  " & H < ";
    299     rules += (UChar)0x0131;
    300     rules += ", ";
    301     rules += (UChar)0x0130;
    302     rules += ", i, I";
    303 
    304     RuleBasedCollator *c = NULL;
    305 
    306     c = new RuleBasedCollator(rules, status);
    307 
    308     if (c == NULL || U_FAILURE(status))
    309     {
    310         errln("failure building collator.");
    311         delete c;
    312         return;
    313     }
    314 
    315     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    316 
    317  /*
    318     String[] tertiary = {
    319         "A",        "<",    "B",
    320         "H",        "<",    "\u0131",
    321         "H",        "<",    "I",
    322         "\u0131",   "<",    "\u0130",
    323         "\u0130",   "<",    "i",
    324         "\u0130",   ">",    "H",
    325     };
    326 */
    327 
    328     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    329     {
    330         {0x41, 0},    {0x3c, 0}, {0x42, 0},
    331         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
    332         {0x48, 0},    {0x3c, 0}, {0x49, 0},
    333         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
    334         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
    335         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
    336     };
    337 
    338     c->setStrength(Collator::TERTIARY);
    339     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
    340 
    341     /*
    342     String[] secondary = {
    343         "H",        "<",    "I",
    344         "\u0131",   "=",    "\u0130",
    345     };
    346 */
    347     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    348     {
    349         {0x48, 0},    {0x3c, 0}, {0x49, 0},
    350         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
    351     };
    352 
    353     c->setStrength(Collator::PRIMARY);
    354     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
    355 
    356     delete c;
    357 }
    358 
    359 // @bug 4062418
    360 //
    361 // Secondary/Tertiary comparison incorrect in French Secondary
    362 //
    363 void CollationRegressionTest::Test4062418(/* char* par */)
    364 {
    365     UErrorCode status = U_ZERO_ERROR;
    366 
    367     RuleBasedCollator *c = NULL;
    368 
    369     c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
    370 
    371     if (c == NULL || U_FAILURE(status))
    372     {
    373         errln("Failed to create collator for Locale::getCanadaFrench()");
    374         delete c;
    375         return;
    376     }
    377 
    378     c->setStrength(Collator::SECONDARY);
    379 
    380 /*
    381     String[] tests = {
    382             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
    383     };
    384 */
    385     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    386     {
    387         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
    388     };
    389 
    390     compareArray(*c, tests, ARRAY_LENGTH(tests));
    391 
    392     delete c;
    393 }
    394 
    395 // @bug 4065540
    396 //
    397 // Collator::compare() method broken if either string contains spaces
    398 //
    399 void CollationRegressionTest::Test4065540(/* char* par */)
    400 {
    401     if (en_us->compare("abcd e", "abcd f") == 0)
    402     {
    403         errln("'abcd e' == 'abcd f'");
    404     }
    405 }
    406 
    407 // @bug 4066189
    408 //
    409 // Unicode characters need to be recursively decomposed to get the
    410 // correct result. For example,
    411 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
    412 //
    413 void CollationRegressionTest::Test4066189(/* char* par */)
    414 {
    415     static const UChar chars1[] = {0x1EB1, 0};
    416     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
    417     const UnicodeString test1(chars1);
    418     const UnicodeString test2(chars2);
    419     UErrorCode status = U_ZERO_ERROR;
    420 
    421     // NOTE: The java code used en_us to create the
    422     // CollationElementIterator's. I'm pretty sure that
    423     // was wrong, so I've change the code to use c1 and c2
    424     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
    425     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    426     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
    427 
    428     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
    429     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    430     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
    431 
    432     assertEqual(*i1, *i2);
    433 
    434     delete i2;
    435     delete c2;
    436     delete i1;
    437     delete c1;
    438 }
    439 
    440 // @bug 4066696
    441 //
    442 // French secondary collation checking at the end of compare iteration fails
    443 //
    444 void CollationRegressionTest::Test4066696(/* char* par */)
    445 {
    446     UErrorCode status = U_ZERO_ERROR;
    447     RuleBasedCollator *c = NULL;
    448 
    449     c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
    450 
    451     if (c == NULL || U_FAILURE(status))
    452     {
    453         errln("Failure creating collator for Locale::getCanadaFrench()");
    454         delete c;
    455         return;
    456     }
    457 
    458     c->setStrength(Collator::SECONDARY);
    459 
    460 /*
    461     String[] tests = {
    462         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
    463     };
    464 
    465   should be:
    466 
    467     String[] tests = {
    468         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
    469     };
    470 
    471 */
    472 
    473     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    474     {
    475         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
    476     };
    477 
    478     compareArray(*c, tests, ARRAY_LENGTH(tests));
    479 
    480     delete c;
    481 }
    482 
    483 // @bug 4076676
    484 //
    485 // Bad canonicalization of same-class combining characters
    486 //
    487 void CollationRegressionTest::Test4076676(/* char* par */)
    488 {
    489     // These combining characters are all in the same class, so they should not
    490     // be reordered, and they should compare as unequal.
    491     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
    492     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
    493 
    494     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    495     c->setStrength(Collator::TERTIARY);
    496 
    497     if (c->compare(s1,s2) == 0)
    498     {
    499         errln("Same-class combining chars were reordered");
    500     }
    501 
    502     delete c;
    503 }
    504 
    505 // @bug 4079231
    506 //
    507 // RuleBasedCollator::operator==(NULL) throws NullPointerException
    508 //
    509 void CollationRegressionTest::Test4079231(/* char* par */)
    510 {
    511     // I don't think there's any way to write this test
    512     // in C++. The following is equivalent to the Java,
    513     // but doesn't compile 'cause NULL can't be converted
    514     // to Collator&
    515     //
    516     // if (en_us->operator==(NULL))
    517     // {
    518     //     errln("en_us->operator==(NULL) returned TRUE");
    519     // }
    520 
    521  /*
    522    try {
    523         if (en_us->equals(null)) {
    524             errln("en_us->equals(null) returned true");
    525         }
    526     }
    527     catch (Exception e) {
    528         errln("en_us->equals(null) threw " + e.toString());
    529     }
    530 */
    531 }
    532 
    533 // @bug 4078588
    534 //
    535 // RuleBasedCollator breaks on "< a < bb" rule
    536 //
    537 void CollationRegressionTest::Test4078588(/* char *par */)
    538 {
    539     UErrorCode status = U_ZERO_ERROR;
    540     RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
    541 
    542     if (rbc == NULL || U_FAILURE(status))
    543     {
    544         errln("Failed to create RuleBasedCollator.");
    545         delete rbc;
    546         return;
    547     }
    548 
    549     Collator::EComparisonResult result = rbc->compare("a","bb");
    550 
    551     if (result != Collator::LESS)
    552     {
    553         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
    554             + (UnicodeString)"; expected -1");
    555     }
    556 
    557     delete rbc;
    558 }
    559 
    560 // @bug 4081866
    561 //
    562 // Combining characters in different classes not reordered properly.
    563 //
    564 void CollationRegressionTest::Test4081866(/* char* par */)
    565 {
    566     // These combining characters are all in different classes,
    567     // so they should be reordered and the strings should compare as equal.
    568     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
    569     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
    570 
    571     UErrorCode status = U_ZERO_ERROR;
    572     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    573     c->setStrength(Collator::TERTIARY);
    574 
    575     // Now that the default collators are set to NO_DECOMPOSITION
    576     // (as a result of fixing bug 4114077), we must set it explicitly
    577     // when we're testing reordering behavior.  -- lwerner, 5/5/98
    578     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    579 
    580     if (c->compare(s1,s2) != 0)
    581     {
    582         errln("Combining chars were not reordered");
    583     }
    584 
    585     delete c;
    586 }
    587 
    588 // @bug 4087241
    589 //
    590 // string comparison errors in Scandinavian collators
    591 //
    592 void CollationRegressionTest::Test4087241(/* char* par */)
    593 {
    594     UErrorCode status = U_ZERO_ERROR;
    595     Locale da_DK("da", "DK");
    596     RuleBasedCollator *c = NULL;
    597 
    598     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
    599 
    600     if (c == NULL || U_FAILURE(status))
    601     {
    602         errln("Failed to create collator for da_DK locale");
    603         delete c;
    604         return;
    605     }
    606 
    607     c->setStrength(Collator::SECONDARY);
    608 
    609     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    610     {
    611         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
    612         {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
    613         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
    614     };
    615 
    616     compareArray(*c, tests, ARRAY_LENGTH(tests));
    617 
    618     delete c;
    619 }
    620 
    621 // @bug 4087243
    622 //
    623 // CollationKey takes ignorable strings into account when it shouldn't
    624 //
    625 void CollationRegressionTest::Test4087243(/* char* par */)
    626 {
    627     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    628     c->setStrength(Collator::TERTIARY);
    629 
    630     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    631     {
    632         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
    633     };
    634 
    635     compareArray(*c, tests, ARRAY_LENGTH(tests));
    636 
    637     delete c;
    638 }
    639 
    640 // @bug 4092260
    641 //
    642 // Mu/micro conflict
    643 // Micro symbol and greek lowercase letter Mu should sort identically
    644 //
    645 void CollationRegressionTest::Test4092260(/* char* par */)
    646 {
    647     UErrorCode status = U_ZERO_ERROR;
    648     Locale el("el", "");
    649     Collator *c = NULL;
    650 
    651     c = Collator::createInstance(el, status);
    652 
    653     if (c == NULL || U_FAILURE(status))
    654     {
    655         errln("Failed to create collator for el locale.");
    656         delete c;
    657         return;
    658     }
    659 
    660     // These now have tertiary differences in UCA
    661     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
    662 
    663     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    664     {
    665         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
    666     };
    667 
    668     compareArray(*c, tests, ARRAY_LENGTH(tests));
    669 
    670     delete c;
    671 }
    672 
    673 // @bug 4095316
    674 //
    675 void CollationRegressionTest::Test4095316(/* char* par */)
    676 {
    677     UErrorCode status = U_ZERO_ERROR;
    678     Locale el_GR("el", "GR");
    679     Collator *c = Collator::createInstance(el_GR, status);
    680 
    681     if (c == NULL || U_FAILURE(status))
    682     {
    683         errln("Failed to create collator for el_GR locale");
    684         delete c;
    685         return;
    686     }
    687     // These now have tertiary differences in UCA
    688     //c->setStrength(Collator::TERTIARY);
    689     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
    690 
    691     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    692     {
    693         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
    694     };
    695 
    696     compareArray(*c, tests, ARRAY_LENGTH(tests));
    697 
    698     delete c;
    699 }
    700 
    701 // @bug 4101940
    702 //
    703 void CollationRegressionTest::Test4101940(/* char* par */)
    704 {
    705     UErrorCode status = U_ZERO_ERROR;
    706     RuleBasedCollator *c = NULL;
    707     UnicodeString rules = "&9 < a < b";
    708     UnicodeString nothing = "";
    709 
    710     c = new RuleBasedCollator(rules, status);
    711 
    712     if (c == NULL || U_FAILURE(status))
    713     {
    714         errln("Failed to create RuleBasedCollator");
    715         delete c;
    716         return;
    717     }
    718 
    719     CollationElementIterator *i = c->createCollationElementIterator(nothing);
    720     i->reset();
    721 
    722     if (i->next(status) != CollationElementIterator::NULLORDER)
    723     {
    724         errln("next did not return NULLORDER");
    725     }
    726 
    727     delete i;
    728     delete c;
    729 }
    730 
    731 // @bug 4103436
    732 //
    733 // Collator::compare not handling spaces properly
    734 //
    735 void CollationRegressionTest::Test4103436(/* char* par */)
    736 {
    737     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    738     c->setStrength(Collator::TERTIARY);
    739 
    740     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    741     {
    742         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
    743         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
    744     };
    745 
    746     compareArray(*c, tests, ARRAY_LENGTH(tests));
    747 
    748     delete c;
    749 }
    750 
    751 // @bug 4114076
    752 //
    753 // Collation not Unicode conformant with Hangul syllables
    754 //
    755 void CollationRegressionTest::Test4114076(/* char* par */)
    756 {
    757     UErrorCode status = U_ZERO_ERROR;
    758     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    759     c->setStrength(Collator::TERTIARY);
    760 
    761     //
    762     // With Canonical decomposition, Hangul syllables should get decomposed
    763     // into Jamo, but Jamo characters should not be decomposed into
    764     // conjoining Jamo
    765     //
    766     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    767     {
    768         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
    769     };
    770 
    771     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    772     compareArray(*c, test1, ARRAY_LENGTH(test1));
    773 
    774     // From UTR #15:
    775     // *In earlier versions of Unicode, jamo characters like ksf
    776     //  had compatibility mappings to kf + sf. These mappings were
    777     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
    778     // That is, the following test is obsolete as of 2.1.9
    779 
    780 //obsolete-    // With Full decomposition, it should go all the way down to
    781 //obsolete-    // conjoining Jamo characters.
    782 //obsolete-    //
    783 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    784 //obsolete-    {
    785 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
    786 //obsolete-    };
    787 //obsolete-
    788 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
    789 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
    790 
    791     delete c;
    792 }
    793 
    794 
    795 // @bug 4124632
    796 //
    797 // Collator::getCollationKey was hanging on certain character sequences
    798 //
    799 void CollationRegressionTest::Test4124632(/* char* par */)
    800 {
    801     UErrorCode status = U_ZERO_ERROR;
    802     Collator *coll = NULL;
    803 
    804     coll = Collator::createInstance(Locale::getJapan(), status);
    805 
    806     if (coll == NULL || U_FAILURE(status))
    807     {
    808         errln("Failed to create collator for Locale::JAPAN");
    809         delete coll;
    810         return;
    811     }
    812 
    813     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
    814     CollationKey key;
    815 
    816     coll->getCollationKey(test, key, status);
    817 
    818     if (key.isBogus() || U_FAILURE(status))
    819     {
    820         errln("CollationKey creation failed.");
    821     }
    822 
    823     delete coll;
    824 }
    825 
    826 // @bug 4132736
    827 //
    828 // sort order of french words with multiple accents has errors
    829 //
    830 void CollationRegressionTest::Test4132736(/* char* par */)
    831 {
    832     UErrorCode status = U_ZERO_ERROR;
    833 
    834     Collator *c = NULL;
    835 
    836     c = Collator::createInstance(Locale::getCanadaFrench(), status);
    837     c->setStrength(Collator::TERTIARY);
    838 
    839     if (c == NULL || U_FAILURE(status))
    840     {
    841         errln("Failed to create a collator for Locale::getCanadaFrench()");
    842         delete c;
    843         return;
    844     }
    845 
    846     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    847     {
    848         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
    849         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
    850     };
    851 
    852     compareArray(*c, test1, ARRAY_LENGTH(test1));
    853 
    854     delete c;
    855 }
    856 
    857 // @bug 4133509
    858 //
    859 // The sorting using java.text.CollationKey is not in the exact order
    860 //
    861 void CollationRegressionTest::Test4133509(/* char* par */)
    862 {
    863     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    864     {
    865         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
    866         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
    867         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
    868     };
    869 
    870     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
    871 }
    872 
    873 // @bug 4114077
    874 //
    875 // Collation with decomposition off doesn't work for Europe
    876 //
    877 void CollationRegressionTest::Test4114077(/* char* par */)
    878 {
    879     // Ensure that we get the same results with decomposition off
    880     // as we do with it on....
    881 
    882     UErrorCode status = U_ZERO_ERROR;
    883     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    884     c->setStrength(Collator::TERTIARY);
    885 
    886     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    887     {
    888         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
    889         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
    890         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
    891         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
    892                                                 //   -> a, ring, acute
    893         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
    894     };
    895 
    896     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    897     compareArray(*c, test1, ARRAY_LENGTH(test1));
    898 
    899     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    900     {
    901         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
    902     };
    903 
    904     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    905     compareArray(*c, test2, ARRAY_LENGTH(test2));
    906 
    907     delete c;
    908 }
    909 
    910 // @bug 4141640
    911 //
    912 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
    913 //
    914 void CollationRegressionTest::Test4141640(/* char* par */)
    915 {
    916     //
    917     // Rather than just creating a Swedish collator, we might as well
    918     // try to instantiate one for every locale available on the system
    919     // in order to prevent this sort of bug from cropping up in the future
    920     //
    921     UErrorCode status = U_ZERO_ERROR;
    922     int32_t i, localeCount;
    923     const Locale *locales = Locale::getAvailableLocales(localeCount);
    924 
    925     for (i = 0; i < localeCount; i += 1)
    926     {
    927         Collator *c = NULL;
    928 
    929         status = U_ZERO_ERROR;
    930         c = Collator::createInstance(locales[i], status);
    931 
    932         if (c == NULL || U_FAILURE(status))
    933         {
    934             UnicodeString msg, localeName;
    935 
    936             msg += "Could not create collator for locale ";
    937             msg += locales[i].getName();
    938 
    939             errln(msg);
    940         }
    941 
    942         delete c;
    943     }
    944 }
    945 
    946 // @bug 4139572
    947 //
    948 // getCollationKey throws exception for spanish text
    949 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
    950 //
    951 void CollationRegressionTest::Test4139572(/* char* par */)
    952 {
    953     //
    954     // Code pasted straight from the bug report
    955     // (and then translated to C++ ;-)
    956     //
    957     // create spanish locale and collator
    958     UErrorCode status = U_ZERO_ERROR;
    959     Locale l("es", "es");
    960     Collator *col = NULL;
    961 
    962     col = Collator::createInstance(l, status);
    963 
    964     if (col == NULL || U_FAILURE(status))
    965     {
    966         errln("Failed to create a collator for es_es locale.");
    967         delete col;
    968         return;
    969     }
    970 
    971     CollationKey key;
    972 
    973     // this spanish phrase kills it!
    974     col->getCollationKey("Nombre De Objeto", key, status);
    975 
    976     if (key.isBogus() || U_FAILURE(status))
    977     {
    978         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
    979     }
    980 
    981     delete col;
    982 }
    983 /* HSYS : RuleBasedCollator::compare() performance enhancements
    984           compare() does not create CollationElementIterator() anymore.*/
    985 
    986 class My4146160Collator : public RuleBasedCollator
    987 {
    988 public:
    989     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
    990     ~My4146160Collator();
    991 
    992     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
    993 
    994     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
    995 
    996     static int32_t count;
    997 };
    998 
    999 int32_t My4146160Collator::count = 0;
   1000 
   1001 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
   1002   : RuleBasedCollator(rbc.getRules(), status)
   1003 {
   1004 }
   1005 
   1006 My4146160Collator::~My4146160Collator()
   1007 {
   1008 }
   1009 
   1010 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
   1011 {
   1012     count += 1;
   1013     return RuleBasedCollator::createCollationElementIterator(text);
   1014 }
   1015 
   1016 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
   1017 {
   1018     count += 1;
   1019     return RuleBasedCollator::createCollationElementIterator(text);
   1020 }
   1021 
   1022 // @bug 4146160
   1023 //
   1024 // RuleBasedCollator doesn't use createCollationElementIterator internally
   1025 //
   1026 void CollationRegressionTest::Test4146160(/* char* par */)
   1027 {
   1028 #if 0
   1029     //
   1030     // Use a custom collator class whose createCollationElementIterator
   1031     // methods increment a count....
   1032     //
   1033     UErrorCode status = U_ZERO_ERROR;
   1034     CollationKey key;
   1035 
   1036     My4146160Collator::count = 0;
   1037     My4146160Collator *mc = NULL;
   1038 
   1039     mc = new My4146160Collator(*en_us, status);
   1040 
   1041     if (mc == NULL || U_FAILURE(status))
   1042     {
   1043         errln("Failed to create a My4146160Collator.");
   1044         delete mc;
   1045         return;
   1046     }
   1047 
   1048     mc->getCollationKey("1", key, status);
   1049 
   1050     if (key.isBogus() || U_FAILURE(status))
   1051     {
   1052         errln("Failure to get a CollationKey from a My4146160Collator.");
   1053         delete mc;
   1054         return;
   1055     }
   1056 
   1057     if (My4146160Collator::count < 1)
   1058     {
   1059         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
   1060     }
   1061 
   1062     My4146160Collator::count = 0;
   1063     mc->compare("1", "2");
   1064 
   1065     if (My4146160Collator::count < 1)
   1066     {
   1067         errln("My4146160Collator::createtCollationElementIterator not called for compare");
   1068     }
   1069 
   1070     delete mc;
   1071 #endif
   1072 }
   1073 
   1074 void CollationRegressionTest::Test4179216() {
   1075     // you can position a CollationElementIterator in the middle of
   1076     // a contracting character sequence, yielding a bogus collation
   1077     // element
   1078     IcuTestErrorCode errorCode(*this, "Test4179216");
   1079     RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
   1080     UnicodeString testText = "church church catcatcher runcrunchynchy";
   1081     CollationElementIterator *iter = coll.createCollationElementIterator(testText);
   1082 
   1083     // test that the "ch" combination works properly
   1084     iter->setOffset(4, errorCode);
   1085     int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1086 
   1087     iter->reset();
   1088     int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1089 
   1090     iter->setOffset(5, errorCode);
   1091     int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1092 
   1093     // Compares and prints only 16-bit primary weights.
   1094     if (elt4 != elt0 || elt5 != elt0) {
   1095         errln("The collation elements at positions 0 (0x%04x), "
   1096                 "4 (0x%04x), and 5 (0x%04x) don't match.",
   1097                 elt0, elt4, elt5);
   1098     }
   1099 
   1100     // test that the "cat" combination works properly
   1101     iter->setOffset(14, errorCode);
   1102     int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1103 
   1104     iter->setOffset(15, errorCode);
   1105     int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1106 
   1107     iter->setOffset(16, errorCode);
   1108     int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1109 
   1110     iter->setOffset(17, errorCode);
   1111     int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1112 
   1113     iter->setOffset(18, errorCode);
   1114     int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1115 
   1116     iter->setOffset(19, errorCode);
   1117     int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1118 
   1119     // Compares and prints only 16-bit primary weights.
   1120     if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
   1121             || elt14 != elt18 || elt14 != elt19) {
   1122         errln("\"cat\" elements don't match: elt14 = 0x%04x, "
   1123                 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
   1124                 "elt18 = 0x%04x, elt19 = 0x%04x",
   1125                 elt14, elt15, elt16, elt17, elt18, elt19);
   1126     }
   1127 
   1128     // now generate a complete list of the collation elements,
   1129     // first using next() and then using setOffset(), and
   1130     // make sure both interfaces return the same set of elements
   1131     iter->reset();
   1132 
   1133     int32_t elt = iter->next(errorCode);
   1134     int32_t count = 0;
   1135     while (elt != CollationElementIterator::NULLORDER) {
   1136         ++count;
   1137         elt = iter->next(errorCode);
   1138     }
   1139 
   1140     LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
   1141     LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
   1142     int32_t lastPos = 0;
   1143 
   1144     iter->reset();
   1145     elt = iter->next(errorCode);
   1146     count = 0;
   1147     while (elt != CollationElementIterator::NULLORDER) {
   1148         nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
   1149         lastPos = iter->getOffset();
   1150         elt = iter->next(errorCode);
   1151     }
   1152     int32_t nextElementsLength = count;
   1153     count = 0;
   1154     for (int32_t i = 0; i < testText.length(); ) {
   1155         iter->setOffset(i, errorCode);
   1156         lastPos = iter->getOffset();
   1157         elt = iter->next(errorCode);
   1158         setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
   1159         i = iter->getOffset();
   1160     }
   1161     for (int32_t i = 0; i < nextElementsLength; i++) {
   1162         if (nextElements[i] == setOffsetElements[i]) {
   1163             logln(nextElements[i]);
   1164         } else {
   1165             errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
   1166                 ", but setOffset() yielded " + setOffsetElements[i]);
   1167         }
   1168     }
   1169     delete iter;
   1170 }
   1171 
   1172 // Ticket 7189
   1173 //
   1174 // nextSortKeyPart incorrect for EO_S1 collation
   1175 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
   1176     UCharIterator uiter;
   1177     uint32_t state[2] = { 0, 0 };
   1178     int32_t keyLen;
   1179     int32_t count = 8;
   1180 
   1181     uiter_setString(&uiter, text, len);
   1182     keyLen = 0;
   1183     while (TRUE) {
   1184         int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
   1185         if (U_FAILURE(status)) {
   1186             return -1;
   1187         }
   1188         if (keyPartLen == 0) {
   1189             break;
   1190         }
   1191         keyLen += keyPartLen;
   1192     }
   1193     return keyLen;
   1194 }
   1195 
   1196 void CollationRegressionTest::TestT7189() {
   1197     UErrorCode status = U_ZERO_ERROR;
   1198     UCollator *coll;
   1199     uint32_t i;
   1200 
   1201     static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
   1202     // "Achter De Hoven"
   1203         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
   1204         // "ABC"
   1205         { 0x41, 0x42, 0x43, 0x00 },
   1206         // "HELLO world!"
   1207         { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
   1208     };
   1209 
   1210     static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
   1211     // "Achter de Hoven"
   1212         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
   1213         // "abc"
   1214         { 0x61, 0x62, 0x63, 0x00 },
   1215         // "hello world!"
   1216         { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
   1217     };
   1218 
   1219     // Open the collator
   1220     coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
   1221     if (U_FAILURE(status)) {
   1222         errln("Failed to create a collator for short string EO_S1");
   1223         return;
   1224     }
   1225 
   1226     for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
   1227         uint8_t key1[100], key2[100];
   1228         int32_t len1, len2;
   1229 
   1230         len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
   1231         if (U_FAILURE(status)) {
   1232             errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
   1233             break;
   1234         }
   1235         len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
   1236         if (U_FAILURE(status)) {
   1237             errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
   1238             break;
   1239         }
   1240 
   1241         if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
   1242             errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
   1243         } else {
   1244             logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
   1245                     + TestUtility::hex(key2, len2));
   1246         }
   1247     }
   1248     ucol_close(coll);
   1249 }
   1250 
   1251 void CollationRegressionTest::TestCaseFirstCompression() {
   1252     RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
   1253     UErrorCode status = U_ZERO_ERROR;
   1254 
   1255     // default
   1256     caseFirstCompressionSub(col, "default");
   1257 
   1258     // Upper first
   1259     col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
   1260     if (U_FAILURE(status)) {
   1261         errln("Failed to set UCOL_UPPER_FIRST");
   1262         return;
   1263     }
   1264     caseFirstCompressionSub(col, "upper first");
   1265 
   1266     // Lower first
   1267     col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
   1268     if (U_FAILURE(status)) {
   1269         errln("Failed to set UCOL_LOWER_FIRST");
   1270         return;
   1271     }
   1272     caseFirstCompressionSub(col, "lower first");
   1273 
   1274     delete col;
   1275 }
   1276 
   1277 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
   1278     const int32_t maxLength = 50;
   1279 
   1280     UChar str1[maxLength];
   1281     UChar str2[maxLength];
   1282 
   1283     CollationKey key1, key2;
   1284 
   1285     for (int32_t len = 1; len <= maxLength; len++) {
   1286         int32_t i = 0;
   1287         for (; i < len - 1; i++) {
   1288             str1[i] = str2[i] = (UChar)0x61; // 'a'
   1289         }
   1290         str1[i] = (UChar)0x41; // 'A'
   1291         str2[i] = (UChar)0x61; // 'a'
   1292 
   1293         UErrorCode status = U_ZERO_ERROR;
   1294         col->getCollationKey(str1, len, key1, status);
   1295         col->getCollationKey(str2, len, key2, status);
   1296 
   1297         UCollationResult cmpKey = key1.compareTo(key2, status);
   1298         UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
   1299 
   1300         if (U_FAILURE(status)) {
   1301             errln("Error in caseFirstCompressionSub");
   1302         } else if (cmpKey != cmpCol) {
   1303             errln((UnicodeString)"Inconsistent comparison(" + opt
   1304                 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
   1305                 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
   1306         }
   1307     }
   1308 }
   1309 
   1310 void CollationRegressionTest::TestTrailingComment() {
   1311     // ICU ticket #8070:
   1312     // Check that the rule parser handles a comment without terminating end-of-line.
   1313     IcuTestErrorCode errorCode(*this, "TestTrailingComment");
   1314     RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
   1315     UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
   1316     assertTrue("c<b", coll.compare(c, b) < 0);
   1317     assertTrue("b<a", coll.compare(b, a) < 0);
   1318 }
   1319 
   1320 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
   1321     // ICU ticket #9959:
   1322     // Forbid rules with a before-reset followed by a stronger relation.
   1323     IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
   1324     RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
   1325     if(errorCode.isSuccess()) {
   1326         errln("should forbid before-2-reset followed by primary relation");
   1327     } else {
   1328         errorCode.reset();
   1329     }
   1330     RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
   1331     if(errorCode.isSuccess()) {
   1332         errln("should forbid before-3-reset followed by primary or secondary relation");
   1333     } else {
   1334         errorCode.reset();
   1335     }
   1336 }
   1337 
   1338 void CollationRegressionTest::compareArray(Collator &c,
   1339                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
   1340                                            int32_t testCount)
   1341 {
   1342     int32_t i;
   1343     Collator::EComparisonResult expectedResult = Collator::EQUAL;
   1344 
   1345     for (i = 0; i < testCount; i += 3)
   1346     {
   1347         UnicodeString source(tests[i]);
   1348         UnicodeString comparison(tests[i + 1]);
   1349         UnicodeString target(tests[i + 2]);
   1350 
   1351         if (comparison == "<")
   1352         {
   1353             expectedResult = Collator::LESS;
   1354         }
   1355         else if (comparison == ">")
   1356         {
   1357             expectedResult = Collator::GREATER;
   1358         }
   1359         else if (comparison == "=")
   1360         {
   1361             expectedResult = Collator::EQUAL;
   1362         }
   1363         else
   1364         {
   1365             UnicodeString bogus1("Bogus comparison string \"");
   1366             UnicodeString bogus2("\"");
   1367             errln(bogus1 + comparison + bogus2);
   1368         }
   1369 
   1370         Collator::EComparisonResult compareResult = c.compare(source, target);
   1371 
   1372         CollationKey sourceKey, targetKey;
   1373         UErrorCode status = U_ZERO_ERROR;
   1374 
   1375         c.getCollationKey(source, sourceKey, status);
   1376 
   1377         if (U_FAILURE(status))
   1378         {
   1379             errln("Couldn't get collationKey for source");
   1380             continue;
   1381         }
   1382 
   1383         c.getCollationKey(target, targetKey, status);
   1384 
   1385         if (U_FAILURE(status))
   1386         {
   1387             errln("Couldn't get collationKey for target");
   1388             continue;
   1389         }
   1390 
   1391         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
   1392 
   1393         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
   1394 
   1395     }
   1396 }
   1397 
   1398 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
   1399 {
   1400     int32_t c1, c2, count = 0;
   1401     UErrorCode status = U_ZERO_ERROR;
   1402 
   1403     do
   1404     {
   1405         c1 = i1.next(status);
   1406         c2 = i2.next(status);
   1407 
   1408         if (c1 != c2)
   1409         {
   1410             UnicodeString msg, msg1("    ");
   1411 
   1412             msg += msg1 + count;
   1413             msg += ": strength(0x";
   1414             appendHex(c1, 8, msg);
   1415             msg += ") != strength(0x";
   1416             appendHex(c2, 8, msg);
   1417             msg += ")";
   1418 
   1419             errln(msg);
   1420             break;
   1421         }
   1422 
   1423         count += 1;
   1424     }
   1425     while (c1 != CollationElementIterator::NULLORDER);
   1426 }
   1427 
   1428 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
   1429 {
   1430     if (exec)
   1431     {
   1432         logln("Collation Regression Tests: ");
   1433     }
   1434 
   1435     if(en_us == NULL) {
   1436         dataerrln("Class collator not instantiated");
   1437         name = "";
   1438         return;
   1439     }
   1440     TESTCASE_AUTO_BEGIN;
   1441     TESTCASE_AUTO(Test4048446);
   1442     TESTCASE_AUTO(Test4051866);
   1443     TESTCASE_AUTO(Test4053636);
   1444     TESTCASE_AUTO(Test4054238);
   1445     TESTCASE_AUTO(Test4054734);
   1446     TESTCASE_AUTO(Test4054736);
   1447     TESTCASE_AUTO(Test4058613);
   1448     TESTCASE_AUTO(Test4059820);
   1449     TESTCASE_AUTO(Test4060154);
   1450     TESTCASE_AUTO(Test4062418);
   1451     TESTCASE_AUTO(Test4065540);
   1452     TESTCASE_AUTO(Test4066189);
   1453     TESTCASE_AUTO(Test4066696);
   1454     TESTCASE_AUTO(Test4076676);
   1455     TESTCASE_AUTO(Test4078588);
   1456     TESTCASE_AUTO(Test4079231);
   1457     TESTCASE_AUTO(Test4081866);
   1458     TESTCASE_AUTO(Test4087241);
   1459     TESTCASE_AUTO(Test4087243);
   1460     TESTCASE_AUTO(Test4092260);
   1461     TESTCASE_AUTO(Test4095316);
   1462     TESTCASE_AUTO(Test4101940);
   1463     TESTCASE_AUTO(Test4103436);
   1464     TESTCASE_AUTO(Test4114076);
   1465     TESTCASE_AUTO(Test4114077);
   1466     TESTCASE_AUTO(Test4124632);
   1467     TESTCASE_AUTO(Test4132736);
   1468     TESTCASE_AUTO(Test4133509);
   1469     TESTCASE_AUTO(Test4139572);
   1470     TESTCASE_AUTO(Test4141640);
   1471     TESTCASE_AUTO(Test4146160);
   1472     TESTCASE_AUTO(Test4179216);
   1473     TESTCASE_AUTO(TestT7189);
   1474     TESTCASE_AUTO(TestCaseFirstCompression);
   1475     TESTCASE_AUTO(TestTrailingComment);
   1476     TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
   1477     TESTCASE_AUTO_END;
   1478 }
   1479 
   1480 #endif /* #if !UCONFIG_NO_COLLATION */
   1481