Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2014, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 #include "unicode/utypes.h"
      8 
      9 #if !UCONFIG_NO_COLLATION
     10 
     11 #include "unicode/coll.h"
     12 #include "unicode/localpointer.h"
     13 #include "unicode/tblcoll.h"
     14 #include "unicode/unistr.h"
     15 #include "unicode/sortkey.h"
     16 #include "regcoll.h"
     17 #include "sfwdchit.h"
     18 #include "testutil.h"
     19 #include "cmemory.h"
     20 
     21 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
     22 
     23 CollationRegressionTest::CollationRegressionTest()
     24 {
     25     UErrorCode status = U_ZERO_ERROR;
     26 
     27     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
     28     if(U_FAILURE(status)) {
     29       delete en_us;
     30       en_us = 0;
     31       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
     32       return;
     33     }
     34 }
     35 
     36 CollationRegressionTest::~CollationRegressionTest()
     37 {
     38     delete en_us;
     39 }
     40 
     41 
     42     // @bug 4048446
     43 //
     44 // CollationElementIterator.reset() doesn't work
     45 //
     46 void CollationRegressionTest::Test4048446(/* char* par */)
     47 {
     48     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
     49     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
     50     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
     51     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
     52     UErrorCode status = U_ZERO_ERROR;
     53 
     54     if (i1 == NULL|| i2 == NULL)
     55     {
     56         errln("Could not create CollationElementIterator's");
     57         delete i1;
     58         delete i2;
     59         return;
     60     }
     61 
     62     while (i1->next(status) != CollationElementIterator::NULLORDER)
     63     {
     64         if (U_FAILURE(status))
     65         {
     66             errln("error calling next()");
     67 
     68             delete i1;
     69             delete i2;
     70             return;
     71         }
     72     }
     73 
     74     i1->reset();
     75 
     76     assertEqual(*i1, *i2);
     77 
     78     delete i1;
     79     delete i2;
     80 }
     81 
     82 // @bug 4051866
     83 //
     84 // Collator -> rules -> Collator round-trip broken for expanding characters
     85 //
     86 void CollationRegressionTest::Test4051866(/* char* par */)
     87 {
     88     UnicodeString rules;
     89     UErrorCode status = U_ZERO_ERROR;
     90 
     91     rules += "&n < o ";
     92     rules += "& oe ,o";
     93     rules += (UChar)0x3080;
     94     rules += "& oe ,";
     95     rules += (UChar)0x1530;
     96     rules += " ,O";
     97     rules += "& OE ,O";
     98     rules += (UChar)0x3080;
     99     rules += "& OE ,";
    100     rules += (UChar)0x1520;
    101     rules += "< p ,P";
    102 
    103     // Build a collator containing expanding characters
    104     LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
    105     if (U_FAILURE(status)) {
    106         errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
    107         return;
    108     }
    109 
    110     // Build another using the rules from  the first
    111     LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
    112     if (U_FAILURE(status)) {
    113         errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
    114         return;
    115     }
    116 
    117     // Make sure they're the same
    118     if (!(c1->getRules() == c2->getRules()))
    119     {
    120         errln("Rules are not equal");
    121     }
    122 }
    123 
    124 // @bug 4053636
    125 //
    126 // Collator thinks "black-bird" == "black"
    127 //
    128 void CollationRegressionTest::Test4053636(/* char* par */)
    129 {
    130     if (en_us->equals("black_bird", "black"))
    131     {
    132         errln("black-bird == black");
    133     }
    134 }
    135 
    136 // @bug 4054238
    137 //
    138 // CollationElementIterator will not work correctly if the associated
    139 // Collator object's mode is changed
    140 //
    141 void CollationRegressionTest::Test4054238(/* char* par */)
    142 {
    143     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
    144     const UnicodeString test3(chars3);
    145     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    146 
    147     // NOTE: The Java code uses en_us to create the CollationElementIterators
    148     // but I'm pretty sure that's wrong, so I've changed this to use c.
    149     UErrorCode status = U_ZERO_ERROR;
    150     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    151     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
    152     delete i1;
    153     delete c;
    154 }
    155 
    156 // @bug 4054734
    157 //
    158 // Collator::IDENTICAL documented but not implemented
    159 //
    160 void CollationRegressionTest::Test4054734(/* char* par */)
    161 {
    162     /*
    163         Here's the original Java:
    164 
    165         String[] decomp = {
    166             "\u0001",   "<",    "\u0002",
    167             "\u0001",   "=",    "\u0001",
    168             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
    169             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
    170         };
    171 
    172         String[] nodecomp = {
    173             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
    174         };
    175     */
    176 
    177     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
    178     {
    179         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
    180         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
    181         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
    182         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
    183     };
    184 
    185 
    186     UErrorCode status = U_ZERO_ERROR;
    187     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    188 
    189     c->setStrength(Collator::IDENTICAL);
    190 
    191     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    192     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
    193 
    194     delete c;
    195 }
    196 
    197 // @bug 4054736
    198 //
    199 // Full Decomposition mode not implemented
    200 //
    201 void CollationRegressionTest::Test4054736(/* char* par */)
    202 {
    203     UErrorCode status = U_ZERO_ERROR;
    204     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    205 
    206     c->setStrength(Collator::SECONDARY);
    207     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    208 
    209     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    210     {
    211         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
    212     };
    213 
    214     compareArray(*c, tests, ARRAY_LENGTH(tests));
    215 
    216     delete c;
    217 }
    218 
    219 // @bug 4058613
    220 //
    221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
    222 //
    223 void CollationRegressionTest::Test4058613(/* char* par */)
    224 {
    225     // Creating a default collator doesn't work when Korean is the default
    226     // locale
    227 
    228     Locale oldDefault = Locale::getDefault();
    229     UErrorCode status = U_ZERO_ERROR;
    230 
    231     Locale::setDefault(Locale::getKorean(), status);
    232 
    233     if (U_FAILURE(status))
    234     {
    235         errln("Could not set default locale to Locale::KOREAN");
    236         return;
    237     }
    238 
    239     Collator *c = NULL;
    240 
    241     c = Collator::createInstance("en_US", status);
    242 
    243     if (c == NULL || U_FAILURE(status))
    244     {
    245         errln("Could not create a Korean collator");
    246         Locale::setDefault(oldDefault, status);
    247         delete c;
    248         return;
    249     }
    250 
    251     // Since the fix to this bug was to turn off decomposition for Korean collators,
    252     // ensure that's what we got
    253     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
    254     {
    255       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
    256     }
    257 
    258     delete c;
    259 
    260     Locale::setDefault(oldDefault, status);
    261 }
    262 
    263 // @bug 4059820
    264 //
    265 // RuleBasedCollator.getRules does not return the exact pattern as input
    266 // for expanding character sequences
    267 //
    268 void CollationRegressionTest::Test4059820(/* char* par */)
    269 {
    270     UErrorCode status = U_ZERO_ERROR;
    271 
    272     RuleBasedCollator *c = NULL;
    273     UnicodeString rules = "&9 < a < b , c/a < d < z";
    274 
    275     c = new RuleBasedCollator(rules, status);
    276 
    277     if (c == NULL || U_FAILURE(status))
    278     {
    279         errln("Failure building a collator.");
    280         delete c;
    281         return;
    282     }
    283 
    284     if ( c->getRules().indexOf("c/a") == -1)
    285     {
    286         errln("returned rules do not contain 'c/a'");
    287     }
    288 
    289     delete c;
    290 }
    291 
    292 // @bug 4060154
    293 //
    294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
    295 //
    296 void CollationRegressionTest::Test4060154(/* char* par */)
    297 {
    298     UErrorCode status = U_ZERO_ERROR;
    299     UnicodeString rules;
    300 
    301     rules += "&f < g, G < h, H < i, I < j, J";
    302     rules +=  " & H < ";
    303     rules += (UChar)0x0131;
    304     rules += ", ";
    305     rules += (UChar)0x0130;
    306     rules += ", i, I";
    307 
    308     RuleBasedCollator *c = NULL;
    309 
    310     c = new RuleBasedCollator(rules, status);
    311 
    312     if (c == NULL || U_FAILURE(status))
    313     {
    314         errln("failure building collator.");
    315         delete c;
    316         return;
    317     }
    318 
    319     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    320 
    321  /*
    322     String[] tertiary = {
    323         "A",        "<",    "B",
    324         "H",        "<",    "\u0131",
    325         "H",        "<",    "I",
    326         "\u0131",   "<",    "\u0130",
    327         "\u0130",   "<",    "i",
    328         "\u0130",   ">",    "H",
    329     };
    330 */
    331 
    332     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    333     {
    334         {0x41, 0},    {0x3c, 0}, {0x42, 0},
    335         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
    336         {0x48, 0},    {0x3c, 0}, {0x49, 0},
    337         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
    338         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
    339         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
    340     };
    341 
    342     c->setStrength(Collator::TERTIARY);
    343     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
    344 
    345     /*
    346     String[] secondary = {
    347         "H",        "<",    "I",
    348         "\u0131",   "=",    "\u0130",
    349     };
    350 */
    351     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    352     {
    353         {0x48, 0},    {0x3c, 0}, {0x49, 0},
    354         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
    355     };
    356 
    357     c->setStrength(Collator::PRIMARY);
    358     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
    359 
    360     delete c;
    361 }
    362 
    363 // @bug 4062418
    364 //
    365 // Secondary/Tertiary comparison incorrect in French Secondary
    366 //
    367 void CollationRegressionTest::Test4062418(/* char* par */)
    368 {
    369     UErrorCode status = U_ZERO_ERROR;
    370 
    371     RuleBasedCollator *c = NULL;
    372 
    373     c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
    374 
    375     if (c == NULL || U_FAILURE(status))
    376     {
    377         errln("Failed to create collator for Locale::getCanadaFrench()");
    378         delete c;
    379         return;
    380     }
    381 
    382     c->setStrength(Collator::SECONDARY);
    383 
    384 /*
    385     String[] tests = {
    386             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
    387     };
    388 */
    389     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    390     {
    391         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
    392     };
    393 
    394     compareArray(*c, tests, ARRAY_LENGTH(tests));
    395 
    396     delete c;
    397 }
    398 
    399 // @bug 4065540
    400 //
    401 // Collator::compare() method broken if either string contains spaces
    402 //
    403 void CollationRegressionTest::Test4065540(/* char* par */)
    404 {
    405     if (en_us->compare("abcd e", "abcd f") == 0)
    406     {
    407         errln("'abcd e' == 'abcd f'");
    408     }
    409 }
    410 
    411 // @bug 4066189
    412 //
    413 // Unicode characters need to be recursively decomposed to get the
    414 // correct result. For example,
    415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
    416 //
    417 void CollationRegressionTest::Test4066189(/* char* par */)
    418 {
    419     static const UChar chars1[] = {0x1EB1, 0};
    420     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
    421     const UnicodeString test1(chars1);
    422     const UnicodeString test2(chars2);
    423     UErrorCode status = U_ZERO_ERROR;
    424 
    425     // NOTE: The java code used en_us to create the
    426     // CollationElementIterator's. I'm pretty sure that
    427     // was wrong, so I've change the code to use c1 and c2
    428     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
    429     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    430     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
    431 
    432     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
    433     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    434     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
    435 
    436     assertEqual(*i1, *i2);
    437 
    438     delete i2;
    439     delete c2;
    440     delete i1;
    441     delete c1;
    442 }
    443 
    444 // @bug 4066696
    445 //
    446 // French secondary collation checking at the end of compare iteration fails
    447 //
    448 void CollationRegressionTest::Test4066696(/* char* par */)
    449 {
    450     UErrorCode status = U_ZERO_ERROR;
    451     RuleBasedCollator *c = NULL;
    452 
    453     c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
    454 
    455     if (c == NULL || U_FAILURE(status))
    456     {
    457         errln("Failure creating collator for Locale::getCanadaFrench()");
    458         delete c;
    459         return;
    460     }
    461 
    462     c->setStrength(Collator::SECONDARY);
    463 
    464 /*
    465     String[] tests = {
    466         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
    467     };
    468 
    469   should be:
    470 
    471     String[] tests = {
    472         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
    473     };
    474 
    475 */
    476 
    477     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    478     {
    479         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
    480     };
    481 
    482     compareArray(*c, tests, ARRAY_LENGTH(tests));
    483 
    484     delete c;
    485 }
    486 
    487 // @bug 4076676
    488 //
    489 // Bad canonicalization of same-class combining characters
    490 //
    491 void CollationRegressionTest::Test4076676(/* char* par */)
    492 {
    493     // These combining characters are all in the same class, so they should not
    494     // be reordered, and they should compare as unequal.
    495     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
    496     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
    497 
    498     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    499     c->setStrength(Collator::TERTIARY);
    500 
    501     if (c->compare(s1,s2) == 0)
    502     {
    503         errln("Same-class combining chars were reordered");
    504     }
    505 
    506     delete c;
    507 }
    508 
    509 // @bug 4079231
    510 //
    511 // RuleBasedCollator::operator==(NULL) throws NullPointerException
    512 //
    513 void CollationRegressionTest::Test4079231(/* char* par */)
    514 {
    515     // I don't think there's any way to write this test
    516     // in C++. The following is equivalent to the Java,
    517     // but doesn't compile 'cause NULL can't be converted
    518     // to Collator&
    519     //
    520     // if (en_us->operator==(NULL))
    521     // {
    522     //     errln("en_us->operator==(NULL) returned TRUE");
    523     // }
    524 
    525  /*
    526    try {
    527         if (en_us->equals(null)) {
    528             errln("en_us->equals(null) returned true");
    529         }
    530     }
    531     catch (Exception e) {
    532         errln("en_us->equals(null) threw " + e.toString());
    533     }
    534 */
    535 }
    536 
    537 // @bug 4078588
    538 //
    539 // RuleBasedCollator breaks on "< a < bb" rule
    540 //
    541 void CollationRegressionTest::Test4078588(/* char *par */)
    542 {
    543     UErrorCode status = U_ZERO_ERROR;
    544     RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
    545 
    546     if (rbc == NULL || U_FAILURE(status))
    547     {
    548         errln("Failed to create RuleBasedCollator.");
    549         delete rbc;
    550         return;
    551     }
    552 
    553     Collator::EComparisonResult result = rbc->compare("a","bb");
    554 
    555     if (result != Collator::LESS)
    556     {
    557         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
    558             + (UnicodeString)"; expected -1");
    559     }
    560 
    561     delete rbc;
    562 }
    563 
    564 // @bug 4081866
    565 //
    566 // Combining characters in different classes not reordered properly.
    567 //
    568 void CollationRegressionTest::Test4081866(/* char* par */)
    569 {
    570     // These combining characters are all in different classes,
    571     // so they should be reordered and the strings should compare as equal.
    572     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
    573     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
    574 
    575     UErrorCode status = U_ZERO_ERROR;
    576     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    577     c->setStrength(Collator::TERTIARY);
    578 
    579     // Now that the default collators are set to NO_DECOMPOSITION
    580     // (as a result of fixing bug 4114077), we must set it explicitly
    581     // when we're testing reordering behavior.  -- lwerner, 5/5/98
    582     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    583 
    584     if (c->compare(s1,s2) != 0)
    585     {
    586         errln("Combining chars were not reordered");
    587     }
    588 
    589     delete c;
    590 }
    591 
    592 // @bug 4087241
    593 //
    594 // string comparison errors in Scandinavian collators
    595 //
    596 void CollationRegressionTest::Test4087241(/* char* par */)
    597 {
    598     UErrorCode status = U_ZERO_ERROR;
    599     Locale da_DK("da", "DK");
    600     RuleBasedCollator *c = NULL;
    601 
    602     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
    603 
    604     if (c == NULL || U_FAILURE(status))
    605     {
    606         errln("Failed to create collator for da_DK locale");
    607         delete c;
    608         return;
    609     }
    610 
    611     c->setStrength(Collator::SECONDARY);
    612 
    613     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    614     {
    615         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
    616         {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
    617         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
    618     };
    619 
    620     compareArray(*c, tests, ARRAY_LENGTH(tests));
    621 
    622     delete c;
    623 }
    624 
    625 // @bug 4087243
    626 //
    627 // CollationKey takes ignorable strings into account when it shouldn't
    628 //
    629 void CollationRegressionTest::Test4087243(/* char* par */)
    630 {
    631     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    632     c->setStrength(Collator::TERTIARY);
    633 
    634     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    635     {
    636         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
    637     };
    638 
    639     compareArray(*c, tests, ARRAY_LENGTH(tests));
    640 
    641     delete c;
    642 }
    643 
    644 // @bug 4092260
    645 //
    646 // Mu/micro conflict
    647 // Micro symbol and greek lowercase letter Mu should sort identically
    648 //
    649 void CollationRegressionTest::Test4092260(/* char* par */)
    650 {
    651     UErrorCode status = U_ZERO_ERROR;
    652     Locale el("el", "");
    653     Collator *c = NULL;
    654 
    655     c = Collator::createInstance(el, status);
    656 
    657     if (c == NULL || U_FAILURE(status))
    658     {
    659         errln("Failed to create collator for el locale.");
    660         delete c;
    661         return;
    662     }
    663 
    664     // These now have tertiary differences in UCA
    665     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
    666 
    667     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    668     {
    669         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
    670     };
    671 
    672     compareArray(*c, tests, ARRAY_LENGTH(tests));
    673 
    674     delete c;
    675 }
    676 
    677 // @bug 4095316
    678 //
    679 void CollationRegressionTest::Test4095316(/* char* par */)
    680 {
    681     UErrorCode status = U_ZERO_ERROR;
    682     Locale el_GR("el", "GR");
    683     Collator *c = Collator::createInstance(el_GR, status);
    684 
    685     if (c == NULL || U_FAILURE(status))
    686     {
    687         errln("Failed to create collator for el_GR locale");
    688         delete c;
    689         return;
    690     }
    691     // These now have tertiary differences in UCA
    692     //c->setStrength(Collator::TERTIARY);
    693     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
    694 
    695     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    696     {
    697         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
    698     };
    699 
    700     compareArray(*c, tests, ARRAY_LENGTH(tests));
    701 
    702     delete c;
    703 }
    704 
    705 // @bug 4101940
    706 //
    707 void CollationRegressionTest::Test4101940(/* char* par */)
    708 {
    709     UErrorCode status = U_ZERO_ERROR;
    710     RuleBasedCollator *c = NULL;
    711     UnicodeString rules = "&9 < a < b";
    712     UnicodeString nothing = "";
    713 
    714     c = new RuleBasedCollator(rules, status);
    715 
    716     if (c == NULL || U_FAILURE(status))
    717     {
    718         errln("Failed to create RuleBasedCollator");
    719         delete c;
    720         return;
    721     }
    722 
    723     CollationElementIterator *i = c->createCollationElementIterator(nothing);
    724     i->reset();
    725 
    726     if (i->next(status) != CollationElementIterator::NULLORDER)
    727     {
    728         errln("next did not return NULLORDER");
    729     }
    730 
    731     delete i;
    732     delete c;
    733 }
    734 
    735 // @bug 4103436
    736 //
    737 // Collator::compare not handling spaces properly
    738 //
    739 void CollationRegressionTest::Test4103436(/* char* par */)
    740 {
    741     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    742     c->setStrength(Collator::TERTIARY);
    743 
    744     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    745     {
    746         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
    747         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
    748     };
    749 
    750     compareArray(*c, tests, ARRAY_LENGTH(tests));
    751 
    752     delete c;
    753 }
    754 
    755 // @bug 4114076
    756 //
    757 // Collation not Unicode conformant with Hangul syllables
    758 //
    759 void CollationRegressionTest::Test4114076(/* char* par */)
    760 {
    761     UErrorCode status = U_ZERO_ERROR;
    762     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    763     c->setStrength(Collator::TERTIARY);
    764 
    765     //
    766     // With Canonical decomposition, Hangul syllables should get decomposed
    767     // into Jamo, but Jamo characters should not be decomposed into
    768     // conjoining Jamo
    769     //
    770     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    771     {
    772         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
    773     };
    774 
    775     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    776     compareArray(*c, test1, ARRAY_LENGTH(test1));
    777 
    778     // From UTR #15:
    779     // *In earlier versions of Unicode, jamo characters like ksf
    780     //  had compatibility mappings to kf + sf. These mappings were
    781     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
    782     // That is, the following test is obsolete as of 2.1.9
    783 
    784 //obsolete-    // With Full decomposition, it should go all the way down to
    785 //obsolete-    // conjoining Jamo characters.
    786 //obsolete-    //
    787 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    788 //obsolete-    {
    789 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
    790 //obsolete-    };
    791 //obsolete-
    792 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
    793 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
    794 
    795     delete c;
    796 }
    797 
    798 
    799 // @bug 4124632
    800 //
    801 // Collator::getCollationKey was hanging on certain character sequences
    802 //
    803 void CollationRegressionTest::Test4124632(/* char* par */)
    804 {
    805     UErrorCode status = U_ZERO_ERROR;
    806     Collator *coll = NULL;
    807 
    808     coll = Collator::createInstance(Locale::getJapan(), status);
    809 
    810     if (coll == NULL || U_FAILURE(status))
    811     {
    812         errln("Failed to create collator for Locale::JAPAN");
    813         delete coll;
    814         return;
    815     }
    816 
    817     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
    818     CollationKey key;
    819 
    820     coll->getCollationKey(test, key, status);
    821 
    822     if (key.isBogus() || U_FAILURE(status))
    823     {
    824         errln("CollationKey creation failed.");
    825     }
    826 
    827     delete coll;
    828 }
    829 
    830 // @bug 4132736
    831 //
    832 // sort order of french words with multiple accents has errors
    833 //
    834 void CollationRegressionTest::Test4132736(/* char* par */)
    835 {
    836     UErrorCode status = U_ZERO_ERROR;
    837 
    838     Collator *c = NULL;
    839 
    840     c = Collator::createInstance(Locale::getCanadaFrench(), status);
    841     c->setStrength(Collator::TERTIARY);
    842 
    843     if (c == NULL || U_FAILURE(status))
    844     {
    845         errln("Failed to create a collator for Locale::getCanadaFrench()");
    846         delete c;
    847         return;
    848     }
    849 
    850     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    851     {
    852         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
    853         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
    854     };
    855 
    856     compareArray(*c, test1, ARRAY_LENGTH(test1));
    857 
    858     delete c;
    859 }
    860 
    861 // @bug 4133509
    862 //
    863 // The sorting using java.text.CollationKey is not in the exact order
    864 //
    865 void CollationRegressionTest::Test4133509(/* char* par */)
    866 {
    867     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    868     {
    869         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
    870         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
    871         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
    872     };
    873 
    874     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
    875 }
    876 
    877 // @bug 4114077
    878 //
    879 // Collation with decomposition off doesn't work for Europe
    880 //
    881 void CollationRegressionTest::Test4114077(/* char* par */)
    882 {
    883     // Ensure that we get the same results with decomposition off
    884     // as we do with it on....
    885 
    886     UErrorCode status = U_ZERO_ERROR;
    887     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    888     c->setStrength(Collator::TERTIARY);
    889 
    890     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    891     {
    892         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
    893         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
    894         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
    895         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
    896                                                 //   -> a, ring, acute
    897         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
    898     };
    899 
    900     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    901     compareArray(*c, test1, ARRAY_LENGTH(test1));
    902 
    903     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    904     {
    905         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
    906     };
    907 
    908     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    909     compareArray(*c, test2, ARRAY_LENGTH(test2));
    910 
    911     delete c;
    912 }
    913 
    914 // @bug 4141640
    915 //
    916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
    917 //
    918 void CollationRegressionTest::Test4141640(/* char* par */)
    919 {
    920     //
    921     // Rather than just creating a Swedish collator, we might as well
    922     // try to instantiate one for every locale available on the system
    923     // in order to prevent this sort of bug from cropping up in the future
    924     //
    925     UErrorCode status = U_ZERO_ERROR;
    926     int32_t i, localeCount;
    927     const Locale *locales = Locale::getAvailableLocales(localeCount);
    928 
    929     for (i = 0; i < localeCount; i += 1)
    930     {
    931         Collator *c = NULL;
    932 
    933         status = U_ZERO_ERROR;
    934         c = Collator::createInstance(locales[i], status);
    935 
    936         if (c == NULL || U_FAILURE(status))
    937         {
    938             UnicodeString msg, localeName;
    939 
    940             msg += "Could not create collator for locale ";
    941             msg += locales[i].getName();
    942 
    943             errln(msg);
    944         }
    945 
    946         delete c;
    947     }
    948 }
    949 
    950 // @bug 4139572
    951 //
    952 // getCollationKey throws exception for spanish text
    953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
    954 //
    955 void CollationRegressionTest::Test4139572(/* char* par */)
    956 {
    957     //
    958     // Code pasted straight from the bug report
    959     // (and then translated to C++ ;-)
    960     //
    961     // create spanish locale and collator
    962     UErrorCode status = U_ZERO_ERROR;
    963     Locale l("es", "es");
    964     Collator *col = NULL;
    965 
    966     col = Collator::createInstance(l, status);
    967 
    968     if (col == NULL || U_FAILURE(status))
    969     {
    970         errln("Failed to create a collator for es_es locale.");
    971         delete col;
    972         return;
    973     }
    974 
    975     CollationKey key;
    976 
    977     // this spanish phrase kills it!
    978     col->getCollationKey("Nombre De Objeto", key, status);
    979 
    980     if (key.isBogus() || U_FAILURE(status))
    981     {
    982         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
    983     }
    984 
    985     delete col;
    986 }
    987 /* HSYS : RuleBasedCollator::compare() performance enhancements
    988           compare() does not create CollationElementIterator() anymore.*/
    989 
    990 class My4146160Collator : public RuleBasedCollator
    991 {
    992 public:
    993     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
    994     ~My4146160Collator();
    995 
    996     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
    997 
    998     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
    999 
   1000     static int32_t count;
   1001 };
   1002 
   1003 int32_t My4146160Collator::count = 0;
   1004 
   1005 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
   1006   : RuleBasedCollator(rbc.getRules(), status)
   1007 {
   1008 }
   1009 
   1010 My4146160Collator::~My4146160Collator()
   1011 {
   1012 }
   1013 
   1014 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
   1015 {
   1016     count += 1;
   1017     return RuleBasedCollator::createCollationElementIterator(text);
   1018 }
   1019 
   1020 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
   1021 {
   1022     count += 1;
   1023     return RuleBasedCollator::createCollationElementIterator(text);
   1024 }
   1025 
// @bug 4146160
//
// RuleBasedCollator doesn't use createCollationElementIterator internally
//
// NOTE: the entire body is compiled out with #if 0, so this test currently
// does nothing when run. Presumably it was disabled because compare() no
// longer creates a CollationElementIterator (see the HSYS note above
// My4146160Collator) -- confirm before re-enabling.
//
void CollationRegressionTest::Test4146160(/* char* par */)
{
#if 0
    //
    // Use a custom collator class whose createCollationElementIterator
    // methods increment a count....
    //
    UErrorCode status = U_ZERO_ERROR;
    CollationKey key;

    // Start counting from zero for the getCollationKey() check.
    My4146160Collator::count = 0;
    My4146160Collator *mc = NULL;

    mc = new My4146160Collator(*en_us, status);

    if (mc == NULL || U_FAILURE(status))
    {
        errln("Failed to create a My4146160Collator.");
        delete mc;
        return;
    }

    mc->getCollationKey("1", key, status);

    if (key.isBogus() || U_FAILURE(status))
    {
        errln("Failure to get a CollationKey from a My4146160Collator.");
        delete mc;
        return;
    }

    // getCollationKey() is expected to have gone through one of the
    // counting createCollationElementIterator() overloads.
    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
    }

    // Reset the counter and repeat the check for compare().
    My4146160Collator::count = 0;
    mc->compare("1", "2");

    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createtCollationElementIterator not called for compare");
    }

    delete mc;
#endif
}
   1077 
   1078 void CollationRegressionTest::Test4179216() {
   1079     // you can position a CollationElementIterator in the middle of
   1080     // a contracting character sequence, yielding a bogus collation
   1081     // element
   1082     IcuTestErrorCode errorCode(*this, "Test4179216");
   1083     RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
   1084     UnicodeString testText = "church church catcatcher runcrunchynchy";
   1085     CollationElementIterator *iter = coll.createCollationElementIterator(testText);
   1086 
   1087     // test that the "ch" combination works properly
   1088     iter->setOffset(4, errorCode);
   1089     int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1090 
   1091     iter->reset();
   1092     int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1093 
   1094     iter->setOffset(5, errorCode);
   1095     int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1096 
   1097     // Compares and prints only 16-bit primary weights.
   1098     if (elt4 != elt0 || elt5 != elt0) {
   1099         errln("The collation elements at positions 0 (0x%04x), "
   1100                 "4 (0x%04x), and 5 (0x%04x) don't match.",
   1101                 elt0, elt4, elt5);
   1102     }
   1103 
   1104     // test that the "cat" combination works properly
   1105     iter->setOffset(14, errorCode);
   1106     int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1107 
   1108     iter->setOffset(15, errorCode);
   1109     int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1110 
   1111     iter->setOffset(16, errorCode);
   1112     int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1113 
   1114     iter->setOffset(17, errorCode);
   1115     int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1116 
   1117     iter->setOffset(18, errorCode);
   1118     int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1119 
   1120     iter->setOffset(19, errorCode);
   1121     int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
   1122 
   1123     // Compares and prints only 16-bit primary weights.
   1124     if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
   1125             || elt14 != elt18 || elt14 != elt19) {
   1126         errln("\"cat\" elements don't match: elt14 = 0x%04x, "
   1127                 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
   1128                 "elt18 = 0x%04x, elt19 = 0x%04x",
   1129                 elt14, elt15, elt16, elt17, elt18, elt19);
   1130     }
   1131 
   1132     // now generate a complete list of the collation elements,
   1133     // first using next() and then using setOffset(), and
   1134     // make sure both interfaces return the same set of elements
   1135     iter->reset();
   1136 
   1137     int32_t elt = iter->next(errorCode);
   1138     int32_t count = 0;
   1139     while (elt != CollationElementIterator::NULLORDER) {
   1140         ++count;
   1141         elt = iter->next(errorCode);
   1142     }
   1143 
   1144     LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
   1145     LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
   1146     int32_t lastPos = 0;
   1147 
   1148     iter->reset();
   1149     elt = iter->next(errorCode);
   1150     count = 0;
   1151     while (elt != CollationElementIterator::NULLORDER) {
   1152         nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
   1153         lastPos = iter->getOffset();
   1154         elt = iter->next(errorCode);
   1155     }
   1156     int32_t nextElementsLength = count;
   1157     count = 0;
   1158     for (int32_t i = 0; i < testText.length(); ) {
   1159         iter->setOffset(i, errorCode);
   1160         lastPos = iter->getOffset();
   1161         elt = iter->next(errorCode);
   1162         setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
   1163         i = iter->getOffset();
   1164     }
   1165     for (int32_t i = 0; i < nextElementsLength; i++) {
   1166         if (nextElements[i] == setOffsetElements[i]) {
   1167             logln(nextElements[i]);
   1168         } else {
   1169             errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
   1170                 ", but setOffset() yielded " + setOffsetElements[i]);
   1171         }
   1172     }
   1173     delete iter;
   1174 }
   1175 
   1176 // Ticket 7189
   1177 //
   1178 // nextSortKeyPart incorrect for EO_S1 collation
   1179 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
   1180     UCharIterator uiter;
   1181     uint32_t state[2] = { 0, 0 };
   1182     int32_t keyLen;
   1183     int32_t count = 8;
   1184 
   1185     uiter_setString(&uiter, text, len);
   1186     keyLen = 0;
   1187     while (TRUE) {
   1188         int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
   1189         if (U_FAILURE(status)) {
   1190             return -1;
   1191         }
   1192         if (keyPartLen == 0) {
   1193             break;
   1194         }
   1195         keyLen += keyPartLen;
   1196     }
   1197     return keyLen;
   1198 }
   1199 
   1200 void CollationRegressionTest::TestT7189() {
   1201     UErrorCode status = U_ZERO_ERROR;
   1202     UCollator *coll;
   1203     uint32_t i;
   1204 
   1205     static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
   1206     // "Achter De Hoven"
   1207         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
   1208         // "ABC"
   1209         { 0x41, 0x42, 0x43, 0x00 },
   1210         // "HELLO world!"
   1211         { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
   1212     };
   1213 
   1214     static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
   1215     // "Achter de Hoven"
   1216         { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
   1217         // "abc"
   1218         { 0x61, 0x62, 0x63, 0x00 },
   1219         // "hello world!"
   1220         { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
   1221     };
   1222 
   1223     // Open the collator
   1224     coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
   1225     if (U_FAILURE(status)) {
   1226         errln("Failed to create a collator for short string EO_S1");
   1227         return;
   1228     }
   1229 
   1230     for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
   1231         uint8_t key1[100], key2[100];
   1232         int32_t len1, len2;
   1233 
   1234         len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
   1235         if (U_FAILURE(status)) {
   1236             errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
   1237             break;
   1238         }
   1239         len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
   1240         if (U_FAILURE(status)) {
   1241             errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
   1242             break;
   1243         }
   1244 
   1245         if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
   1246             errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
   1247         } else {
   1248             logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
   1249                     + TestUtility::hex(key2, len2));
   1250         }
   1251     }
   1252     ucol_close(coll);
   1253 }
   1254 
   1255 void CollationRegressionTest::TestCaseFirstCompression() {
   1256     RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
   1257     UErrorCode status = U_ZERO_ERROR;
   1258 
   1259     // default
   1260     caseFirstCompressionSub(col, "default");
   1261 
   1262     // Upper first
   1263     col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
   1264     if (U_FAILURE(status)) {
   1265         errln("Failed to set UCOL_UPPER_FIRST");
   1266         return;
   1267     }
   1268     caseFirstCompressionSub(col, "upper first");
   1269 
   1270     // Lower first
   1271     col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
   1272     if (U_FAILURE(status)) {
   1273         errln("Failed to set UCOL_LOWER_FIRST");
   1274         return;
   1275     }
   1276     caseFirstCompressionSub(col, "lower first");
   1277 
   1278     delete col;
   1279 }
   1280 
   1281 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
   1282     const int32_t maxLength = 50;
   1283 
   1284     UChar str1[maxLength];
   1285     UChar str2[maxLength];
   1286 
   1287     CollationKey key1, key2;
   1288 
   1289     for (int32_t len = 1; len <= maxLength; len++) {
   1290         int32_t i = 0;
   1291         for (; i < len - 1; i++) {
   1292             str1[i] = str2[i] = (UChar)0x61; // 'a'
   1293         }
   1294         str1[i] = (UChar)0x41; // 'A'
   1295         str2[i] = (UChar)0x61; // 'a'
   1296 
   1297         UErrorCode status = U_ZERO_ERROR;
   1298         col->getCollationKey(str1, len, key1, status);
   1299         col->getCollationKey(str2, len, key2, status);
   1300 
   1301         UCollationResult cmpKey = key1.compareTo(key2, status);
   1302         UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
   1303 
   1304         if (U_FAILURE(status)) {
   1305             errln("Error in caseFirstCompressionSub");
   1306         } else if (cmpKey != cmpCol) {
   1307             errln((UnicodeString)"Inconsistent comparison(" + opt
   1308                 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
   1309                 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
   1310         }
   1311     }
   1312 }
   1313 
   1314 void CollationRegressionTest::TestTrailingComment() {
   1315     // ICU ticket #8070:
   1316     // Check that the rule parser handles a comment without terminating end-of-line.
   1317     IcuTestErrorCode errorCode(*this, "TestTrailingComment");
   1318     RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
   1319     UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
   1320     assertTrue("c<b", coll.compare(c, b) < 0);
   1321     assertTrue("b<a", coll.compare(b, a) < 0);
   1322 }
   1323 
   1324 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
   1325     // ICU ticket #9959:
   1326     // Forbid rules with a before-reset followed by a stronger relation.
   1327     IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
   1328     RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
   1329     if(errorCode.isSuccess()) {
   1330         errln("should forbid before-2-reset followed by primary relation");
   1331     } else {
   1332         errorCode.reset();
   1333     }
   1334     RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
   1335     if(errorCode.isSuccess()) {
   1336         errln("should forbid before-3-reset followed by primary or secondary relation");
   1337     } else {
   1338         errorCode.reset();
   1339     }
   1340 }
   1341 
   1342 void CollationRegressionTest::compareArray(Collator &c,
   1343                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
   1344                                            int32_t testCount)
   1345 {
   1346     int32_t i;
   1347     Collator::EComparisonResult expectedResult = Collator::EQUAL;
   1348 
   1349     for (i = 0; i < testCount; i += 3)
   1350     {
   1351         UnicodeString source(tests[i]);
   1352         UnicodeString comparison(tests[i + 1]);
   1353         UnicodeString target(tests[i + 2]);
   1354 
   1355         if (comparison == "<")
   1356         {
   1357             expectedResult = Collator::LESS;
   1358         }
   1359         else if (comparison == ">")
   1360         {
   1361             expectedResult = Collator::GREATER;
   1362         }
   1363         else if (comparison == "=")
   1364         {
   1365             expectedResult = Collator::EQUAL;
   1366         }
   1367         else
   1368         {
   1369             UnicodeString bogus1("Bogus comparison string \"");
   1370             UnicodeString bogus2("\"");
   1371             errln(bogus1 + comparison + bogus2);
   1372         }
   1373 
   1374         Collator::EComparisonResult compareResult = c.compare(source, target);
   1375 
   1376         CollationKey sourceKey, targetKey;
   1377         UErrorCode status = U_ZERO_ERROR;
   1378 
   1379         c.getCollationKey(source, sourceKey, status);
   1380 
   1381         if (U_FAILURE(status))
   1382         {
   1383             errln("Couldn't get collationKey for source");
   1384             continue;
   1385         }
   1386 
   1387         c.getCollationKey(target, targetKey, status);
   1388 
   1389         if (U_FAILURE(status))
   1390         {
   1391             errln("Couldn't get collationKey for target");
   1392             continue;
   1393         }
   1394 
   1395         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
   1396 
   1397         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
   1398 
   1399     }
   1400 }
   1401 
   1402 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
   1403 {
   1404     int32_t c1, c2, count = 0;
   1405     UErrorCode status = U_ZERO_ERROR;
   1406 
   1407     do
   1408     {
   1409         c1 = i1.next(status);
   1410         c2 = i2.next(status);
   1411 
   1412         if (c1 != c2)
   1413         {
   1414             UnicodeString msg, msg1("    ");
   1415 
   1416             msg += msg1 + count;
   1417             msg += ": strength(0x";
   1418             appendHex(c1, 8, msg);
   1419             msg += ") != strength(0x";
   1420             appendHex(c2, 8, msg);
   1421             msg += ")";
   1422 
   1423             errln(msg);
   1424             break;
   1425         }
   1426 
   1427         count += 1;
   1428     }
   1429     while (c1 != CollationElementIterator::NULLORDER);
   1430 }
   1431 
// Test dispatcher for this class.
//
// index  selects one of the test cases registered below
// exec   when true, a banner line is logged first
// name   set to "" when the en_us collator is missing; otherwise it is left
//        to the TESTCASE_AUTO macros
//
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file;
// presumably they map index to the listed tests in listing order and run the
// selected one when exec is true -- confirm before reordering the list.
void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
{
    if (exec)
    {
        logln("Collation Regression Tests: ");
    }

    // Every test below dereferences en_us, so without it nothing is
    // registered and the problem is reported through dataerrln().
    if(en_us == NULL) {
        dataerrln("Class collator not instantiated");
        name = "";
        return;
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(Test4048446);
    TESTCASE_AUTO(Test4051866);
    TESTCASE_AUTO(Test4053636);
    TESTCASE_AUTO(Test4054238);
    TESTCASE_AUTO(Test4054734);
    TESTCASE_AUTO(Test4054736);
    TESTCASE_AUTO(Test4058613);
    TESTCASE_AUTO(Test4059820);
    TESTCASE_AUTO(Test4060154);
    TESTCASE_AUTO(Test4062418);
    TESTCASE_AUTO(Test4065540);
    TESTCASE_AUTO(Test4066189);
    TESTCASE_AUTO(Test4066696);
    TESTCASE_AUTO(Test4076676);
    TESTCASE_AUTO(Test4078588);
    TESTCASE_AUTO(Test4079231);
    TESTCASE_AUTO(Test4081866);
    TESTCASE_AUTO(Test4087241);
    TESTCASE_AUTO(Test4087243);
    TESTCASE_AUTO(Test4092260);
    TESTCASE_AUTO(Test4095316);
    TESTCASE_AUTO(Test4101940);
    TESTCASE_AUTO(Test4103436);
    TESTCASE_AUTO(Test4114076);
    TESTCASE_AUTO(Test4114077);
    TESTCASE_AUTO(Test4124632);
    TESTCASE_AUTO(Test4132736);
    TESTCASE_AUTO(Test4133509);
    TESTCASE_AUTO(Test4139572);
    TESTCASE_AUTO(Test4141640);
    TESTCASE_AUTO(Test4146160);
    TESTCASE_AUTO(Test4179216);
    TESTCASE_AUTO(TestT7189);
    TESTCASE_AUTO(TestCaseFirstCompression);
    TESTCASE_AUTO(TestTrailingComment);
    TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
    TESTCASE_AUTO_END;
}
   1483 
   1484 #endif /* #if !UCONFIG_NO_COLLATION */
   1485