Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 #include "unicode/utypes.h"
      8 
      9 #if !UCONFIG_NO_COLLATION
     10 
     11 #include "unicode/coll.h"
     12 #include "unicode/tblcoll.h"
     13 #include "unicode/unistr.h"
     14 #include "unicode/sortkey.h"
     15 #include "regcoll.h"
     16 #include "sfwdchit.h"
     17 
     18 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
     19 
     20 CollationRegressionTest::CollationRegressionTest()
     21 {
     22     UErrorCode status = U_ZERO_ERROR;
     23 
     24     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
     25     if(U_FAILURE(status)) {
     26       delete en_us;
     27       en_us = 0;
     28       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
     29       return;
     30     }
     31 }
     32 
     33 CollationRegressionTest::~CollationRegressionTest()
     34 {
     35     delete en_us;
     36 }
     37 
     38 
     39     // @bug 4048446
     40 //
     41 // CollationElementIterator.reset() doesn't work
     42 //
     43 void CollationRegressionTest::Test4048446(/* char* par */)
     44 {
     45     const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
     46     const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
     47     CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
     48     CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
     49     UErrorCode status = U_ZERO_ERROR;
     50 
     51     if (i1 == NULL|| i2 == NULL)
     52     {
     53         errln("Could not create CollationElementIterator's");
     54         delete i1;
     55         delete i2;
     56         return;
     57     }
     58 
     59     while (i1->next(status) != CollationElementIterator::NULLORDER)
     60     {
     61         if (U_FAILURE(status))
     62         {
     63             errln("error calling next()");
     64 
     65             delete i1;
     66             delete i2;
     67             return;
     68         }
     69     }
     70 
     71     i1->reset();
     72 
     73     assertEqual(*i1, *i2);
     74 
     75     delete i1;
     76     delete i2;
     77 }
     78 
     79 // @bug 4051866
     80 //
     81 // Collator -> rules -> Collator round-trip broken for expanding characters
     82 //
     83 void CollationRegressionTest::Test4051866(/* char* par */)
     84 {
     85 /*
     86     RuleBasedCollator c1 = new RuleBasedCollator("< o "
     87                                                 +"& oe ,o\u3080"
     88                                                 +"& oe ,\u1530 ,O"
     89                                                 +"& OE ,O\u3080"
     90                                                 +"& OE ,\u1520"
     91                                                 +"< p ,P");
     92 */
     93 
     94     UnicodeString rules;
     95     UErrorCode status = U_ZERO_ERROR;
     96 
     97     rules += "< o ";
     98     rules += "& oe ,o";
     99     rules += (UChar)0x3080;
    100     rules += "& oe ,";
    101     rules += (UChar)0x1530;
    102     rules += " ,O";
    103     rules += "& OE ,O";
    104     rules += (UChar)0x3080;
    105     rules += "& OE ,";
    106     rules += (UChar)0x1520;
    107     rules += "< p ,P";
    108 
    109     // Build a collator containing expanding characters
    110     RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);
    111 
    112     // Build another using the rules from  the first
    113     RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);
    114 
    115     // Make sure they're the same
    116     if (!(c1->getRules() == c2->getRules()))
    117     {
    118         errln("Rules are not equal");
    119     }
    120 
    121     delete c2;
    122     delete c1;
    123 }
    124 
    125 // @bug 4053636
    126 //
    127 // Collator thinks "black-bird" == "black"
    128 //
    129 void CollationRegressionTest::Test4053636(/* char* par */)
    130 {
    131     if (en_us->equals("black_bird", "black"))
    132     {
    133         errln("black-bird == black");
    134     }
    135 }
    136 
    137 // @bug 4054238
    138 //
    139 // CollationElementIterator will not work correctly if the associated
    140 // Collator object's mode is changed
    141 //
    142 void CollationRegressionTest::Test4054238(/* char* par */)
    143 {
    144     const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
    145     const UnicodeString test3(chars3);
    146     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    147 
    148     // NOTE: The Java code uses en_us to create the CollationElementIterators
    149     // but I'm pretty sure that's wrong, so I've changed this to use c.
    150     UErrorCode status = U_ZERO_ERROR;
    151     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    152     CollationElementIterator *i1 = c->createCollationElementIterator(test3);
    153     delete i1;
    154     delete c;
    155 }
    156 
    157 // @bug 4054734
    158 //
    159 // Collator::IDENTICAL documented but not implemented
    160 //
    161 void CollationRegressionTest::Test4054734(/* char* par */)
    162 {
    163     /*
    164         Here's the original Java:
    165 
    166         String[] decomp = {
    167             "\u0001",   "<",    "\u0002",
    168             "\u0001",   "=",    "\u0001",
    169             "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
    170             "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
    171         };
    172 
    173         String[] nodecomp = {
    174             "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
    175         };
    176     */
    177 
    178     static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
    179     {
    180         {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
    181         {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
    182         {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
    183         {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
    184     };
    185 
    186 
    187     UErrorCode status = U_ZERO_ERROR;
    188     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    189 
    190     c->setStrength(Collator::IDENTICAL);
    191 
    192     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    193     compareArray(*c, decomp, ARRAY_LENGTH(decomp));
    194 
    195     delete c;
    196 }
    197 
    198 // @bug 4054736
    199 //
    200 // Full Decomposition mode not implemented
    201 //
    202 void CollationRegressionTest::Test4054736(/* char* par */)
    203 {
    204     UErrorCode status = U_ZERO_ERROR;
    205     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    206 
    207     c->setStrength(Collator::SECONDARY);
    208     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    209 
    210     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    211     {
    212         {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
    213     };
    214 
    215     compareArray(*c, tests, ARRAY_LENGTH(tests));
    216 
    217     delete c;
    218 }
    219 
    220 // @bug 4058613
    221 //
    222 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
    223 //
    224 void CollationRegressionTest::Test4058613(/* char* par */)
    225 {
    226     // Creating a default collator doesn't work when Korean is the default
    227     // locale
    228 
    229     Locale oldDefault = Locale::getDefault();
    230     UErrorCode status = U_ZERO_ERROR;
    231 
    232     Locale::setDefault(Locale::getKorean(), status);
    233 
    234     if (U_FAILURE(status))
    235     {
    236         errln("Could not set default locale to Locale::KOREAN");
    237         return;
    238     }
    239 
    240     Collator *c = NULL;
    241 
    242     c = Collator::createInstance("en_US", status);
    243 
    244     if (c == NULL || U_FAILURE(status))
    245     {
    246         errln("Could not create a Korean collator");
    247         Locale::setDefault(oldDefault, status);
    248         delete c;
    249         return;
    250     }
    251 
    252     // Since the fix to this bug was to turn off decomposition for Korean collators,
    253     // ensure that's what we got
    254     if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
    255     {
    256       errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
    257     }
    258 
    259     delete c;
    260 
    261     Locale::setDefault(oldDefault, status);
    262 }
    263 
    264 // @bug 4059820
    265 //
    266 // RuleBasedCollator.getRules does not return the exact pattern as input
    267 // for expanding character sequences
    268 //
    269 void CollationRegressionTest::Test4059820(/* char* par */)
    270 {
    271     UErrorCode status = U_ZERO_ERROR;
    272 
    273     RuleBasedCollator *c = NULL;
    274     UnicodeString rules = "< a < b , c/a < d < z";
    275 
    276     c = new RuleBasedCollator(rules, status);
    277 
    278     if (c == NULL || U_FAILURE(status))
    279     {
    280         errln("Failure building a collator.");
    281         delete c;
    282         return;
    283     }
    284 
    285     if ( c->getRules().indexOf("c/a") == -1)
    286     {
    287         errln("returned rules do not contain 'c/a'");
    288     }
    289 
    290     delete c;
    291 }
    292 
    293 // @bug 4060154
    294 //
    295 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
    296 //
    297 void CollationRegressionTest::Test4060154(/* char* par */)
    298 {
    299     UErrorCode status = U_ZERO_ERROR;
    300     UnicodeString rules;
    301 
    302     rules += "< g, G < h, H < i, I < j, J";
    303     rules +=  " & H < ";
    304     rules += (UChar)0x0131;
    305     rules += ", ";
    306     rules += (UChar)0x0130;
    307     rules += ", i, I";
    308 
    309     RuleBasedCollator *c = NULL;
    310 
    311     c = new RuleBasedCollator(rules, status);
    312 
    313     if (c == NULL || U_FAILURE(status))
    314     {
    315         errln("failure building collator.");
    316         delete c;
    317         return;
    318     }
    319 
    320     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    321 
    322  /*
    323     String[] tertiary = {
    324         "A",        "<",    "B",
    325         "H",        "<",    "\u0131",
    326         "H",        "<",    "I",
    327         "\u0131",   "<",    "\u0130",
    328         "\u0130",   "<",    "i",
    329         "\u0130",   ">",    "H",
    330     };
    331 */
    332 
    333     static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    334     {
    335         {0x41, 0},    {0x3c, 0}, {0x42, 0},
    336         {0x48, 0},    {0x3c, 0}, {0x0131, 0},
    337         {0x48, 0},    {0x3c, 0}, {0x49, 0},
    338         {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
    339         {0x0130, 0}, {0x3c, 0}, {0x69, 0},
    340         {0x0130, 0}, {0x3e, 0}, {0x48, 0}
    341     };
    342 
    343     c->setStrength(Collator::TERTIARY);
    344     compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
    345 
    346     /*
    347     String[] secondary = {
    348         "H",        "<",    "I",
    349         "\u0131",   "=",    "\u0130",
    350     };
    351 */
    352     static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
    353     {
    354         {0x48, 0},    {0x3c, 0}, {0x49, 0},
    355         {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
    356     };
    357 
    358     c->setStrength(Collator::PRIMARY);
    359     compareArray(*c, secondary, ARRAY_LENGTH(secondary));
    360 
    361     delete c;
    362 }
    363 
    364 // @bug 4062418
    365 //
    366 // Secondary/Tertiary comparison incorrect in French Secondary
    367 //
    368 void CollationRegressionTest::Test4062418(/* char* par */)
    369 {
    370     UErrorCode status = U_ZERO_ERROR;
    371 
    372     RuleBasedCollator *c = NULL;
    373 
    374     c = (RuleBasedCollator *) Collator::createInstance(Locale::getFrance(), status);
    375 
    376     if (c == NULL || U_FAILURE(status))
    377     {
    378         errln("Failed to create collator for Locale::getFrance()");
    379         delete c;
    380         return;
    381     }
    382 
    383     c->setStrength(Collator::SECONDARY);
    384 
    385 /*
    386     String[] tests = {
    387             "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
    388     };
    389 */
    390     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    391     {
    392         {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
    393     };
    394 
    395     compareArray(*c, tests, ARRAY_LENGTH(tests));
    396 
    397     delete c;
    398 }
    399 
    400 // @bug 4065540
    401 //
    402 // Collator::compare() method broken if either string contains spaces
    403 //
    404 void CollationRegressionTest::Test4065540(/* char* par */)
    405 {
    406     if (en_us->compare("abcd e", "abcd f") == 0)
    407     {
    408         errln("'abcd e' == 'abcd f'");
    409     }
    410 }
    411 
    412 // @bug 4066189
    413 //
    414 // Unicode characters need to be recursively decomposed to get the
    415 // correct result. For example,
    416 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
    417 //
    418 void CollationRegressionTest::Test4066189(/* char* par */)
    419 {
    420     static const UChar chars1[] = {0x1EB1, 0};
    421     static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
    422     const UnicodeString test1(chars1);
    423     const UnicodeString test2(chars2);
    424     UErrorCode status = U_ZERO_ERROR;
    425 
    426     // NOTE: The java code used en_us to create the
    427     // CollationElementIterator's. I'm pretty sure that
    428     // was wrong, so I've change the code to use c1 and c2
    429     RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
    430     c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    431     CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
    432 
    433     RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
    434     c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    435     CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
    436 
    437     assertEqual(*i1, *i2);
    438 
    439     delete i2;
    440     delete c2;
    441     delete i1;
    442     delete c1;
    443 }
    444 
    445 // @bug 4066696
    446 //
    447 // French secondary collation checking at the end of compare iteration fails
    448 //
    449 void CollationRegressionTest::Test4066696(/* char* par */)
    450 {
    451     UErrorCode status = U_ZERO_ERROR;
    452     RuleBasedCollator *c = NULL;
    453 
    454     c = (RuleBasedCollator *)Collator::createInstance(Locale::getFrance(), status);
    455 
    456     if (c == NULL || U_FAILURE(status))
    457     {
    458         errln("Failure creating collator for Locale::getFrance()");
    459         delete c;
    460         return;
    461     }
    462 
    463     c->setStrength(Collator::SECONDARY);
    464 
    465 /*
    466     String[] tests = {
    467         "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
    468     };
    469 
    470   should be:
    471 
    472     String[] tests = {
    473         "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
    474     };
    475 
    476 */
    477 
    478     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    479     {
    480         {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
    481     };
    482 
    483     compareArray(*c, tests, ARRAY_LENGTH(tests));
    484 
    485     delete c;
    486 }
    487 
    488 // @bug 4076676
    489 //
    490 // Bad canonicalization of same-class combining characters
    491 //
    492 void CollationRegressionTest::Test4076676(/* char* par */)
    493 {
    494     // These combining characters are all in the same class, so they should not
    495     // be reordered, and they should compare as unequal.
    496     static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
    497     static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
    498 
    499     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    500     c->setStrength(Collator::TERTIARY);
    501 
    502     if (c->compare(s1,s2) == 0)
    503     {
    504         errln("Same-class combining chars were reordered");
    505     }
    506 
    507     delete c;
    508 }
    509 
    510 // @bug 4079231
    511 //
    512 // RuleBasedCollator::operator==(NULL) throws NullPointerException
    513 //
    514 void CollationRegressionTest::Test4079231(/* char* par */)
    515 {
    516     // I don't think there's any way to write this test
    517     // in C++. The following is equivalent to the Java,
    518     // but doesn't compile 'cause NULL can't be converted
    519     // to Collator&
    520     //
    521     // if (en_us->operator==(NULL))
    522     // {
    523     //     errln("en_us->operator==(NULL) returned TRUE");
    524     // }
    525 
    526  /*
    527    try {
    528         if (en_us->equals(null)) {
    529             errln("en_us->equals(null) returned true");
    530         }
    531     }
    532     catch (Exception e) {
    533         errln("en_us->equals(null) threw " + e.toString());
    534     }
    535 */
    536 }
    537 
    538 // @bug 4078588
    539 //
    540 // RuleBasedCollator breaks on "< a < bb" rule
    541 //
    542 void CollationRegressionTest::Test4078588(/* char *par */)
    543 {
    544     UErrorCode status = U_ZERO_ERROR;
    545     RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);
    546 
    547     if (rbc == NULL || U_FAILURE(status))
    548     {
    549         errln("Failed to create RuleBasedCollator.");
    550         delete rbc;
    551         return;
    552     }
    553 
    554     Collator::EComparisonResult result = rbc->compare("a","bb");
    555 
    556     if (result != Collator::LESS)
    557     {
    558         errln((UnicodeString)"Compare(a,bb) returned " + (int)result
    559             + (UnicodeString)"; expected -1");
    560     }
    561 
    562     delete rbc;
    563 }
    564 
    565 // @bug 4081866
    566 //
    567 // Combining characters in different classes not reordered properly.
    568 //
    569 void CollationRegressionTest::Test4081866(/* char* par */)
    570 {
    571     // These combining characters are all in different classes,
    572     // so they should be reordered and the strings should compare as equal.
    573     static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
    574     static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
    575 
    576     UErrorCode status = U_ZERO_ERROR;
    577     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    578     c->setStrength(Collator::TERTIARY);
    579 
    580     // Now that the default collators are set to NO_DECOMPOSITION
    581     // (as a result of fixing bug 4114077), we must set it explicitly
    582     // when we're testing reordering behavior.  -- lwerner, 5/5/98
    583     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    584 
    585     if (c->compare(s1,s2) != 0)
    586     {
    587         errln("Combining chars were not reordered");
    588     }
    589 
    590     delete c;
    591 }
    592 
    593 // @bug 4087241
    594 //
    595 // string comparison errors in Scandinavian collators
    596 //
    597 void CollationRegressionTest::Test4087241(/* char* par */)
    598 {
    599     UErrorCode status = U_ZERO_ERROR;
    600     Locale da_DK("da", "DK");
    601     RuleBasedCollator *c = NULL;
    602 
    603     c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
    604 
    605     if (c == NULL || U_FAILURE(status))
    606     {
    607         errln("Failed to create collator for da_DK locale");
    608         delete c;
    609         return;
    610     }
    611 
    612     c->setStrength(Collator::SECONDARY);
    613 
    614     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    615     {
    616         {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
    617         {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0},      // a-unlaut < a-ring
    618         {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
    619     };
    620 
    621     compareArray(*c, tests, ARRAY_LENGTH(tests));
    622 
    623     delete c;
    624 }
    625 
    626 // @bug 4087243
    627 //
    628 // CollationKey takes ignorable strings into account when it shouldn't
    629 //
    630 void CollationRegressionTest::Test4087243(/* char* par */)
    631 {
    632     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    633     c->setStrength(Collator::TERTIARY);
    634 
    635     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    636     {
    637         {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
    638     };
    639 
    640     compareArray(*c, tests, ARRAY_LENGTH(tests));
    641 
    642     delete c;
    643 }
    644 
    645 // @bug 4092260
    646 //
    647 // Mu/micro conflict
    648 // Micro symbol and greek lowercase letter Mu should sort identically
    649 //
    650 void CollationRegressionTest::Test4092260(/* char* par */)
    651 {
    652     UErrorCode status = U_ZERO_ERROR;
    653     Locale el("el", "");
    654     Collator *c = NULL;
    655 
    656     c = Collator::createInstance(el, status);
    657 
    658     if (c == NULL || U_FAILURE(status))
    659     {
    660         errln("Failed to create collator for el locale.");
    661         delete c;
    662         return;
    663     }
    664 
    665     // These now have tertiary differences in UCA
    666     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
    667 
    668     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    669     {
    670         {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
    671     };
    672 
    673     compareArray(*c, tests, ARRAY_LENGTH(tests));
    674 
    675     delete c;
    676 }
    677 
    678 // @bug 4095316
    679 //
    680 void CollationRegressionTest::Test4095316(/* char* par */)
    681 {
    682     UErrorCode status = U_ZERO_ERROR;
    683     Locale el_GR("el", "GR");
    684     Collator *c = Collator::createInstance(el_GR, status);
    685 
    686     if (c == NULL || U_FAILURE(status))
    687     {
    688         errln("Failed to create collator for el_GR locale");
    689         delete c;
    690         return;
    691     }
    692     // These now have tertiary differences in UCA
    693     //c->setStrength(Collator::TERTIARY);
    694     c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
    695 
    696     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    697     {
    698         {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
    699     };
    700 
    701     compareArray(*c, tests, ARRAY_LENGTH(tests));
    702 
    703     delete c;
    704 }
    705 
    706 // @bug 4101940
    707 //
    708 void CollationRegressionTest::Test4101940(/* char* par */)
    709 {
    710     UErrorCode status = U_ZERO_ERROR;
    711     RuleBasedCollator *c = NULL;
    712     UnicodeString rules = "< a < b";
    713     UnicodeString nothing = "";
    714 
    715     c = new RuleBasedCollator(rules, status);
    716 
    717     if (c == NULL || U_FAILURE(status))
    718     {
    719         errln("Failed to create RuleBasedCollator");
    720         delete c;
    721         return;
    722     }
    723 
    724     CollationElementIterator *i = c->createCollationElementIterator(nothing);
    725     i->reset();
    726 
    727     if (i->next(status) != CollationElementIterator::NULLORDER)
    728     {
    729         errln("next did not return NULLORDER");
    730     }
    731 
    732     delete i;
    733     delete c;
    734 }
    735 
    736 // @bug 4103436
    737 //
    738 // Collator::compare not handling spaces properly
    739 //
    740 void CollationRegressionTest::Test4103436(/* char* par */)
    741 {
    742     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    743     c->setStrength(Collator::TERTIARY);
    744 
    745     static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
    746     {
    747         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
    748         {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
    749     };
    750 
    751     compareArray(*c, tests, ARRAY_LENGTH(tests));
    752 
    753     delete c;
    754 }
    755 
    756 // @bug 4114076
    757 //
    758 // Collation not Unicode conformant with Hangul syllables
    759 //
    760 void CollationRegressionTest::Test4114076(/* char* par */)
    761 {
    762     UErrorCode status = U_ZERO_ERROR;
    763     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    764     c->setStrength(Collator::TERTIARY);
    765 
    766     //
    767     // With Canonical decomposition, Hangul syllables should get decomposed
    768     // into Jamo, but Jamo characters should not be decomposed into
    769     // conjoining Jamo
    770     //
    771     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    772     {
    773         {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
    774     };
    775 
    776     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    777     compareArray(*c, test1, ARRAY_LENGTH(test1));
    778 
    779     // From UTR #15:
    780     // *In earlier versions of Unicode, jamo characters like ksf
    781     //  had compatibility mappings to kf + sf. These mappings were
    782     //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
    783     // That is, the following test is obsolete as of 2.1.9
    784 
    785 //obsolete-    // With Full decomposition, it should go all the way down to
    786 //obsolete-    // conjoining Jamo characters.
    787 //obsolete-    //
    788 //obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    789 //obsolete-    {
    790 //obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
    791 //obsolete-    };
    792 //obsolete-
    793 //obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
    794 //obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
    795 
    796     delete c;
    797 }
    798 
    799 
    800 // @bug 4124632
    801 //
    802 // Collator::getCollationKey was hanging on certain character sequences
    803 //
    804 void CollationRegressionTest::Test4124632(/* char* par */)
    805 {
    806     UErrorCode status = U_ZERO_ERROR;
    807     Collator *coll = NULL;
    808 
    809     coll = Collator::createInstance(Locale::getJapan(), status);
    810 
    811     if (coll == NULL || U_FAILURE(status))
    812     {
    813         errln("Failed to create collator for Locale::JAPAN");
    814         delete coll;
    815         return;
    816     }
    817 
    818     static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
    819     CollationKey key;
    820 
    821     coll->getCollationKey(test, key, status);
    822 
    823     if (key.isBogus() || U_FAILURE(status))
    824     {
    825         errln("CollationKey creation failed.");
    826     }
    827 
    828     delete coll;
    829 }
    830 
    831 // @bug 4132736
    832 //
    833 // sort order of french words with multiple accents has errors
    834 //
    835 void CollationRegressionTest::Test4132736(/* char* par */)
    836 {
    837     UErrorCode status = U_ZERO_ERROR;
    838 
    839     Collator *c = NULL;
    840 
    841     c = Collator::createInstance(Locale::getFrance(), status);
    842     c->setStrength(Collator::TERTIARY);
    843 
    844     if (c == NULL || U_FAILURE(status))
    845     {
    846         errln("Failed to create a collator for Locale::getFrance()");
    847         delete c;
    848         return;
    849     }
    850 
    851     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    852     {
    853         {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
    854         {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
    855     };
    856 
    857     compareArray(*c, test1, ARRAY_LENGTH(test1));
    858 
    859     delete c;
    860 }
    861 
    862 // @bug 4133509
    863 //
    864 // The sorting using java.text.CollationKey is not in the exact order
    865 //
    866 void CollationRegressionTest::Test4133509(/* char* par */)
    867 {
    868     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    869     {
    870         {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
    871         {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
    872         {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
    873     };
    874 
    875     compareArray(*en_us, test1, ARRAY_LENGTH(test1));
    876 }
    877 
    878 // @bug 4114077
    879 //
    880 // Collation with decomposition off doesn't work for Europe
    881 //
    882 void CollationRegressionTest::Test4114077(/* char* par */)
    883 {
    884     // Ensure that we get the same results with decomposition off
    885     // as we do with it on....
    886 
    887     UErrorCode status = U_ZERO_ERROR;
    888     RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
    889     c->setStrength(Collator::TERTIARY);
    890 
    891     static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
    892     {
    893         {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
    894         {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
    895         {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
    896         {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
    897                                                 //   -> a, ring, acute
    898         {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
    899     };
    900 
    901     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
    902     compareArray(*c, test1, ARRAY_LENGTH(test1));
    903 
    904     static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
    905     {
    906         {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
    907     };
    908 
    909     c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    910     compareArray(*c, test2, ARRAY_LENGTH(test2));
    911 
    912     delete c;
    913 }
    914 
    915 // @bug 4141640
    916 //
    917 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
    918 //
    919 void CollationRegressionTest::Test4141640(/* char* par */)
    920 {
    921     //
    922     // Rather than just creating a Swedish collator, we might as well
    923     // try to instantiate one for every locale available on the system
    924     // in order to prevent this sort of bug from cropping up in the future
    925     //
    926     UErrorCode status = U_ZERO_ERROR;
    927     int32_t i, localeCount;
    928     const Locale *locales = Locale::getAvailableLocales(localeCount);
    929 
    930     for (i = 0; i < localeCount; i += 1)
    931     {
    932         Collator *c = NULL;
    933 
    934         status = U_ZERO_ERROR;
    935         c = Collator::createInstance(locales[i], status);
    936 
    937         if (c == NULL || U_FAILURE(status))
    938         {
    939             UnicodeString msg, localeName;
    940 
    941             msg += "Could not create collator for locale ";
    942             msg += locales[i].getName();
    943 
    944             errln(msg);
    945         }
    946 
    947         delete c;
    948     }
    949 }
    950 
    951 // @bug 4139572
    952 //
    953 // getCollationKey throws exception for spanish text
    954 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
    955 //
    956 void CollationRegressionTest::Test4139572(/* char* par */)
    957 {
    958     //
    959     // Code pasted straight from the bug report
    960     // (and then translated to C++ ;-)
    961     //
    962     // create spanish locale and collator
    963     UErrorCode status = U_ZERO_ERROR;
    964     Locale l("es", "es");
    965     Collator *col = NULL;
    966 
    967     col = Collator::createInstance(l, status);
    968 
    969     if (col == NULL || U_FAILURE(status))
    970     {
    971         errln("Failed to create a collator for es_es locale.");
    972         delete col;
    973         return;
    974     }
    975 
    976     CollationKey key;
    977 
    978     // this spanish phrase kills it!
    979     col->getCollationKey("Nombre De Objeto", key, status);
    980 
    981     if (key.isBogus() || U_FAILURE(status))
    982     {
    983         errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
    984     }
    985 
    986     delete col;
    987 }
    988 /* HSYS : RuleBasedCollator::compare() performance enhancements
    989           compare() does not create CollationElementIterator() anymore.*/
    990 
    991 class My4146160Collator : public RuleBasedCollator
    992 {
    993 public:
    994     My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
    995     ~My4146160Collator();
    996 
    997     CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
    998 
    999     CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
   1000 
   1001     static int32_t count;
   1002 };
   1003 
   1004 int32_t My4146160Collator::count = 0;
   1005 
   1006 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
   1007   : RuleBasedCollator(rbc.getRules(), status)
   1008 {
   1009 }
   1010 
   1011 My4146160Collator::~My4146160Collator()
   1012 {
   1013 }
   1014 
   1015 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
   1016 {
   1017     count += 1;
   1018     return RuleBasedCollator::createCollationElementIterator(text);
   1019 }
   1020 
   1021 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
   1022 {
   1023     count += 1;
   1024     return RuleBasedCollator::createCollationElementIterator(text);
   1025 }
   1026 
   1027 // @bug 4146160
   1028 //
   1029 // RuleBasedCollator doesn't use createCollationElementIterator internally
   1030 //
   1031 void CollationRegressionTest::Test4146160(/* char* par */)
   1032 {
   1033 #if 0
   1034     //
   1035     // Use a custom collator class whose createCollationElementIterator
   1036     // methods increment a count....
   1037     //
   1038     UErrorCode status = U_ZERO_ERROR;
   1039     CollationKey key;
   1040 
   1041     My4146160Collator::count = 0;
   1042     My4146160Collator *mc = NULL;
   1043 
   1044     mc = new My4146160Collator(*en_us, status);
   1045 
   1046     if (mc == NULL || U_FAILURE(status))
   1047     {
   1048         errln("Failed to create a My4146160Collator.");
   1049         delete mc;
   1050         return;
   1051     }
   1052 
   1053     mc->getCollationKey("1", key, status);
   1054 
   1055     if (key.isBogus() || U_FAILURE(status))
   1056     {
   1057         errln("Failure to get a CollationKey from a My4146160Collator.");
   1058         delete mc;
   1059         return;
   1060     }
   1061 
   1062     if (My4146160Collator::count < 1)
   1063     {
   1064         errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
   1065     }
   1066 
   1067     My4146160Collator::count = 0;
   1068     mc->compare("1", "2");
   1069 
   1070     if (My4146160Collator::count < 1)
   1071     {
   1072         errln("My4146160Collator::createtCollationElementIterator not called for compare");
   1073     }
   1074 
   1075     delete mc;
   1076 #endif
   1077 }
   1078 void CollationRegressionTest::compareArray(Collator &c,
   1079                                            const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
   1080                                            int32_t testCount)
   1081 {
   1082     int32_t i;
   1083     Collator::EComparisonResult expectedResult = Collator::EQUAL;
   1084 
   1085     for (i = 0; i < testCount; i += 3)
   1086     {
   1087         UnicodeString source(tests[i]);
   1088         UnicodeString comparison(tests[i + 1]);
   1089         UnicodeString target(tests[i + 2]);
   1090 
   1091         if (comparison == "<")
   1092         {
   1093             expectedResult = Collator::LESS;
   1094         }
   1095         else if (comparison == ">")
   1096         {
   1097             expectedResult = Collator::GREATER;
   1098         }
   1099         else if (comparison == "=")
   1100         {
   1101             expectedResult = Collator::EQUAL;
   1102         }
   1103         else
   1104         {
   1105             UnicodeString bogus1("Bogus comparison string \"");
   1106             UnicodeString bogus2("\"");
   1107             errln(bogus1 + comparison + bogus2);
   1108         }
   1109 
   1110         Collator::EComparisonResult compareResult = c.compare(source, target);
   1111 
   1112         CollationKey sourceKey, targetKey;
   1113         UErrorCode status = U_ZERO_ERROR;
   1114 
   1115         c.getCollationKey(source, sourceKey, status);
   1116 
   1117         if (U_FAILURE(status))
   1118         {
   1119             errln("Couldn't get collationKey for source");
   1120             continue;
   1121         }
   1122 
   1123         c.getCollationKey(target, targetKey, status);
   1124 
   1125         if (U_FAILURE(status))
   1126         {
   1127             errln("Couldn't get collationKey for target");
   1128             continue;
   1129         }
   1130 
   1131         Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
   1132 
   1133         reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
   1134 
   1135     }
   1136 }
   1137 
   1138 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
   1139 {
   1140     int32_t c1, c2, count = 0;
   1141     UErrorCode status = U_ZERO_ERROR;
   1142 
   1143     do
   1144     {
   1145         c1 = i1.next(status);
   1146         c2 = i2.next(status);
   1147 
   1148         if (c1 != c2)
   1149         {
   1150             UnicodeString msg, msg1("    ");
   1151 
   1152             msg += msg1 + count;
   1153             msg += ": strength(0x";
   1154             appendHex(c1, 8, msg);
   1155             msg += ") != strength(0x";
   1156             appendHex(c2, 8, msg);
   1157             msg += ")";
   1158 
   1159             errln(msg);
   1160             break;
   1161         }
   1162 
   1163         count += 1;
   1164     }
   1165     while (c1 != CollationElementIterator::NULLORDER);
   1166 }
   1167 
   1168 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
   1169 {
   1170     if (exec)
   1171     {
   1172         logln("Collation Regression Tests: ");
   1173     }
   1174 
   1175     if(en_us) {
   1176       switch (index)
   1177       {
   1178           case  0: name = "Test4048446"; if (exec) Test4048446(/* par */); break;
   1179           case  1: name = "Test4051866"; if (exec) Test4051866(/* par */); break;
   1180           case  2: name = "Test4053636"; if (exec) Test4053636(/* par */); break;
   1181           case  3: name = "Test4054238"; if (exec) Test4054238(/* par */); break;
   1182           case  4: name = "Test4054734"; if (exec) Test4054734(/* par */); break;
   1183           case  5: name = "Test4054736"; if (exec) Test4054736(/* par */); break;
   1184           case  6: name = "Test4058613"; if (exec) Test4058613(/* par */); break;
   1185           case  7: name = "Test4059820"; if (exec) Test4059820(/* par */); break;
   1186           case  8: name = "Test4060154"; if (exec) Test4060154(/* par */); break;
   1187           case  9: name = "Test4062418"; if (exec) Test4062418(/* par */); break;
   1188           case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break;
   1189           case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break;
   1190           case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break;
   1191           case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break;
   1192           case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break;
   1193           case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break;
   1194           case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break;
   1195           case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break;
   1196           case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break;
   1197           case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break;
   1198           case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break;
   1199           case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break;
   1200           case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break;
   1201           case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break;
   1202           case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break;
   1203           case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break;
   1204           case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break;
   1205           case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break;
   1206           case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
   1207           case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
   1208           case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
   1209           default: name = ""; break;
   1210       }
   1211     } else {
   1212       dataerrln("Class collator not instantiated");
   1213       name = "";
   1214     }
   1215 }
   1216 
   1217 #endif /* #if !UCONFIG_NO_COLLATION */
   1218