Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 #include "unicode/utypes.h"
      8 
      9 #if !UCONFIG_NO_COLLATION
     10 
     11 #include "unicode/coll.h"
     12 #include "unicode/tblcoll.h"
     13 #include "unicode/unistr.h"
     14 #include "unicode/sortkey.h"
     15 #include "itercoll.h"
     16 #include "unicode/schriter.h"
     17 #include "unicode/chariter.h"
     18 #include "unicode/uchar.h"
     19 #include "cmemory.h"
     20 
     21 #define ARRAY_LENGTH(array) (sizeof array / sizeof array[0])
     22 
     23 static UErrorCode status = U_ZERO_ERROR;
     24 
     25 CollationIteratorTest::CollationIteratorTest()
     26  : test1("What subset of all possible test cases?", ""),
     27    test2("has the highest probability of detecting", "")
     28 {
     29     en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
     30     if(U_FAILURE(status)) {
     31       delete en_us;
     32       en_us = 0;
     33       errcheckln(status, "Collator creation failed with %s", u_errorName(status));
     34       return;
     35     }
     36 
     37 }
     38 
     39 CollationIteratorTest::~CollationIteratorTest()
     40 {
     41     delete en_us;
     42 }
     43 
     44 /**
     45  * Test for CollationElementIterator previous and next for the whole set of
     46  * unicode characters.
     47  */
     48 void CollationIteratorTest::TestUnicodeChar()
     49 {
     50     CollationElementIterator *iter;
     51     UChar codepoint;
     52     UnicodeString source;
     53 
     54     for (codepoint = 1; codepoint < 0xFFFE;)
     55     {
     56       source.remove();
     57 
     58       while (codepoint % 0xFF != 0)
     59       {
     60         if (u_isdefined(codepoint))
     61           source += codepoint;
     62         codepoint ++;
     63       }
     64 
     65       if (u_isdefined(codepoint))
     66         source += codepoint;
     67 
     68       if (codepoint != 0xFFFF)
     69         codepoint ++;
     70 
     71       iter = en_us->createCollationElementIterator(source);
     72       /* A basic test to see if it's working at all */
     73       backAndForth(*iter);
     74       delete iter;
     75     }
     76 }
     77 
     78 /**
     79  * Test for CollationElementIterator.previous()
     80  *
     81  * @bug 4108758 - Make sure it works with contracting characters
     82  *
     83  */
     84 void CollationIteratorTest::TestPrevious(/* char* par */)
     85 {
     86     UErrorCode status = U_ZERO_ERROR;
     87     CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
     88 
     89     // A basic test to see if it's working at all
     90     backAndForth(*iter);
     91     delete iter;
     92 
     93     // Test with a contracting character sequence
     94     UnicodeString source;
     95     RuleBasedCollator *c1 = NULL;
     96     c1 = new RuleBasedCollator(
     97         (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status);
     98 
     99     if (c1 == NULL || U_FAILURE(status))
    100     {
    101         errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
    102         delete c1;
    103         return;
    104     }
    105 
    106     source = "abchdcba";
    107     iter = c1->createCollationElementIterator(source);
    108     backAndForth(*iter);
    109     delete iter;
    110     delete c1;
    111 
    112     // Test with an expanding character sequence
    113     RuleBasedCollator *c2 = NULL;
    114     c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status);
    115 
    116     if (c2 == NULL || U_FAILURE(status))
    117     {
    118         errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
    119         delete c2;
    120         return;
    121     }
    122 
    123     source = "abcd";
    124     iter = c2->createCollationElementIterator(source);
    125     backAndForth(*iter);
    126     delete iter;
    127     delete c2;
    128 
    129     // Now try both
    130     RuleBasedCollator *c3 = NULL;
    131     c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status);
    132 
    133     if (c3 == NULL || U_FAILURE(status))
    134     {
    135         errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
    136         delete c3;
    137         return;
    138     }
    139 
    140     source = "abcdbchdc";
    141     iter = c3->createCollationElementIterator(source);
    142     backAndForth(*iter);
    143     delete iter;
    144     delete c3;
    145 
    146     status=U_ZERO_ERROR;
    147     source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
    148 
    149     Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
    150     if(U_FAILURE(status)){
    151         errln("Couldn't create a collator");
    152     }
    153     iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source);
    154     backAndForth(*iter);
    155     delete iter;
    156     delete c4;
    157 
    158     source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
    159     Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
    160 
    161     iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source);
    162     if(U_FAILURE(status)){
    163         errln("Couldn't create Japanese collator\n");
    164     }
    165     backAndForth(*iter);
    166     delete iter;
    167     delete c5;
    168 }
    169 
    170 /**
    171  * Test for getOffset() and setOffset()
    172  */
    173 void CollationIteratorTest::TestOffset(/* char* par */)
    174 {
    175     CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
    176     UErrorCode status = U_ZERO_ERROR;
    177     // testing boundaries
    178     iter->setOffset(0, status);
    179     if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) {
    180         errln("Error: After setting offset to 0, we should be at the end "
    181                 "of the backwards iteration");
    182     }
    183     iter->setOffset(test1.length(), status);
    184     if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) {
    185         errln("Error: After setting offset to end of the string, we should "
    186                 "be at the end of the backwards iteration");
    187     }
    188 
    189     // Run all the way through the iterator, then get the offset
    190     int32_t orderLength = 0;
    191     Order *orders = getOrders(*iter, orderLength);
    192 
    193     int32_t offset = iter->getOffset();
    194 
    195     if (offset != test1.length())
    196     {
    197         UnicodeString msg1("offset at end != length: ");
    198         UnicodeString msg2(" vs ");
    199 
    200         errln(msg1 + offset + msg2 + test1.length());
    201     }
    202 
    203     // Now set the offset back to the beginning and see if it works
    204     CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
    205 
    206     iter->setOffset(0, status);
    207 
    208     if (U_FAILURE(status))
    209     {
    210         errln("setOffset failed.");
    211     }
    212     else
    213     {
    214         assertEqual(*iter, *pristine);
    215     }
    216 
    217     // TODO: try iterating halfway through a messy string.
    218 
    219     delete pristine;
    220     delete[] orders;
    221     delete iter;
    222 }
    223 
    224 /**
    225  * Test for setText()
    226  */
    227 void CollationIteratorTest::TestSetText(/* char* par */)
    228 {
    229     CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
    230     CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
    231     UErrorCode status = U_ZERO_ERROR;
    232 
    233     // Run through the second iterator just to exercise it
    234     int32_t c = iter2->next(status);
    235     int32_t i = 0;
    236 
    237     while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
    238     {
    239         if (U_FAILURE(status))
    240         {
    241             errln("iter2->next() returned an error.");
    242             delete iter2;
    243             delete iter1;
    244         }
    245 
    246         c = iter2->next(status);
    247     }
    248 
    249     // Now set it to point to the same string as the first iterator
    250     iter2->setText(test1, status);
    251 
    252     if (U_FAILURE(status))
    253     {
    254         errln("call to iter2->setText(test1) failed.");
    255     }
    256     else
    257     {
    258         assertEqual(*iter1, *iter2);
    259     }
    260     iter1->reset();
    261     //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
    262     CharacterIterator* chariter = new StringCharacterIterator(test1);
    263     iter2->setText(*chariter, status);
    264     if (U_FAILURE(status))
    265     {
    266         errln("call to iter2->setText(chariter(test1)) failed.");
    267     }
    268     else
    269     {
    270         assertEqual(*iter1, *iter2);
    271     }
    272 
    273     // test for an empty string
    274     UnicodeString empty("");
    275     iter1->setText(empty, status);
    276     if (U_FAILURE(status)
    277         || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
    278         errln("Empty string should have no CEs.");
    279     }
    280     ((StringCharacterIterator *)chariter)->setText(empty);
    281     iter1->setText(*chariter, status);
    282     if (U_FAILURE(status)
    283         || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
    284         errln("Empty string should have no CEs.");
    285     }
    286     delete chariter;
    287     delete iter2;
    288     delete iter1;
    289 }
    290 
    291 /** @bug 4108762
    292  * Test for getMaxExpansion()
    293  */
    294 void CollationIteratorTest::TestMaxExpansion(/* char* par */)
    295 {
    296     UErrorCode          status = U_ZERO_ERROR;
    297     UnicodeString rule("&a < ab < c/aba < d < z < ch");
    298     RuleBasedCollator  *coll   = new RuleBasedCollator(rule, status);
    299     UChar               ch     = 0;
    300     UnicodeString       str(ch);
    301 
    302     CollationElementIterator *iter   = coll->createCollationElementIterator(str);
    303 
    304     while (ch < 0xFFFF && U_SUCCESS(status)) {
    305         int      count = 1;
    306         uint32_t order;
    307         ch ++;
    308         UnicodeString str(ch);
    309         iter->setText(str, status);
    310         order = iter->previous(status);
    311 
    312         /* thai management */
    313         if (CollationElementIterator::isIgnorable(order))
    314             order = iter->previous(status);
    315 
    316         while (U_SUCCESS(status)
    317             && iter->previous(status) != (int32_t)UCOL_NULLORDER)
    318         {
    319             count ++;
    320         }
    321 
    322         if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
    323             errln("Failure at codepoint %d, maximum expansion count < %d\n",
    324                 ch, count);
    325         }
    326     }
    327 
    328     delete iter;
    329     delete coll;
    330 }
    331 
    332 /*
    333  * @bug 4157299
    334  */
    335 void CollationIteratorTest::TestClearBuffers(/* char* par */)
    336 {
    337     UErrorCode status = U_ZERO_ERROR;
    338     RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);
    339 
    340     if (c == NULL || U_FAILURE(status))
    341     {
    342         errln("Couldn't create a RuleBasedCollator.");
    343         delete c;
    344         return;
    345     }
    346 
    347     UnicodeString source("abcd");
    348     CollationElementIterator *i = c->createCollationElementIterator(source);
    349     int32_t e0 = i->next(status);    // save the first collation element
    350 
    351     if (U_FAILURE(status))
    352     {
    353         errln("call to i->next() failed. err=%s", u_errorName(status));
    354     }
    355     else
    356     {
    357         i->setOffset(3, status);        // go to the expanding character
    358 
    359         if (U_FAILURE(status))
    360         {
    361             errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
    362         }
    363         else
    364         {
    365             i->next(status);                // but only use up half of it
    366 
    367             if (U_FAILURE(status))
    368             {
    369                 errln("call to i->next() failed. err=%s", u_errorName(status));
    370             }
    371             else
    372             {
    373                 i->setOffset(0, status);        // go back to the beginning
    374 
    375                 if (U_FAILURE(status))
    376                 {
    377                     errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
    378                 }
    379                 else
    380                 {
    381                     int32_t e = i->next(status);    // and get this one again
    382 
    383                     if (U_FAILURE(status))
    384                     {
    385                         errln("call to i->next() failed. err=%s", u_errorName(status));
    386                     }
    387                     else if (e != e0)
    388                     {
    389                         errln("got 0x%X, expected 0x%X", e, e0);
    390                     }
    391                 }
    392             }
    393         }
    394     }
    395 
    396     delete i;
    397     delete c;
    398 }
    399 
    400 /**
    401  * Testing the assignment operator
    402  */
    403 void CollationIteratorTest::TestAssignment()
    404 {
    405     UErrorCode status = U_ZERO_ERROR;
    406     RuleBasedCollator *coll =
    407         (RuleBasedCollator *)Collator::createInstance(status);
    408 
    409     if (coll == NULL || U_FAILURE(status))
    410     {
    411         errln("Couldn't create a default collator.");
    412         return;
    413     }
    414 
    415     UnicodeString source("abcd");
    416     CollationElementIterator *iter1 =
    417         coll->createCollationElementIterator(source);
    418 
    419     CollationElementIterator iter2 = *iter1;
    420 
    421     if (*iter1 != iter2) {
    422         errln("Fail collation iterator assignment does not produce the same elements");
    423     }
    424 
    425     CollationElementIterator iter3(*iter1);
    426 
    427     if (*iter1 != iter3) {
    428         errln("Fail collation iterator copy constructor does not produce the same elements");
    429     }
    430 
    431     source = CharsToUnicodeString("a\\u0300\\u0325");
    432     coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    433     CollationElementIterator *iter4
    434                         = coll->createCollationElementIterator(source);
    435     CollationElementIterator iter5(*iter4);
    436     if (*iter4 != iter5) {
    437         errln("collation iterator assignment does not produce the same elements");
    438     }
    439     iter4->next(status);
    440     if (U_FAILURE(status) || *iter4 == iter5) {
    441         errln("collation iterator not equal");
    442     }
    443     iter5.next(status);
    444     if (U_FAILURE(status) || *iter4 != iter5) {
    445         errln("collation iterator equal");
    446     }
    447     iter4->next(status);
    448     if (U_FAILURE(status) || *iter4 == iter5) {
    449         errln("collation iterator not equal");
    450     }
    451     iter5.next(status);
    452     if (U_FAILURE(status) || *iter4 != iter5) {
    453         errln("collation iterator equal");
    454     }
    455     CollationElementIterator iter6(*iter4);
    456     if (*iter4 != iter6) {
    457         errln("collation iterator equal");
    458     }
    459     iter4->next(status);
    460     if (U_FAILURE(status) || *iter4 == iter5) {
    461         errln("collation iterator not equal");
    462     }
    463     iter5.next(status);
    464     if (U_FAILURE(status) || *iter4 != iter5) {
    465         errln("collation iterator equal");
    466     }
    467     iter4->next(status);
    468     if (U_FAILURE(status) || *iter4 == iter5) {
    469         errln("collation iterator not equal");
    470     }
    471     iter5.next(status);
    472     if (U_FAILURE(status) || *iter4 != iter5) {
    473         errln("collation iterator equal");
    474     }
    475     delete iter1;
    476     delete iter4;
    477     delete coll;
    478 }
    479 
    480 /**
    481  * Testing the constructors
    482  */
    483 void CollationIteratorTest::TestConstructors()
    484 {
    485     UErrorCode status = U_ZERO_ERROR;
    486     RuleBasedCollator *coll =
    487         (RuleBasedCollator *)Collator::createInstance(status);
    488     if (coll == NULL || U_FAILURE(status))
    489     {
    490         errln("Couldn't create a default collator.");
    491         return;
    492     }
    493 
    494     // testing protected constructor with character iterator as argument
    495     StringCharacterIterator chariter(test1);
    496     CollationElementIterator *iter1 =
    497         coll->createCollationElementIterator(chariter);
    498     if (U_FAILURE(status)) {
    499         errln("Couldn't create collation element iterator with character iterator.");
    500         return;
    501     }
    502     CollationElementIterator *iter2 =
    503         coll->createCollationElementIterator(test1);
    504 
    505     // initially the 2 collation element iterators should be the same
    506     if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
    507         || *iter2 != *iter1) {
    508         errln("CollationElementIterators constructed with the same string data should be the same at the start");
    509     }
    510     assertEqual(*iter1, *iter2);
    511 
    512     delete iter1;
    513     delete iter2;
    514 
    515     // tests empty strings
    516     UnicodeString empty("");
    517     iter1 = coll->createCollationElementIterator(empty);
    518     chariter.setText(empty);
    519     iter2 = coll->createCollationElementIterator(chariter);
    520     if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
    521         || *iter2 != *iter1) {
    522         errln("CollationElementIterators constructed with the same string data should be the same at the start");
    523     }
    524     if (iter1->next(status) != (int32_t)UCOL_NULLORDER) {
    525         errln("Empty string should have no CEs.");
    526     }
    527     if (iter2->next(status) != (int32_t)UCOL_NULLORDER) {
    528         errln("Empty string should have no CEs.");
    529     }
    530     delete iter1;
    531     delete iter2;
    532     delete coll;
    533 }
    534 
    535 /**
    536  * Testing the strength order
    537  */
    538 void CollationIteratorTest::TestStrengthOrder()
    539 {
    540     int order = 0x0123ABCD;
    541 
    542     UErrorCode status = U_ZERO_ERROR;
    543     RuleBasedCollator *coll =
    544         (RuleBasedCollator *)Collator::createInstance(status);
    545     if (coll == NULL || U_FAILURE(status))
    546     {
    547         errln("Couldn't create a default collator.");
    548         return;
    549     }
    550 
    551     coll->setStrength(Collator::PRIMARY);
    552     CollationElementIterator *iter =
    553         coll->createCollationElementIterator(test1);
    554 
    555     if (iter == NULL) {
    556         errln("Couldn't create a collation element iterator from default collator");
    557         return;
    558     }
    559 
    560     if (iter->strengthOrder(order) != 0x01230000) {
    561         errln("Strength order for a primary strength collator should be the first 2 bytes");
    562         return;
    563     }
    564 
    565     coll->setStrength(Collator::SECONDARY);
    566     if (iter->strengthOrder(order) != 0x0123AB00) {
    567         errln("Strength order for a secondary strength collator should be the third byte");
    568         return;
    569     }
    570 
    571     coll->setStrength(Collator::TERTIARY);
    572     if (iter->strengthOrder(order) != order) {
    573         errln("Strength order for a tertiary strength collator should be the third byte");
    574         return;
    575     }
    576     delete iter;
    577     delete coll;
    578 }
    579 
    580 /**
    581  * Return a string containing all of the collation orders
    582  * returned by calls to next on the specified iterator
    583  */
    584 UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
    585 {
    586     int32_t order;
    587     UErrorCode status = U_ZERO_ERROR;
    588 
    589     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
    590     {
    591         target += "0x";
    592         appendHex(order, 8, target);
    593         target += " ";
    594     }
    595 
    596     return target;
    597 }
    598 
    599 void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
    600 {
    601     int32_t c1, c2, count = 0;
    602     UErrorCode status = U_ZERO_ERROR;
    603 
    604     do
    605     {
    606         c1 = i1.next(status);
    607         c2 = i2.next(status);
    608 
    609         if (c1 != c2)
    610         {
    611             errln("    %d: strength(0x%X) != strength(0x%X)", count, c1, c2);
    612             break;
    613         }
    614 
    615         count += 1;
    616     }
    617     while (c1 != CollationElementIterator::NULLORDER);
    618 }
    619 
    620 void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
    621 {
    622     if (exec)
    623     {
    624         logln("Collation Iteration Tests: ");
    625     }
    626 
    627     if(en_us) {
    628       switch (index)
    629       {
    630           case  0: name = "TestPrevious";      if (exec) TestPrevious(/* par */);     break;
    631           case  1: name = "TestOffset";        if (exec) TestOffset(/* par */);       break;
    632           case  2: name = "TestSetText";       if (exec) TestSetText(/* par */);      break;
    633           case  3: name = "TestMaxExpansion";  if (exec) TestMaxExpansion(/* par */); break;
    634           case  4: name = "TestClearBuffers";  if (exec) TestClearBuffers(/* par */); break;
    635           case  5: name = "TestUnicodeChar";   if (exec) TestUnicodeChar(/* par */);  break;
    636           case  6: name = "TestAssignment";    if (exec) TestAssignment(/* par */);    break;
    637           case  7: name = "TestConstructors";  if (exec) TestConstructors(/* par */); break;
    638           case  8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break;
    639           default: name = ""; break;
    640       }
    641     } else {
    642       dataerrln("Class iterator not instantiated");
    643       name = "";
    644     }
    645 }
    646 
    647 #endif /* #if !UCONFIG_NO_COLLATION */
    648