Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 /**
      8  * IntlTestCollator is the medium level test class for everything in the directory "collate".
      9  */
     10 
     11 /***********************************************************************
     12 * Modification history
     13 * Date        Name        Description
     14 * 02/14/2001  synwee      Compare with cintltst and commented away tests
     15 *                         that are not run.
     16 ***********************************************************************/
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_COLLATION
     21 
     22 #include "unicode/localpointer.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/ustring.h"
     25 
     26 #include "dadrcoll.h"
     27 
     28 #include "encoll.h"
     29 #include "frcoll.h"
     30 #include "decoll.h"
     31 #include "escoll.h"
     32 #include "ficoll.h"
     33 #include "jacoll.h"
     34 #include "trcoll.h"
     35 #include "allcoll.h"
     36 #include "g7coll.h"
     37 #include "mnkytst.h"
     38 #include "apicoll.h"
     39 #include "regcoll.h"
     40 #include "currcoll.h"
     41 #include "itercoll.h"
     42 #include "tstnorm.h"
     43 #include "normconf.h"
     44 #include "thcoll.h"
     45 #include "srchtest.h"
     46 #include "ssearch.h"
     47 #include "cntabcol.h"
     48 #include "lcukocol.h"
     49 #include "ucaconf.h"
     50 #include "svccoll.h"
     51 #include "cmemory.h"
     52 //#include "rndmcoll.h"
     53 
     54 // Set to 1 to test offsets in backAndForth()
     55 #define TEST_OFFSETS 0
     56 
// Expands to a single `case n:` for the switch in runIndexedTest().
// It stores the stringified class name in `name` and, when `exec` is set,
// logs a header, default-constructs a `classname` object and hands it to
// callTest() together with `par`. The names `name`, `exec` and `par` are taken
// from the scope in which the macro is expanded.
// The expansion ends with `break` and no semicolon, so every use supplies its own `;`.
#define TESTCLASS(n,classname)        \
    case n:                           \
        name = #classname;            \
        if (exec) {                   \
            logln(#classname "---");  \
            logln("");                \
            classname t;              \
            callTest(t, par);         \
        }                             \
        break
     67 
/**
 * Selects the collation test class registered under `index`.
 *
 * @param index  zero-based index of the test class to select
 * @param exec   when true the selected class is instantiated and run through
 *               callTest(); when false only `name` is filled in
 * @param name   receives the name of the selected class, or "" when `index`
 *               matches no case
 * @param par    forwarded unchanged to callTest()
 */
void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
{
    if (exec) {
        logln("TestSuite Collator: ");
    }

    switch (index) {
      TESTCLASS(0, CollationEnglishTest);
      TESTCLASS(1, CollationFrenchTest);
      TESTCLASS(2, CollationGermanTest);
      TESTCLASS(3, CollationSpanishTest);
      TESTCLASS(4, CollationKanaTest);
      TESTCLASS(5, CollationTurkishTest);
      TESTCLASS(6, CollationDummyTest);
      TESTCLASS(7, G7CollationTest);
      TESTCLASS(8, CollationMonkeyTest);
      TESTCLASS(9, CollationAPITest);
      TESTCLASS(10, CollationRegressionTest);
      TESTCLASS(11, CollationCurrencyTest);
      TESTCLASS(12, CollationIteratorTest);
      TESTCLASS(13, CollationThaiTest);
      TESTCLASS(14, LotusCollationKoreanTest);
      TESTCLASS(15, StringSearchTest);
      TESTCLASS(16, ContractionTableTest);
      // NOTE(review): when this condition is false, index 17 has no case and
      // falls into `default`, which reports an empty name although indices
      // 18-21 are still defined. Confirm how the caller treats an empty name
      // in the middle of the range.
#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
      TESTCLASS(17, DataDrivenCollatorTest);
#endif
      TESTCLASS(18, UCAConformanceTest);
      TESTCLASS(19, CollationServiceTest);
      TESTCLASS(20, CollationFinnishTest); // NOTE(review): an older remark here said "removed by weiv - we have changed Finnish collation", yet the test is enabled - confirm which is intended
      //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
      TESTCLASS(21, SSearchTest);

      // Any other index: report an empty name.
      default: name = ""; break;
    }
}
    104 
    105 UCollationResult
    106 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
    107   int32_t partialSKResult = 0;
    108   uint8_t sBuf[512], tBuf[512];
    109   UCharIterator sIter, tIter;
    110   uint32_t sState[2], tState[2];
    111   int32_t sSize = pieceSize, tSize = pieceSize;
    112   int32_t i = 0;
    113   status = U_ZERO_ERROR;
    114   sState[0] = 0; sState[1] = 0;
    115   tState[0] = 0; tState[1] = 0;
    116   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
    117     uiter_setString(&sIter, source, sLen);
    118     uiter_setString(&tIter, target, tLen);
    119     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
    120     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
    121 
    122     if(sState[0] != 0 || tState[0] != 0) {
    123       log("State != 0 : %08X %08X\n", sState[0], tState[0]);
    124     }
    125     log("%i ", i++);
    126 
    127     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
    128   }
    129 
    130   if(partialSKResult < 0) {
    131       return UCOL_LESS;
    132   } else if(partialSKResult > 0) {
    133     return UCOL_GREATER;
    134   } else {
    135     return UCOL_EQUAL;
    136   }
    137 }
    138 
    139 void
    140 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
    141 {
    142   UErrorCode status = U_ZERO_ERROR;
    143 
    144   UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
    145 
    146   Collator::EComparisonResult compareResult = col->compare(source, target);
    147 
    148   CollationKey srckey, tgtkey;
    149   col->getCollationKey(source, srckey, status);
    150   col->getCollationKey(target, tgtkey, status);
    151   if (U_FAILURE(status)){
    152     errln("Creation of collation keys failed\n");
    153   }
    154   Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
    155 
    156   reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
    157 
    158     UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
    159 
    160     int32_t sLen = source.length(), tLen = target.length();
    161     const UChar* src = source.getBuffer();
    162     const UChar* trg = target.getBuffer();
    163     UCollationResult compareResultIter = (UCollationResult)result;
    164 
    165     {
    166       UCharIterator sIter, tIter;
    167       uiter_setString(&sIter, src, sLen);
    168       uiter_setString(&tIter, trg, tLen);
    169       compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    170       if(compareResultIter != (UCollationResult)result) {
    171         errln("Different result for iterative comparison "+source+" "+target);
    172       }
    173     }
    174     /* convert the strings to UTF-8 and do try comparing with char iterator */
    175     if(!quick) { /*!QUICK*/
    176       char utf8Source[256], utf8Target[256];
    177       int32_t utf8SourceLen = 0, utf8TargetLen = 0;
    178       u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
    179       if(U_FAILURE(status)) { /* probably buffer is not big enough */
    180         log("Src UTF-8 buffer too small! Will not compare!\n");
    181       } else {
    182         u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
    183         if(U_SUCCESS(status)) { /* probably buffer is not big enough */
    184           UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
    185           UCharIterator sIter, tIter;
    186           /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
    187           uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
    188           uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
    189        /*uiter_setString(&sIter, source, sLen);
    190       uiter_setString(&tIter, target, tLen);*/
    191           compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    192           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    193           sIter.move(&sIter, 0, UITER_START);
    194           tIter.move(&tIter, 0, UITER_START);
    195           compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    196           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
    197           if(compareResultUTF8 != compareResultIter) {
    198             errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
    199           }
    200           if(compareResultUTF8 != compareResultUTF8Norm) {
    201             errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
    202           }
    203         } else {
    204           log("Target UTF-8 buffer too small! Did not compare!\n");
    205         }
    206         if(U_FAILURE(status)) {
    207           log("UTF-8 strcoll failed! Ignoring result\n");
    208         }
    209       }
    210     }
    211 
    212     /* testing the partial sortkeys */
    213     { /*!QUICK*/
    214       int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
    215       int32_t partialSizesSize = 1;
    216       if(!quick) {
    217         partialSizesSize = 7;
    218       }
    219       int32_t i = 0;
    220       log("partial sortkey test piecesize=");
    221       for(i = 0; i < partialSizesSize; i++) {
    222         UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
    223         log("%i ", partialSizes[i]);
    224 
    225         partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
    226         if(partialSKResult != (UCollationResult)result) {
    227           errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
    228         }
    229 
    230         if(norm != UCOL_ON && !quick) {
    231           log("N ");
    232           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    233           partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
    234           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
    235           if(partialSKResult != partialNormalizedSKResult) {
    236             errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
    237           }
    238         }
    239       }
    240       log("\n");
    241     }
    242 /*
    243   if (compareResult != result) {
    244     errln("String comparison failed in variant test\n");
    245   }
    246   if (keyResult != result) {
    247     errln("Collation key comparison failed in variant test\n");
    248   }
    249 */
    250 }
    251 
    252 void
    253 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
    254   doTest(col, UnicodeString(source), UnicodeString(target), result);
    255 }
    256 
    257 void
    258 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
    259 {
    260   if(col) {
    261     doTestVariant(col, source, target, result);
    262     if(result == Collator::LESS) {
    263       doTestVariant(col, target, source, Collator::GREATER);
    264     } else if (result == Collator::GREATER) {
    265       doTestVariant(col, target, source, Collator::LESS);
    266     }
    267 
    268     UErrorCode status = U_ZERO_ERROR;
    269     LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
    270     logln("Testing iterating source: "+source);
    271     backAndForth(*c);
    272     c->setText(target, status);
    273     logln("Testing iterating target: "+target);
    274     backAndForth(*c);
    275   }
    276 }
    277 
    278 
    279 // used for collation result reporting, defined here for convenience
    280 // (maybe moved later)
    281 void
    282 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
    283              CollationKey &sourceKey, CollationKey &targetKey,
    284              Collator::EComparisonResult compareResult,
    285              Collator::EComparisonResult keyResult,
    286                                 Collator::EComparisonResult incResult,
    287                          Collator::EComparisonResult expectedResult )
    288 {
    289     if (expectedResult < -1 || expectedResult > 1)
    290     {
    291         errln("***** invalid call to reportCResult ****");
    292         return;
    293     }
    294 
    295     UBool ok1 = (compareResult == expectedResult);
    296     UBool ok2 = (keyResult == expectedResult);
    297     UBool ok3 = (incResult == expectedResult);
    298 
    299 
    300     if (ok1 && ok2 && ok3 && !verbose) {
    301         // Keep non-verbose, passing tests fast
    302         return;
    303     } else {
    304         UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
    305         UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
    306         UnicodeString prettySource, prettyTarget, sExpect, sResult;
    307 
    308         IntlTest::prettify(source, prettySource);
    309         IntlTest::prettify(target, prettyTarget);
    310         appendCompareResult(compareResult, sResult);
    311         appendCompareResult(expectedResult, sExpect);
    312 
    313         if (ok1) {
    314             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    315         } else {
    316             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    317         }
    318 
    319         msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
    320         msg2 = ").compareTo(key(";
    321         msg3 = ")) returned ";
    322 
    323         appendCompareResult(keyResult, sResult);
    324 
    325         if (ok2) {
    326             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    327         } else {
    328             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    329 
    330             msg1 = "  ";
    331             msg2 = " vs. ";
    332 
    333             prettify(sourceKey, prettySource);
    334             prettify(targetKey, prettyTarget);
    335 
    336             errln(msg1 + prettySource + msg2 + prettyTarget);
    337         }
    338         msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
    339         msg2 = ", ";
    340         msg3 = ") returned ";
    341 
    342         appendCompareResult(incResult, sResult);
    343 
    344         if (ok3) {
    345             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    346         } else {
    347             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    348         }
    349     }
    350 }
    351 
    352 UnicodeString&
    353 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
    354                   UnicodeString& target)
    355 {
    356     if (result == Collator::LESS)
    357     {
    358         target += "LESS";
    359     }
    360     else if (result == Collator::EQUAL)
    361     {
    362         target += "EQUAL";
    363     }
    364     else if (result == Collator::GREATER)
    365     {
    366         target += "GREATER";
    367     }
    368     else
    369     {
    370         UnicodeString huh = "?";
    371 
    372         target += (huh + (int32_t)result);
    373     }
    374 
    375     return target;
    376 }
    377 
    378 // Produce a printable representation of a CollationKey
    379 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
    380 {
    381     int32_t i, byteCount;
    382     const uint8_t *bytes = source.getByteArray(byteCount);
    383 
    384     target.remove();
    385     target += "[";
    386 
    387     for (i = 0; i < byteCount; i += 1)
    388     {
    389         if (i != 0) {
    390             target += " ";
    391         }
    392         appendHex(bytes[i], 2, target);
    393     }
    394 
    395     target += "]";
    396 
    397     return target;
    398 }
    399 
    400 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
    401 {
    402     // Run through the iterator forwards and stick it into an array
    403     int32_t orderLength = 0;
    404     LocalArray<Order> orders(getOrders(iter, orderLength));
    405     UErrorCode status = U_ZERO_ERROR;
    406 
    407     // Now go through it backwards and make sure we get the same values
    408     int32_t index = orderLength;
    409     int32_t o;
    410 
    411     // reset the iterator
    412     iter.reset();
    413 
    414     while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
    415     {
    416         /*int32_t offset = */iter.getOffset();
    417 
    418         if (index == 0) {
    419           if(o == 0) {
    420             continue;
    421           } else { // this is an error, orders exhausted but there are non-ignorable CEs from
    422             // going backwards
    423             errln("Backward iteration returned a non ignorable after orders are exhausted");
    424             break;
    425           }
    426         }
    427 
    428         index -= 1;
    429         if (o != orders[index].order) {
    430             if (o == 0)
    431                 index += 1;
    432             else {
    433                 while (index > 0 && orders[--index].order == 0) {
    434                   // nothing...
    435                 }
    436 
    437                 if (o != orders[index].order) {
    438                     errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
    439                     orders[index].order, o);
    440                 //break;
    441                   return;
    442                 }
    443             }
    444         }
    445 
    446 #if TEST_OFFSETS
    447         if (offset != orders[index].offset) {
    448           errln("Mismatched offset at index %d: %d vs. %d", index,
    449             orders[index].offset, offset);
    450        //break;
    451          return;
    452         }
    453 #endif
    454 
    455     }
    456 
    457     while (index != 0 && orders[index - 1].order == 0)
    458     {
    459       index --;
    460     }
    461 
    462     if (index != 0)
    463     {
    464         UnicodeString msg("Didn't get back to beginning - index is ");
    465         errln(msg + index);
    466 
    467         iter.reset();
    468         err("next: ");
    469         while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
    470         {
    471             UnicodeString hexString("0x");
    472 
    473             appendHex(o, 8, hexString);
    474             hexString += " ";
    475             err(hexString);
    476         }
    477         errln("");
    478 
    479         err("prev: ");
    480         while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
    481         {
    482             UnicodeString hexString("0x");
    483 
    484             appendHex(o, 8, hexString);
    485             hexString += " ";
    486              err(hexString);
    487         }
    488         errln("");
    489     }
    490 }
    491 
    492 
    493 /**
    494  * Return an integer array containing all of the collation orders
    495  * returned by calls to next on the specified iterator
    496  */
    497 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
    498 {
    499     int32_t maxSize = 100;
    500     int32_t size = 0;
    501     LocalArray<Order> orders(new Order[maxSize]);
    502     UErrorCode status = U_ZERO_ERROR;
    503     int32_t offset = iter.getOffset();
    504 
    505     int32_t order;
    506     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
    507     {
    508         if (size == maxSize)
    509         {
    510             maxSize *= 2;
    511             Order *temp = new Order[maxSize];
    512 
    513             uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
    514             orders.adoptInstead(temp);
    515         }
    516 
    517         orders[size].order  = order;
    518         orders[size].offset = offset;
    519 
    520         offset = iter.getOffset();
    521         size += 1;
    522     }
    523 
    524     if (maxSize > size)
    525     {
    526         Order *temp = new Order[size];
    527 
    528         uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
    529         orders.adoptInstead(temp);
    530     }
    531 
    532     orderLength = size;
    533     return orders.orphan();
    534 }
    535 
    536 #endif /* #if !UCONFIG_NO_COLLATION */
    537