Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 /**
      8  * IntlTestCollator is the medium level test class for everything in the directory "collate".
      9  */
     10 
     11 /***********************************************************************
     12 * Modification history
     13 * Date        Name        Description
     14 * 02/14/2001  synwee      Compare with cintltst and commented away tests
     15 *                         that are not run.
     16 ***********************************************************************/
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_COLLATION
     21 
     22 #include "unicode/uchar.h"
     23 #include "unicode/ustring.h"
     24 
     25 #include "dadrcoll.h"
     26 
     27 #include "encoll.h"
     28 #include "frcoll.h"
     29 #include "decoll.h"
     30 #include "escoll.h"
     31 #include "ficoll.h"
     32 #include "jacoll.h"
     33 #include "trcoll.h"
     34 #include "allcoll.h"
     35 #include "g7coll.h"
     36 #include "mnkytst.h"
     37 #include "apicoll.h"
     38 #include "regcoll.h"
     39 #include "currcoll.h"
     40 #include "itercoll.h"
     41 #include "tstnorm.h"
     42 #include "normconf.h"
     43 #include "thcoll.h"
     44 #include "srchtest.h"
     45 #include "ssearch.h"
     46 #include "cntabcol.h"
     47 #include "lcukocol.h"
     48 #include "ucaconf.h"
     49 #include "svccoll.h"
     50 #include "cmemory.h"
     51 //#include "rndmcoll.h"
     52 
     53 // Set to 1 to test offsets in backAndForth()
     54 #define TEST_OFFSETS 0
     55 
     56 #define TESTCLASS(n,classname)        \
     57     case n:                           \
     58         name = #classname;            \
     59         if (exec) {                   \
     60             logln(#classname "---");  \
     61             logln("");                \
     62             classname t;              \
     63             callTest(t, par);         \
     64         }                             \
     65         break
     66 
     67 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
     68 {
     69     if (exec) {
     70         logln("TestSuite Collator: ");
     71     }
     72 
     73     switch (index) {
     74       TESTCLASS(0, CollationEnglishTest);
     75       TESTCLASS(1, CollationFrenchTest);
     76       TESTCLASS(2, CollationGermanTest);
     77       TESTCLASS(3, CollationSpanishTest);
     78       TESTCLASS(4, CollationKanaTest);
     79       TESTCLASS(5, CollationTurkishTest);
     80       TESTCLASS(6, CollationDummyTest);
     81       TESTCLASS(7, G7CollationTest);
     82       TESTCLASS(8, CollationMonkeyTest);
     83       TESTCLASS(9, CollationAPITest);
     84       TESTCLASS(10, CollationRegressionTest);
     85       TESTCLASS(11, CollationCurrencyTest);
     86       TESTCLASS(12, CollationIteratorTest);
     87       TESTCLASS(13, CollationThaiTest);
     88       TESTCLASS(14, LotusCollationKoreanTest);
     89       TESTCLASS(15, StringSearchTest);
     90       TESTCLASS(16, ContractionTableTest);
     91       TESTCLASS(17, DataDrivenCollatorTest);
     92       TESTCLASS(18, UCAConformanceTest);
     93       TESTCLASS(19, CollationServiceTest);
     94       TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
     95       //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
     96       TESTCLASS(21, SSearchTest);
     97 
     98       default: name = ""; break;
     99     }
    100 }
    101 
    102 UCollationResult
    103 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
    104   int32_t partialSKResult = 0;
    105   uint8_t sBuf[512], tBuf[512];
    106   UCharIterator sIter, tIter;
    107   uint32_t sState[2], tState[2];
    108   int32_t sSize = pieceSize, tSize = pieceSize;
    109   int32_t i = 0;
    110   status = U_ZERO_ERROR;
    111   sState[0] = 0; sState[1] = 0;
    112   tState[0] = 0; tState[1] = 0;
    113   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
    114     uiter_setString(&sIter, source, sLen);
    115     uiter_setString(&tIter, target, tLen);
    116     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
    117     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
    118 
    119     if(sState[0] != 0 || tState[0] != 0) {
    120       log("State != 0 : %08X %08X\n", sState[0], tState[0]);
    121     }
    122     log("%i ", i++);
    123 
    124     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
    125   }
    126 
    127   if(partialSKResult < 0) {
    128       return UCOL_LESS;
    129   } else if(partialSKResult > 0) {
    130     return UCOL_GREATER;
    131   } else {
    132     return UCOL_EQUAL;
    133   }
    134 }
    135 
    136 void
    137 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
    138 {
    139   UErrorCode status = U_ZERO_ERROR;
    140 
    141   UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
    142 
    143   Collator::EComparisonResult compareResult = col->compare(source, target);
    144 
    145   CollationKey srckey, tgtkey;
    146   col->getCollationKey(source, srckey, status);
    147   col->getCollationKey(target, tgtkey, status);
    148   if (U_FAILURE(status)){
    149     errln("Creation of collation keys failed\n");
    150   }
    151   Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
    152 
    153   reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
    154 
    155     UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
    156 
    157     int32_t sLen = source.length(), tLen = target.length();
    158     const UChar* src = source.getBuffer();
    159     const UChar* trg = target.getBuffer();
    160     UCollationResult compareResultIter = (UCollationResult)result;
    161 
    162     {
    163       UCharIterator sIter, tIter;
    164       uiter_setString(&sIter, src, sLen);
    165       uiter_setString(&tIter, trg, tLen);
    166       compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    167       if(compareResultIter != (UCollationResult)result) {
    168         errln("Different result for iterative comparison "+source+" "+target);
    169       }
    170     }
    171     /* convert the strings to UTF-8 and do try comparing with char iterator */
    172     if(!quick) { /*!QUICK*/
    173       char utf8Source[256], utf8Target[256];
    174       int32_t utf8SourceLen = 0, utf8TargetLen = 0;
    175       u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
    176       if(U_FAILURE(status)) { /* probably buffer is not big enough */
    177         log("Src UTF-8 buffer too small! Will not compare!\n");
    178       } else {
    179         u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
    180         if(U_SUCCESS(status)) { /* probably buffer is not big enough */
    181           UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
    182           UCharIterator sIter, tIter;
    183           /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
    184           uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
    185           uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
    186        /*uiter_setString(&sIter, source, sLen);
    187       uiter_setString(&tIter, target, tLen);*/
    188           compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    189           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    190           sIter.move(&sIter, 0, UITER_START);
    191           tIter.move(&tIter, 0, UITER_START);
    192           compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    193           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
    194           if(compareResultUTF8 != compareResultIter) {
    195             errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
    196           }
    197           if(compareResultUTF8 != compareResultUTF8Norm) {
    198             errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
    199           }
    200         } else {
    201           log("Target UTF-8 buffer too small! Did not compare!\n");
    202         }
    203         if(U_FAILURE(status)) {
    204           log("UTF-8 strcoll failed! Ignoring result\n");
    205         }
    206       }
    207     }
    208 
    209     /* testing the partial sortkeys */
    210     { /*!QUICK*/
    211       int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
    212       int32_t partialSizesSize = 1;
    213       if(!quick) {
    214         partialSizesSize = 7;
    215       }
    216       int32_t i = 0;
    217       log("partial sortkey test piecesize=");
    218       for(i = 0; i < partialSizesSize; i++) {
    219         UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
    220         log("%i ", partialSizes[i]);
    221 
    222         partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
    223         if(partialSKResult != (UCollationResult)result) {
    224           errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
    225         }
    226 
    227         if(norm != UCOL_ON && !quick) {
    228           log("N ");
    229           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    230           partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
    231           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
    232           if(partialSKResult != partialNormalizedSKResult) {
    233             errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
    234           }
    235         }
    236       }
    237       log("\n");
    238     }
    239 /*
    240   if (compareResult != result) {
    241     errln("String comparison failed in variant test\n");
    242   }
    243   if (keyResult != result) {
    244     errln("Collation key comparison failed in variant test\n");
    245   }
    246 */
    247 }
    248 
    249 void
    250 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
    251   doTest(col, UnicodeString(source), UnicodeString(target), result);
    252 }
    253 
    254 void
    255 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
    256 {
    257   if(col) {
    258     doTestVariant(col, source, target, result);
    259     if(result == Collator::LESS) {
    260       doTestVariant(col, target, source, Collator::GREATER);
    261     } else if (result == Collator::GREATER) {
    262       doTestVariant(col, target, source, Collator::LESS);
    263     }
    264 
    265     UErrorCode status = U_ZERO_ERROR;
    266     CollationElementIterator* c = ((RuleBasedCollator *)col)->createCollationElementIterator( source );
    267     logln("Testing iterating source: "+source);
    268     backAndForth(*c);
    269     c->setText(target, status);
    270     logln("Testing iterating target: "+target);
    271     backAndForth(*c);
    272     delete c;
    273   }
    274 }
    275 
    276 
    277 // used for collation result reporting, defined here for convenience
    278 // (maybe moved later)
    279 void
    280 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
    281              CollationKey &sourceKey, CollationKey &targetKey,
    282              Collator::EComparisonResult compareResult,
    283              Collator::EComparisonResult keyResult,
    284                                 Collator::EComparisonResult incResult,
    285                          Collator::EComparisonResult expectedResult )
    286 {
    287     if (expectedResult < -1 || expectedResult > 1)
    288     {
    289         errln("***** invalid call to reportCResult ****");
    290         return;
    291     }
    292 
    293     UBool ok1 = (compareResult == expectedResult);
    294     UBool ok2 = (keyResult == expectedResult);
    295     UBool ok3 = (incResult == expectedResult);
    296 
    297 
    298     if (ok1 && ok2 && ok3 && !verbose) {
    299         // Keep non-verbose, passing tests fast
    300         return;
    301     } else {
    302         UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
    303         UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
    304         UnicodeString prettySource, prettyTarget, sExpect, sResult;
    305 
    306         IntlTest::prettify(source, prettySource);
    307         IntlTest::prettify(target, prettyTarget);
    308         appendCompareResult(compareResult, sResult);
    309         appendCompareResult(expectedResult, sExpect);
    310 
    311         if (ok1) {
    312             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    313         } else {
    314             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    315         }
    316 
    317         msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
    318         msg2 = ").compareTo(key(";
    319         msg3 = ")) returned ";
    320 
    321         appendCompareResult(keyResult, sResult);
    322 
    323         if (ok2) {
    324             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    325         } else {
    326             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    327 
    328             msg1 = "  ";
    329             msg2 = " vs. ";
    330 
    331             prettify(sourceKey, prettySource);
    332             prettify(targetKey, prettyTarget);
    333 
    334             errln(msg1 + prettySource + msg2 + prettyTarget);
    335         }
    336         msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
    337         msg2 = ", ";
    338         msg3 = ") returned ";
    339 
    340         appendCompareResult(incResult, sResult);
    341 
    342         if (ok3) {
    343             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    344         } else {
    345             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    346         }
    347     }
    348 }
    349 
    350 UnicodeString&
    351 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
    352                   UnicodeString& target)
    353 {
    354     if (result == Collator::LESS)
    355     {
    356         target += "LESS";
    357     }
    358     else if (result == Collator::EQUAL)
    359     {
    360         target += "EQUAL";
    361     }
    362     else if (result == Collator::GREATER)
    363     {
    364         target += "GREATER";
    365     }
    366     else
    367     {
    368         UnicodeString huh = "?";
    369 
    370         target += (huh + (int32_t)result);
    371     }
    372 
    373     return target;
    374 }
    375 
    376 // Produce a printable representation of a CollationKey
    377 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
    378 {
    379     int32_t i, byteCount;
    380     const uint8_t *bytes = source.getByteArray(byteCount);
    381 
    382     target.remove();
    383     target += "[";
    384 
    385     for (i = 0; i < byteCount; i += 1)
    386     {
    387         appendHex(bytes[i], 2, target);
    388         target += " ";
    389     }
    390 
    391     target += "]";
    392 
    393     return target;
    394 }
    395 
    396 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
    397 {
    398     // Run through the iterator forwards and stick it into an array
    399     int32_t orderLength = 0;
    400     Order *orders = getOrders(iter, orderLength);
    401     UErrorCode status = U_ZERO_ERROR;
    402 
    403     // Now go through it backwards and make sure we get the same values
    404     int32_t index = orderLength;
    405     int32_t o;
    406 
    407     // reset the iterator
    408     iter.reset();
    409 
    410     while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
    411     {
    412         /*int32_t offset = */iter.getOffset();
    413 
    414         if (index == 0) {
    415           if(o == 0) {
    416             continue;
    417           } else { // this is an error, orders exhausted but there are non-ignorable CEs from
    418             // going backwards
    419             errln("Backward iteration returned a non ignorable after orders are exhausted");
    420             break;
    421           }
    422         }
    423 
    424         index -= 1;
    425         if (o != orders[index].order) {
    426             if (o == 0)
    427                 index += 1;
    428             else {
    429                 while (index > 0 && orders[--index].order == 0) {
    430                   // nothing...
    431                 }
    432 
    433                 if (o != orders[index].order) {
    434                     errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
    435                     orders[index].order, o);
    436                 //break;
    437                   goto bail;
    438                 }
    439             }
    440         }
    441 
    442 #if TEST_OFFSETS
    443         if (offset != orders[index].offset) {
    444           errln("Mismatched offset at index %d: %d vs. %d", index,
    445             orders[index].offset, offset);
    446        //break;
    447          goto bail;
    448         }
    449 #endif
    450 
    451     }
    452 
    453     while (index != 0 && orders[index - 1].order == 0)
    454     {
    455       index --;
    456     }
    457 
    458     if (index != 0)
    459     {
    460         UnicodeString msg("Didn't get back to beginning - index is ");
    461         errln(msg + index);
    462 
    463         iter.reset();
    464         err("next: ");
    465         while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
    466         {
    467             UnicodeString hexString("0x");
    468 
    469             appendHex(o, 8, hexString);
    470             hexString += " ";
    471             err(hexString);
    472         }
    473         errln("");
    474 
    475         err("prev: ");
    476         while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
    477         {
    478             UnicodeString hexString("0x");
    479 
    480             appendHex(o, 8, hexString);
    481             hexString += " ";
    482              err(hexString);
    483         }
    484         errln("");
    485     }
    486 
    487 bail:
    488     delete[] orders;
    489 }
    490 
    491 
    492 /**
    493  * Return an integer array containing all of the collation orders
    494  * returned by calls to next on the specified iterator
    495  */
    496 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
    497 {
    498     int32_t maxSize = 100;
    499     int32_t size = 0;
    500     Order *orders = new Order[maxSize];
    501     UErrorCode status = U_ZERO_ERROR;
    502     int32_t offset = iter.getOffset();
    503 
    504     int32_t order;
    505     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
    506     {
    507         if (size == maxSize)
    508         {
    509             maxSize *= 2;
    510             Order *temp = new Order[maxSize];
    511 
    512             uprv_memcpy(temp, orders, size * sizeof(Order));
    513             delete[] orders;
    514             orders = temp;
    515         }
    516 
    517         orders[size].order  = order;
    518         orders[size].offset = offset;
    519 
    520         offset = iter.getOffset();
    521         size += 1;
    522     }
    523 
    524     if (maxSize > size)
    525     {
    526         Order *temp = new Order[size];
    527 
    528         uprv_memcpy(temp, orders, size * sizeof(Order));
    529         delete[] orders;
    530         orders = temp;
    531     }
    532 
    533     orderLength = size;
    534     return orders;
    535 }
    536 
    537 #endif /* #if !UCONFIG_NO_COLLATION */
    538