Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2011, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 /**
      8  * IntlTestCollator is the medium level test class for everything in the directory "collate".
      9  */
     10 
     11 /***********************************************************************
     12 * Modification history
     13 * Date        Name        Description
     14 * 02/14/2001  synwee      Compare with cintltst and commented away tests
     15 *                         that are not run.
     16 ***********************************************************************/
     17 
     18 #include "unicode/utypes.h"
     19 
     20 #if !UCONFIG_NO_COLLATION
     21 
     22 #include "unicode/localpointer.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/ustring.h"
     25 
     26 #include "dadrcoll.h"
     27 
     28 #include "encoll.h"
     29 #include "frcoll.h"
     30 #include "decoll.h"
     31 #include "escoll.h"
     32 #include "ficoll.h"
     33 #include "jacoll.h"
     34 #include "trcoll.h"
     35 #include "allcoll.h"
     36 #include "g7coll.h"
     37 #include "mnkytst.h"
     38 #include "apicoll.h"
     39 #include "regcoll.h"
     40 #include "currcoll.h"
     41 #include "itercoll.h"
     42 #include "tstnorm.h"
     43 #include "normconf.h"
     44 #include "thcoll.h"
     45 #include "srchtest.h"
     46 #include "ssearch.h"
     47 #include "cntabcol.h"
     48 #include "lcukocol.h"
     49 #include "ucaconf.h"
     50 #include "svccoll.h"
     51 #include "cmemory.h"
     52 #include "alphaindextst.h"
     53 //#include "rndmcoll.h"
     54 
     55 // Set to 1 to test offsets in backAndForth()
     56 #define TEST_OFFSETS 0
     57 
     58 #define TESTCLASS(n,classname)        \
     59     case n:                           \
     60         name = #classname;            \
     61         if (exec) {                   \
     62             logln(#classname "---");  \
     63             logln("");                \
     64             classname t;              \
     65             callTest(t, par);         \
     66         }                             \
     67         break
     68 
     69 void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
     70 {
     71     if (exec) {
     72         logln("TestSuite Collator: ");
     73     }
     74 
     75     switch (index) {
     76       TESTCLASS(0, CollationEnglishTest);
     77       TESTCLASS(1, CollationFrenchTest);
     78       TESTCLASS(2, CollationGermanTest);
     79       TESTCLASS(3, CollationSpanishTest);
     80       TESTCLASS(4, CollationKanaTest);
     81       TESTCLASS(5, CollationTurkishTest);
     82       TESTCLASS(6, CollationDummyTest);
     83       TESTCLASS(7, G7CollationTest);
     84       TESTCLASS(8, CollationMonkeyTest);
     85       TESTCLASS(9, CollationAPITest);
     86       TESTCLASS(10, CollationRegressionTest);
     87       TESTCLASS(11, CollationCurrencyTest);
     88       TESTCLASS(12, CollationIteratorTest);
     89       TESTCLASS(13, CollationThaiTest);
     90       TESTCLASS(14, LotusCollationKoreanTest);
     91       TESTCLASS(15, StringSearchTest);
     92       TESTCLASS(16, ContractionTableTest);
     93 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
     94       TESTCLASS(17, DataDrivenCollatorTest);
     95 #endif
     96       TESTCLASS(18, UCAConformanceTest);
     97       TESTCLASS(19, CollationServiceTest);
     98       TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
     99       //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
    100       TESTCLASS(21, SSearchTest);
    101       TESTCLASS(22, AlphabeticIndexTest);
    102 
    103       default: name = ""; break;
    104     }
    105 }
    106 
    107 UCollationResult
    108 IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
    109   int32_t partialSKResult = 0;
    110   uint8_t sBuf[512], tBuf[512];
    111   UCharIterator sIter, tIter;
    112   uint32_t sState[2], tState[2];
    113   int32_t sSize = pieceSize, tSize = pieceSize;
    114   int32_t i = 0;
    115   status = U_ZERO_ERROR;
    116   sState[0] = 0; sState[1] = 0;
    117   tState[0] = 0; tState[1] = 0;
    118   while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
    119     uiter_setString(&sIter, source, sLen);
    120     uiter_setString(&tIter, target, tLen);
    121     sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
    122     tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
    123 
    124     if(sState[0] != 0 || tState[0] != 0) {
    125       log("State != 0 : %08X %08X\n", sState[0], tState[0]);
    126     }
    127     log("%i ", i++);
    128 
    129     partialSKResult = memcmp(sBuf, tBuf, pieceSize);
    130   }
    131 
    132   if(partialSKResult < 0) {
    133       return UCOL_LESS;
    134   } else if(partialSKResult > 0) {
    135     return UCOL_GREATER;
    136   } else {
    137     return UCOL_EQUAL;
    138   }
    139 }
    140 
    141 void
    142 IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
    143 {
    144   UErrorCode status = U_ZERO_ERROR;
    145 
    146   UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
    147 
    148   Collator::EComparisonResult compareResult = col->compare(source, target);
    149 
    150   CollationKey srckey, tgtkey;
    151   col->getCollationKey(source, srckey, status);
    152   col->getCollationKey(target, tgtkey, status);
    153   if (U_FAILURE(status)){
    154     errln("Creation of collation keys failed\n");
    155   }
    156   Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
    157 
    158   reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
    159 
    160     UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
    161 
    162     int32_t sLen = source.length(), tLen = target.length();
    163     const UChar* src = source.getBuffer();
    164     const UChar* trg = target.getBuffer();
    165     UCollationResult compareResultIter = (UCollationResult)result;
    166 
    167     {
    168       UCharIterator sIter, tIter;
    169       uiter_setString(&sIter, src, sLen);
    170       uiter_setString(&tIter, trg, tLen);
    171       compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    172       if(compareResultIter != (UCollationResult)result) {
    173         errln("Different result for iterative comparison "+source+" "+target);
    174       }
    175     }
    176     /* convert the strings to UTF-8 and do try comparing with char iterator */
    177     if(!quick) { /*!QUICK*/
    178       char utf8Source[256], utf8Target[256];
    179       int32_t utf8SourceLen = 0, utf8TargetLen = 0;
    180       u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
    181       if(U_FAILURE(status)) { /* probably buffer is not big enough */
    182         log("Src UTF-8 buffer too small! Will not compare!\n");
    183       } else {
    184         u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
    185         if(U_SUCCESS(status)) { /* probably buffer is not big enough */
    186           UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
    187           UCharIterator sIter, tIter;
    188           /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
    189           uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
    190           uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
    191        /*uiter_setString(&sIter, source, sLen);
    192       uiter_setString(&tIter, target, tLen);*/
    193           compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    194           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    195           sIter.move(&sIter, 0, UITER_START);
    196           tIter.move(&tIter, 0, UITER_START);
    197           compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    198           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
    199           if(compareResultUTF8 != compareResultIter) {
    200             errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
    201           }
    202           if(compareResultUTF8 != compareResultUTF8Norm) {
    203             errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
    204           }
    205         } else {
    206           log("Target UTF-8 buffer too small! Did not compare!\n");
    207         }
    208         if(U_FAILURE(status)) {
    209           log("UTF-8 strcoll failed! Ignoring result\n");
    210         }
    211       }
    212     }
    213 
    214     /* testing the partial sortkeys */
    215     { /*!QUICK*/
    216       int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
    217       int32_t partialSizesSize = 1;
    218       if(!quick) {
    219         partialSizesSize = 7;
    220       }
    221       int32_t i = 0;
    222       log("partial sortkey test piecesize=");
    223       for(i = 0; i < partialSizesSize; i++) {
    224         UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
    225         log("%i ", partialSizes[i]);
    226 
    227         partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
    228         if(partialSKResult != (UCollationResult)result) {
    229           errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
    230         }
    231 
    232         if(norm != UCOL_ON && !quick) {
    233           log("N ");
    234           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    235           partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
    236           ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
    237           if(partialSKResult != partialNormalizedSKResult) {
    238             errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
    239           }
    240         }
    241       }
    242       log("\n");
    243     }
    244 /*
    245   if (compareResult != result) {
    246     errln("String comparison failed in variant test\n");
    247   }
    248   if (keyResult != result) {
    249     errln("Collation key comparison failed in variant test\n");
    250   }
    251 */
    252 }
    253 
    254 void
    255 IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
    256   doTest(col, UnicodeString(source), UnicodeString(target), result);
    257 }
    258 
    259 void
    260 IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
    261 {
    262   if(col) {
    263     doTestVariant(col, source, target, result);
    264     if(result == Collator::LESS) {
    265       doTestVariant(col, target, source, Collator::GREATER);
    266     } else if (result == Collator::GREATER) {
    267       doTestVariant(col, target, source, Collator::LESS);
    268     }
    269 
    270     UErrorCode status = U_ZERO_ERROR;
    271     LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
    272     logln("Testing iterating source: "+source);
    273     backAndForth(*c);
    274     c->setText(target, status);
    275     logln("Testing iterating target: "+target);
    276     backAndForth(*c);
    277   }
    278 }
    279 
    280 
    281 // used for collation result reporting, defined here for convenience
    282 // (maybe moved later)
    283 void
    284 IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
    285              CollationKey &sourceKey, CollationKey &targetKey,
    286              Collator::EComparisonResult compareResult,
    287              Collator::EComparisonResult keyResult,
    288                                 Collator::EComparisonResult incResult,
    289                          Collator::EComparisonResult expectedResult )
    290 {
    291     if (expectedResult < -1 || expectedResult > 1)
    292     {
    293         errln("***** invalid call to reportCResult ****");
    294         return;
    295     }
    296 
    297     UBool ok1 = (compareResult == expectedResult);
    298     UBool ok2 = (keyResult == expectedResult);
    299     UBool ok3 = (incResult == expectedResult);
    300 
    301 
    302     if (ok1 && ok2 && ok3 && !verbose) {
    303         // Keep non-verbose, passing tests fast
    304         return;
    305     } else {
    306         UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
    307         UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
    308         UnicodeString prettySource, prettyTarget, sExpect, sResult;
    309 
    310         IntlTest::prettify(source, prettySource);
    311         IntlTest::prettify(target, prettyTarget);
    312         appendCompareResult(compareResult, sResult);
    313         appendCompareResult(expectedResult, sExpect);
    314 
    315         if (ok1) {
    316             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    317         } else {
    318             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    319         }
    320 
    321         msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
    322         msg2 = ").compareTo(key(";
    323         msg3 = ")) returned ";
    324 
    325         appendCompareResult(keyResult, sResult);
    326 
    327         if (ok2) {
    328             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    329         } else {
    330             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    331 
    332             msg1 = "  ";
    333             msg2 = " vs. ";
    334 
    335             prettify(sourceKey, prettySource);
    336             prettify(targetKey, prettyTarget);
    337 
    338             errln(msg1 + prettySource + msg2 + prettyTarget);
    339         }
    340         msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
    341         msg2 = ", ";
    342         msg3 = ") returned ";
    343 
    344         appendCompareResult(incResult, sResult);
    345 
    346         if (ok3) {
    347             logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
    348         } else {
    349             errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
    350         }
    351     }
    352 }
    353 
    354 UnicodeString&
    355 IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
    356                   UnicodeString& target)
    357 {
    358     if (result == Collator::LESS)
    359     {
    360         target += "LESS";
    361     }
    362     else if (result == Collator::EQUAL)
    363     {
    364         target += "EQUAL";
    365     }
    366     else if (result == Collator::GREATER)
    367     {
    368         target += "GREATER";
    369     }
    370     else
    371     {
    372         UnicodeString huh = "?";
    373 
    374         target += (huh + (int32_t)result);
    375     }
    376 
    377     return target;
    378 }
    379 
    380 // Produce a printable representation of a CollationKey
    381 UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
    382 {
    383     int32_t i, byteCount;
    384     const uint8_t *bytes = source.getByteArray(byteCount);
    385 
    386     target.remove();
    387     target += "[";
    388 
    389     for (i = 0; i < byteCount; i += 1)
    390     {
    391         if (i != 0) {
    392             target += " ";
    393         }
    394         appendHex(bytes[i], 2, target);
    395     }
    396 
    397     target += "]";
    398 
    399     return target;
    400 }
    401 
    402 void IntlTestCollator::backAndForth(CollationElementIterator &iter)
    403 {
    404     // Run through the iterator forwards and stick it into an array
    405     int32_t orderLength = 0;
    406     LocalArray<Order> orders(getOrders(iter, orderLength));
    407     UErrorCode status = U_ZERO_ERROR;
    408 
    409     // Now go through it backwards and make sure we get the same values
    410     int32_t index = orderLength;
    411     int32_t o;
    412 
    413     // reset the iterator
    414     iter.reset();
    415 
    416     while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
    417     {
    418         /*int32_t offset = */iter.getOffset();
    419 
    420         if (index == 0) {
    421           if(o == 0) {
    422             continue;
    423           } else { // this is an error, orders exhausted but there are non-ignorable CEs from
    424             // going backwards
    425             errln("Backward iteration returned a non ignorable after orders are exhausted");
    426             break;
    427           }
    428         }
    429 
    430         index -= 1;
    431         if (o != orders[index].order) {
    432             if (o == 0)
    433                 index += 1;
    434             else {
    435                 while (index > 0 && orders[--index].order == 0) {
    436                   // nothing...
    437                 }
    438 
    439                 if (o != orders[index].order) {
    440                     errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
    441                     orders[index].order, o);
    442                 //break;
    443                   return;
    444                 }
    445             }
    446         }
    447 
    448 #if TEST_OFFSETS
    449         if (offset != orders[index].offset) {
    450           errln("Mismatched offset at index %d: %d vs. %d", index,
    451             orders[index].offset, offset);
    452        //break;
    453          return;
    454         }
    455 #endif
    456 
    457     }
    458 
    459     while (index != 0 && orders[index - 1].order == 0)
    460     {
    461       index --;
    462     }
    463 
    464     if (index != 0)
    465     {
    466         UnicodeString msg("Didn't get back to beginning - index is ");
    467         errln(msg + index);
    468 
    469         iter.reset();
    470         err("next: ");
    471         while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
    472         {
    473             UnicodeString hexString("0x");
    474 
    475             appendHex(o, 8, hexString);
    476             hexString += " ";
    477             err(hexString);
    478         }
    479         errln("");
    480 
    481         err("prev: ");
    482         while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
    483         {
    484             UnicodeString hexString("0x");
    485 
    486             appendHex(o, 8, hexString);
    487             hexString += " ";
    488              err(hexString);
    489         }
    490         errln("");
    491     }
    492 }
    493 
    494 
    495 /**
    496  * Return an integer array containing all of the collation orders
    497  * returned by calls to next on the specified iterator
    498  */
    499 IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
    500 {
    501     int32_t maxSize = 100;
    502     int32_t size = 0;
    503     LocalArray<Order> orders(new Order[maxSize]);
    504     UErrorCode status = U_ZERO_ERROR;
    505     int32_t offset = iter.getOffset();
    506 
    507     int32_t order;
    508     while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
    509     {
    510         if (size == maxSize)
    511         {
    512             maxSize *= 2;
    513             Order *temp = new Order[maxSize];
    514 
    515             uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
    516             orders.adoptInstead(temp);
    517         }
    518 
    519         orders[size].order  = order;
    520         orders[size].offset = offset;
    521 
    522         offset = iter.getOffset();
    523         size += 1;
    524     }
    525 
    526     if (maxSize > size)
    527     {
    528         Order *temp = new Order[size];
    529 
    530         uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
    531         orders.adoptInstead(temp);
    532     }
    533 
    534     orderLength = size;
    535     return orders.orphan();
    536 }
    537 
    538 #endif /* #if !UCONFIG_NO_COLLATION */
    539