Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 2002-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************
      6  *
      7  * @author Mark E. Davis
      8  * @author Vladimir Weinstein
      9  */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_NORMALIZATION
     14 
     15 #include "intltest.h"
     16 #include "cstring.h"
     17 #include "canittst.h"
     18 #include "unicode/caniter.h"
     19 #include "unicode/normlzr.h"
     20 #include "unicode/uchar.h"
     21 #include "hash.h"
     22 
     // Element count of a true array (as opposed to a pointer), narrowed to int32_t.
     #define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
     24 
     // Expands to one `case` of the runIndexedTest() switch: it always reports the
     // test's name through `name`, and calls the test method only when `exec` is set.
     // The trailing `break` has no semicolon so the use site supplies it.
     #define CASE(id, test)                \
         case id:                          \
             name = #test;                 \
             if (exec) {                   \
                 logln(#test "---");       \
                 logln((UnicodeString)""); \
                 test();                   \
             }                             \
             break
     33 
     34 void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
     35                                          const char* &name, char* /*par*/) {
     36     switch (index) {
     37         CASE(0, TestBasic);
     38         CASE(1, TestExhaustive);
     39         CASE(2, TestAPI);
     40       default: name = ""; break;
     41     }
     42 }
     43 
     44 /**
     45  * Convert Java-style strings with \u Unicode escapes into UnicodeString objects
     46 static UnicodeString str(const char *input)
     47 {
     48     UnicodeString str(input, ""); // Invariant conversion
     49     return str.unescape();
     50 }
     51  */
     52 
     53 
     54 CanonicalIteratorTest::CanonicalIteratorTest() :
     55 nameTrans(NULL), hexTrans(NULL)
     56 {
     57 }
     58 
     59 CanonicalIteratorTest::~CanonicalIteratorTest()
     60 {
     61 #if !UCONFIG_NO_TRANSLITERATION
     62   if(nameTrans != NULL) {
     63     delete(nameTrans);
     64   }
     65   if(hexTrans != NULL) {
     66     delete(hexTrans);
     67   }
     68 #endif
     69 }
     70 
     71 void CanonicalIteratorTest::TestExhaustive() {
     72     UErrorCode status = U_ZERO_ERROR;
     73     CanonicalIterator it("", status);
     74     if (U_FAILURE(status)) {
     75         dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
     76         return;
     77     }
     78     UChar32 i = 0;
     79     UnicodeString s;
     80     // Test static and dynamic class IDs
     81     if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
     82         errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
     83     }
     84     for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) {
     85         //for (i = 0xae00; i < 0xaf00; ++i) {
     86 
     87         if ((i % 0x100) == 0) {
     88             logln("Testing U+%06X", i);
     89         }
     90 
     91         // skip characters we know don't have decomps
     92         int8_t type = u_charType(i);
     93         if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR
     94             || type == U_SURROGATE) continue;
     95 
     96         s = i;
     97         characterTest(s, i, it);
     98 
     99         s += (UChar32)0x0345; //"\\u0345";
    100         characterTest(s, i, it);
    101     }
    102 }
    103 
    104 void CanonicalIteratorTest::TestBasic() {
    105 
    106     UErrorCode status = U_ZERO_ERROR;
    107 
    108     static const char * const testArray[][2] = {
    109         {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
    110             "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
    111             "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
    112             "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
    113         {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
    114         {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
    115     };
    116 
    117 #if 0
    118     // This is not interesting for C/C++ as the data is already built beforehand
    119     // check build
    120     UnicodeSet ss = CanonicalIterator.getSafeStart();
    121     logln("Safe Start: " + ss.toPattern(true));
    122     ss = CanonicalIterator.getStarts('a');
    123     expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
    124         new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
    125         + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
    126             );
    127 #endif
    128 
    129     // check permute
    130     // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
    131 
    132     Hashtable *permutations = new Hashtable(FALSE, status);
    133     permutations->setValueDeleter(uhash_deleteUnicodeString);
    134     UnicodeString toPermute("ABC");
    135 
    136     CanonicalIterator::permute(toPermute, FALSE, permutations, status);
    137 
    138     logln("testing permutation");
    139 
    140     expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
    141 
    142     delete permutations;
    143 
    144     // try samples
    145     logln("testing samples");
    146     Hashtable *set = new Hashtable(FALSE, status);
    147     set->setValueDeleter(uhash_deleteUnicodeString);
    148     int32_t i = 0;
    149     CanonicalIterator it("", status);
    150     if(U_SUCCESS(status)) {
    151       for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
    152           //logln("Results for: " + name.transliterate(testArray[i]));
    153           UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
    154           it.setSource(testStr, status);
    155           set->removeAll();
    156           for (;;) {
    157               //UnicodeString *result = new UnicodeString(it.next());
    158               UnicodeString result(it.next());
    159               if (result.isBogus()) {
    160                   break;
    161               }
    162               set->put(result, new UnicodeString(result), status); // Add result to the table
    163               //logln(++counter + ": " + hex.transliterate(result));
    164               //logln(" = " + name.transliterate(result));
    165           }
    166           expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
    167 
    168       }
    169     } else {
    170       dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
    171     }
    172     delete set;
    173 }
    174 
    175 void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it)
    176 {
    177     UErrorCode status = U_ZERO_ERROR;
    178     UnicodeString decomp, comp;
    179     UBool gotDecomp = FALSE;
    180     UBool gotComp = FALSE;
    181     UBool gotSource = FALSE;
    182 
    183     Normalizer::decompose(s, FALSE, 0, decomp, status);
    184     Normalizer::compose(s, FALSE, 0, comp, status);
    185 
    186     // skip characters that don't have either decomp.
    187     // need quick test for this!
    188     if (s == decomp && s == comp) {
    189         return;
    190     }
    191 
    192     it.setSource(s, status);
    193 
    194     for (;;) {
    195         UnicodeString item = it.next();
    196         if (item.isBogus()) break;
    197         if (item == s) gotSource = TRUE;
    198         if (item == decomp) gotDecomp = TRUE;
    199         if (item == comp) gotComp = TRUE;
    200     }
    201 
    202     if (!gotSource || !gotDecomp || !gotComp) {
    203         errln("FAIL CanonicalIterator: " + s + (int)ch);
    204     }
    205 }
    206 
    207 void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) {
    208     if (!(a==b)) {
    209         errln("FAIL: " + message + getReadable(item));
    210         errln("\t" + getReadable(a));
    211         errln("\t" + getReadable(b));
    212     } else {
    213         logln("Checked: " + message + getReadable(item));
    214         logln("\t" + getReadable(a));
    215         logln("\t" + getReadable(b));
    216     }
    217 }
    218 
    219 UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) {
    220   UErrorCode status = U_ZERO_ERROR;
    221   UnicodeString result = "[";
    222     if (s.length() == 0) return "";
    223     // set up for readable display
    224 #if !UCONFIG_NO_TRANSLITERATION
    225     if(verbose) {
    226       if (nameTrans == NULL)
    227           nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status);
    228       UnicodeString sName = s;
    229       nameTrans->transliterate(sName);
    230       result += sName;
    231       result += ";";
    232     }
    233     if (hexTrans == NULL)
    234         hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status);
    235 #endif
    236     UnicodeString sHex = s;
    237 #if !UCONFIG_NO_TRANSLITERATION
    238     if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated
    239       hexTrans->transliterate(sHex);
    240     }
    241 #endif
    242     result += sHex;
    243     result += "]";
    244     return result;
    245     //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
    246 }
    247 
    248 U_CFUNC int U_CALLCONV
    249 compareUnicodeStrings(const void *s1, const void *s2) {
    250   UnicodeString **st1 = (UnicodeString **)s1;
    251   UnicodeString **st2 = (UnicodeString **)s2;
    252 
    253   return (*st1)->compare(**st2);
    254 }
    255 
    256 
    257 UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) {
    258     UnicodeString result;
    259 
    260     // Iterate over the Hashtable, then qsort.
    261 
    262     UnicodeString **resArray = new UnicodeString*[col->count()];
    263     int32_t i = 0;
    264 
    265     const UHashElement *ne = NULL;
    266     int32_t el = -1;
    267     //Iterator it = basic.iterator();
    268     ne = col->nextElement(el);
    269     //while (it.hasNext())
    270     while (ne != NULL) {
    271       //String item = (String) it.next();
    272       UnicodeString *item = (UnicodeString *)(ne->value.pointer);
    273       resArray[i++] = item;
    274       ne = col->nextElement(el);
    275     }
    276 
    277     for(i = 0; i<col->count(); ++i) {
    278       logln(*resArray[i]);
    279     }
    280 
    281     qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings);
    282 
    283     result = *resArray[0];
    284 
    285     for(i = 1; i<col->count(); ++i) {
    286       result += ", ";
    287       result += *resArray[i];
    288     }
    289 
    290 /*
    291     Iterator it = col.iterator();
    292     while (it.hasNext()) {
    293         if (result.length() != 0) result.append(", ");
    294         result.append(it.next().toString());
    295     }
    296 */
    297 
    298     delete [] resArray;
    299 
    300     return result;
    301 }
    302 
    303 void CanonicalIteratorTest::TestAPI() {
    304   UErrorCode status = U_ZERO_ERROR;
    305   // Test reset and getSource
    306   UnicodeString start("ljubav");
    307   logln("Testing CanonicalIterator::getSource");
    308   logln("Instantiating canonical iterator with string "+start);
    309   CanonicalIterator can(start, status);
    310   if (U_FAILURE(status)) {
    311       dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
    312       return;
    313   }
    314   UnicodeString source = can.getSource();
    315   logln("CanonicalIterator::getSource returned "+source);
    316   if(start != source) {
    317     errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source);
    318   }
    319   logln("Testing CanonicalIterator::reset");
    320   UnicodeString next = can.next();
    321   logln("CanonicalIterator::next returned "+next);
    322 
    323   can.reset();
    324 
    325   UnicodeString afterReset = can.next();
    326   logln("After reset, CanonicalIterator::next returned "+afterReset);
    327 
    328   if(next != afterReset) {
    329     errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+").");
    330   }
    331 
    332   logln("Testing getStaticClassID and getDynamicClassID");
    333   if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
    334       errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
    335   }
    336 }
    337 
    338 #endif /* #if !UCONFIG_NO_NORMALIZATION */
    339