Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * Copyright (c) 1997-2009, International Business Machines
      3  * Corporation and others. All Rights Reserved.
      4  ********************************************************************
      5  *
      6  * File UCNVSELTST.C
      7  *
      8  * Modification History:
      9  *        Name                     Description
     10  *     MOHAMED ELDAWY               Creation
     11  ********************************************************************
     12  */
     13 
     14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
     15 
     16 #include "ucnvseltst.h"
     17 
     18 #include <stdio.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/ucnvsel.h"
     22 #include "unicode/ustring.h"
     23 #include "cmemory.h"
     24 #include "cstring.h"
     25 
     26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     27 
     28 #define FILENAME_BUFFER 1024
     29 
     30 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
     31 
     32 static void TestSelector(void);
     33 
     34 void addCnvSelTest(TestNode** root)
     35 {
     36     addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector");
     37 }
     38 
     39 static const char **gAvailableNames = NULL;
     40 static int32_t gCountAvailable = 0;
     41 
     42 static UBool
     43 getAvailableNames() {
     44   int32_t i;
     45   if (gAvailableNames != NULL) {
     46     return TRUE;
     47   }
     48   gCountAvailable = ucnv_countAvailable();
     49   if (gCountAvailable == 0) {
     50     log_data_err("No converters available.\n");
     51     return FALSE;
     52   }
     53   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
     54   if (gAvailableNames == NULL) {
     55     log_err("unable to allocate memory for %ld available converter names\n",
     56             (long)gCountAvailable);
     57     return FALSE;
     58   }
     59   for (i = 0; i < gCountAvailable; ++i) {
     60     gAvailableNames[i] = ucnv_getAvailableName(i);
     61   }
     62   return TRUE;
     63 }
     64 
     65 static void
     66 releaseAvailableNames() {
     67   uprv_free((void *)gAvailableNames);
     68   gAvailableNames = NULL;
     69   gCountAvailable = 0;
     70 }
     71 
     72 static const char **
     73 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
     74   const char **names;
     75   int32_t i;
     76 
     77   *pCount = 0;
     78   if (count <= 0) {
     79     return NULL;
     80   }
     81   names = (const char **)uprv_malloc(count * sizeof(char *));
     82   if (names == NULL) {
     83     log_err("memory allocation error for %ld pointers\n", (long)count);
     84     return NULL;
     85   }
     86   if (step == 0 && count > 0) {
     87     step = 1;
     88   }
     89   for (i = 0; i < count; ++i) {
     90     if (0 <= start && start < gCountAvailable) {
     91       names[i] = gAvailableNames[start];
     92       start += step;
     93       ++*pCount;
     94     }
     95   }
     96   return names;
     97 }
     98 
     99 #if 0
    100 /*
    101  * ucnvsel_open() does not support "no encodings":
    102  * Given 0 encodings it will open a selector for all available ones.
    103  */
    104 static const char **
    105 getNoEncodings(int32_t *pCount) {
    106   *pCount = 0;
    107   return NULL;
    108 }
    109 #endif
    110 
    111 static const char **
    112 getOneEncoding(int32_t *pCount) {
    113   return getEncodings(1, 0, 1, pCount);
    114 }
    115 
    116 static const char **
    117 getFirstEvenEncodings(int32_t *pCount) {
    118   return getEncodings(0, 2, 25, pCount);
    119 }
    120 
    121 static const char **
    122 getMiddleEncodings(int32_t *pCount) {
    123   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
    124 }
    125 
    126 static const char **
    127 getLastEncodings(int32_t *pCount) {
    128   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
    129 }
    130 
    131 static const char **
    132 getSomeEncodings(int32_t *pCount) {
    133   /* 20 evenly distributed */
    134   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
    135 }
    136 
    137 static const char **
    138 getEveryThirdEncoding(int32_t *pCount) {
    139   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
    140 }
    141 
    142 static const char **
    143 getAllEncodings(int32_t *pCount) {
    144   return getEncodings(0, 1, gCountAvailable, pCount);
    145 }
    146 
    147 typedef const char **GetEncodingsFn(int32_t *);
    148 
    149 static GetEncodingsFn *const getEncodingsFns[] = {
    150   getOneEncoding,
    151   getFirstEvenEncodings,
    152   getMiddleEncodings,
    153   getLastEncodings,
    154   getSomeEncodings,
    155   getEveryThirdEncoding,
    156   getAllEncodings
    157 };
    158 
    159 static FILE *fopenOrError(const char *filename) {
    160     int32_t needLen;
    161     FILE *f;
    162     char fnbuf[FILENAME_BUFFER];
    163     const char* directory= ctest_dataSrcDir();
    164     needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
    165     if(needLen > FILENAME_BUFFER) {
    166         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
    167                 filename, needLen, FILENAME_BUFFER);
    168         return NULL;
    169     }
    170 
    171     strcpy(fnbuf, directory);
    172     strcat(fnbuf, TDSRCPATH);
    173     strcat(fnbuf, filename);
    174 
    175     f = fopen(fnbuf, "rb");
    176 
    177     if(f == NULL) {
    178         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
    179     }
    180     return f;
    181 }
    182 
    183 typedef struct TestText {
    184   char *text, *textLimit;
    185   char *limit;
    186   int32_t number;
    187 } TestText;
    188 
    189 static void
    190 text_reset(TestText *tt) {
    191   tt->limit = tt->text;
    192   tt->number = 0;
    193 }
    194 
    195 static char *
    196 text_nextString(TestText *tt, int32_t *pLength) {
    197   char *s = tt->limit;
    198   if (s == tt->textLimit) {
    199     /* we already delivered the last string */
    200     return NULL;
    201   } else if (s == tt->text) {
    202     /* first string */
    203     if ((tt->textLimit - tt->text) >= 3 &&
    204         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
    205     ) {
    206       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
    207     }
    208   } else {
    209     /* skip the string terminator */
    210     ++s;
    211     ++tt->number;
    212   }
    213 
    214   /* find the end of this string */
    215   tt->limit = uprv_strchr(s, 0);
    216   *pLength = (int32_t)(tt->limit - s);
    217   return s;
    218 }
    219 
    220 static UBool
    221 text_open(TestText *tt) {
    222   FILE *f;
    223   char *s;
    224   int32_t length;
    225   uprv_memset(tt, 0, sizeof(TestText));
    226   f = fopenOrError("ConverterSelectorTestUTF8.txt");
    227   if(!f) {
    228     return FALSE;
    229   }
    230   fseek(f, 0, SEEK_END);
    231   length = (int32_t)ftell(f);
    232   fseek(f, 0, SEEK_SET);
    233   tt->text = (char *)uprv_malloc(length + 1);
    234   if (tt->text == NULL) {
    235     fclose(f);
    236     return FALSE;
    237   }
    238   if (length != fread(tt->text, 1, length, f)) {
    239     log_err("error reading %ld bytes from test text file\n", (long)length);
    240     length = 0;
    241     uprv_free(tt->text);
    242   }
    243   fclose(f);
    244   tt->textLimit = tt->text + length;
    245   *tt->textLimit = 0;
    246   /* replace all Unicode '#' (U+0023) with NUL */
    247   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
    248   text_reset(tt);
    249   return TRUE;
    250 }
    251 
    252 static void
    253 text_close(TestText *tt) {
    254   uprv_free(tt->text);
    255 }
    256 
    257 static int32_t findIndex(const char* converterName) {
    258   int32_t i;
    259   for (i = 0 ; i < gCountAvailable; i++) {
    260     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
    261       return i;
    262     }
    263   }
    264   return -1;
    265 }
    266 
    267 static UBool *
    268 getResultsManually(const char** encodings, int32_t num_encodings,
    269                    const char *utf8, int32_t length,
    270                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
    271   UBool* resultsManually;
    272   int32_t i;
    273 
    274   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
    275   uprv_memset(resultsManually, 0, gCountAvailable);
    276 
    277   for(i = 0 ; i < num_encodings ; i++) {
    278     UErrorCode status = U_ZERO_ERROR;
    279     /* get unicode set for that converter */
    280     USet* set;
    281     UConverter* test_converter;
    282     UChar32 cp;
    283     int32_t encIndex, offset;
    284 
    285     set = uset_openEmpty();
    286     test_converter = ucnv_open(encodings[i], &status);
    287     ucnv_getUnicodeSet(test_converter, set,
    288                        whichSet, &status);
    289     if (excludedCodePoints != NULL) {
    290       uset_addAll(set, excludedCodePoints);
    291     }
    292     uset_freeze(set);
    293     offset = 0;
    294     cp = 0;
    295 
    296     encIndex = findIndex(encodings[i]);
    297     /*
    298      * The following is almost, but not entirely, the same as
    299      * resultsManually[encIndex] =
    300      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
    301      * They might be different if the set contains strings,
    302      * or if the utf8 string contains an illegal sequence.
    303      *
    304      * The UConverterSelector does not currently handle strings that can be
    305      * converted, and it treats an illegal sequence as convertible
    306      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
    307      */
    308     resultsManually[encIndex] = TRUE;
    309     while(offset<length) {
    310       U8_NEXT(utf8, offset, length, cp);
    311       if (cp >= 0 && !uset_contains(set, cp)) {
    312         resultsManually[encIndex] = FALSE;
    313         break;
    314       }
    315     }
    316     uset_close(set);
    317     ucnv_close(test_converter);
    318   }
    319   return resultsManually;
    320 }
    321 
    322 /* closes res but does not free resultsManually */
    323 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
    324   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
    325   const char* name;
    326   UErrorCode status = U_ZERO_ERROR;
    327   int32_t i;
    328 
    329   /* fill the bool for the selector results! */
    330   uprv_memset(resultsFromSystem, 0, gCountAvailable);
    331   while ((name = uenum_next(res,NULL, &status)) != NULL) {
    332     resultsFromSystem[findIndex(name)] = TRUE;
    333   }
    334   for(i = 0 ; i < gCountAvailable; i++) {
    335     if(resultsManually[i] != resultsFromSystem[i]) {
    336       log_err("failure in converter selector\n"
    337               "converter %s had conflicting results -- manual: %d, system %d\n",
    338               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
    339     }
    340   }
    341   uprv_free(resultsFromSystem);
    342   uenum_close(res);
    343 }
    344 
    345 static UConverterSelector *
    346 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
    347   char *new_buffer;
    348   int32_t ser_len, ser_len2;
    349   /* preflight */
    350   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
    351   if (*status != U_BUFFER_OVERFLOW_ERROR) {
    352     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
    353     return sel;
    354   }
    355   new_buffer = (char *)uprv_malloc(ser_len);
    356   *status = U_ZERO_ERROR;
    357   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
    358   if (U_FAILURE(*status) || ser_len != ser_len2) {
    359     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
    360     uprv_free(new_buffer);
    361     return sel;
    362   }
    363   ucnvsel_close(sel);
    364   uprv_free(*buffer);
    365   *buffer = new_buffer;
    366   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
    367   if (U_FAILURE(*status)) {
    368     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
    369     return NULL;
    370   }
    371   return sel;
    372 }
    373 
    374 static void TestSelector()
    375 {
    376   TestText text;
    377   USet* excluded_sets[3] = { NULL };
    378   int32_t i, testCaseIdx;
    379 
    380   if (!getAvailableNames()) {
    381     return;
    382   }
    383   if (!text_open(&text)) {
    384     releaseAvailableNames();;
    385   }
    386 
    387   excluded_sets[0] = uset_openEmpty();
    388   for(i = 1 ; i < 3 ; i++) {
    389     excluded_sets[i] = uset_open(i*30, i*30+500);
    390   }
    391 
    392   for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++)
    393   {
    394     int32_t excluded_set_id;
    395     int32_t num_encodings;
    396     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
    397     if (QUICK && num_encodings > 25) {
    398       uprv_free((void *)encodings);
    399       continue;
    400     }
    401 
    402     /*
    403      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
    404      *
    405      * This loop was replaced by the following statement because
    406      * the loop made the test run longer without adding to the code coverage.
    407      * The handling of the exclusion set is independent of the
    408      * set of encodings, so there is no need to test every combination.
    409      */
    410     excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets);
    411     {
    412       UConverterSelector *sel_rt, *sel_fb;
    413       char *buffer_fb = NULL;
    414       UErrorCode status = U_ZERO_ERROR;
    415       sel_rt = ucnvsel_open(encodings, num_encodings,
    416                             excluded_sets[excluded_set_id],
    417                             UCNV_ROUNDTRIP_SET, &status);
    418       if (num_encodings == gCountAvailable) {
    419         /* test the special "all converters" parameter values */
    420         sel_fb = ucnvsel_open(NULL, 0,
    421                               excluded_sets[excluded_set_id],
    422                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    423       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
    424         /* test that a NULL set gives the same results as an empty set */
    425         sel_fb = ucnvsel_open(encodings, num_encodings,
    426                               NULL,
    427                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    428       } else {
    429         sel_fb = ucnvsel_open(encodings, num_encodings,
    430                               excluded_sets[excluded_set_id],
    431                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    432       }
    433       if (U_FAILURE(status)) {
    434         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
    435         ucnvsel_close(sel_rt);
    436         uprv_free((void *)encodings);
    437         continue;
    438       }
    439 
    440       text_reset(&text);
    441       for (;;) {
    442         UBool *manual_rt, *manual_fb;
    443         static UChar utf16[10000];
    444         char *s;
    445         int32_t length8, length16;
    446 
    447         s = text_nextString(&text, &length8);
    448         if (s == NULL || (QUICK && text.number > 3)) {
    449           break;
    450         }
    451 
    452         manual_rt = getResultsManually(encodings, num_encodings,
    453                                        s, length8,
    454                                        excluded_sets[excluded_set_id],
    455                                        UCNV_ROUNDTRIP_SET);
    456         manual_fb = getResultsManually(encodings, num_encodings,
    457                                        s, length8,
    458                                        excluded_sets[excluded_set_id],
    459                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
    460         /* UTF-8 with length */
    461         status = U_ZERO_ERROR;
    462         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
    463         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
    464         /* UTF-8 NUL-terminated */
    465         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
    466         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
    467 
    468         u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status);
    469         if (U_FAILURE(status)) {
    470           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
    471                   (long)text.number, u_errorName(status));
    472         } else {
    473           if (text.number == 0) {
    474             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
    475           }
    476           if (U_SUCCESS(status)) {
    477             /* UTF-16 with length */
    478             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
    479             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
    480             /* UTF-16 NUL-terminated */
    481             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
    482             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
    483           }
    484         }
    485 
    486         uprv_free(manual_rt);
    487         uprv_free(manual_fb);
    488       }
    489       ucnvsel_close(sel_rt);
    490       ucnvsel_close(sel_fb);
    491       uprv_free(buffer_fb);
    492     }
    493     uprv_free((void *)encodings);
    494   }
    495 
    496   releaseAvailableNames();
    497   text_close(&text);
    498   for(i = 0 ; i < 3 ; i++) {
    499     uset_close(excluded_sets[i]);
    500   }
    501 }
    502