Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * Copyright (c) 1997-2009, International Business Machines
      3  * Corporation and others. All Rights Reserved.
      4  ********************************************************************
      5  *
      6  * File UCNVSELTST.C
      7  *
      8  * Modification History:
      9  *        Name                     Description
     10  *     MOHAMED ELDAWY               Creation
     11  ********************************************************************
     12  */
     13 
     14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
     15 
     16 #include "ucnvseltst.h"
     17 
     18 #include <stdio.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/ucnvsel.h"
     22 #include "unicode/ustring.h"
     23 #include "cmemory.h"
     24 #include "cstring.h"
     25 
     26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     27 
     28 #define FILENAME_BUFFER 1024
     29 
     30 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
     31 
     32 static void TestSelector(void);
     33 void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
     34 
     35 void addCnvSelTest(TestNode** root)
     36 {
     37     addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector");
     38 }
     39 
     40 static const char **gAvailableNames = NULL;
     41 static int32_t gCountAvailable = 0;
     42 
     43 static UBool
     44 getAvailableNames() {
     45   int32_t i;
     46   if (gAvailableNames != NULL) {
     47     return TRUE;
     48   }
     49   gCountAvailable = ucnv_countAvailable();
     50   if (gCountAvailable == 0) {
     51     log_data_err("No converters available.\n");
     52     return FALSE;
     53   }
     54   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
     55   if (gAvailableNames == NULL) {
     56     log_err("unable to allocate memory for %ld available converter names\n",
     57             (long)gCountAvailable);
     58     return FALSE;
     59   }
     60   for (i = 0; i < gCountAvailable; ++i) {
     61     gAvailableNames[i] = ucnv_getAvailableName(i);
     62   }
     63   return TRUE;
     64 }
     65 
     66 static void
     67 releaseAvailableNames() {
     68   uprv_free((void *)gAvailableNames);
     69   gAvailableNames = NULL;
     70   gCountAvailable = 0;
     71 }
     72 
     73 static const char **
     74 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
     75   const char **names;
     76   int32_t i;
     77 
     78   *pCount = 0;
     79   if (count <= 0) {
     80     return NULL;
     81   }
     82   names = (const char **)uprv_malloc(count * sizeof(char *));
     83   if (names == NULL) {
     84     log_err("memory allocation error for %ld pointers\n", (long)count);
     85     return NULL;
     86   }
     87   if (step == 0 && count > 0) {
     88     step = 1;
     89   }
     90   for (i = 0; i < count; ++i) {
     91     if (0 <= start && start < gCountAvailable) {
     92       names[i] = gAvailableNames[start];
     93       start += step;
     94       ++*pCount;
     95     }
     96   }
     97   return names;
     98 }
     99 
    100 #if 0
    101 /*
    102  * ucnvsel_open() does not support "no encodings":
    103  * Given 0 encodings it will open a selector for all available ones.
    104  */
    105 static const char **
    106 getNoEncodings(int32_t *pCount) {
    107   *pCount = 0;
    108   return NULL;
    109 }
    110 #endif
    111 
    112 static const char **
    113 getOneEncoding(int32_t *pCount) {
    114   return getEncodings(1, 0, 1, pCount);
    115 }
    116 
    117 static const char **
    118 getFirstEvenEncodings(int32_t *pCount) {
    119   return getEncodings(0, 2, 25, pCount);
    120 }
    121 
    122 static const char **
    123 getMiddleEncodings(int32_t *pCount) {
    124   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
    125 }
    126 
    127 static const char **
    128 getLastEncodings(int32_t *pCount) {
    129   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
    130 }
    131 
    132 static const char **
    133 getSomeEncodings(int32_t *pCount) {
    134   /* 20 evenly distributed */
    135   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
    136 }
    137 
    138 static const char **
    139 getEveryThirdEncoding(int32_t *pCount) {
    140   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
    141 }
    142 
    143 static const char **
    144 getAllEncodings(int32_t *pCount) {
    145   return getEncodings(0, 1, gCountAvailable, pCount);
    146 }
    147 
    148 typedef const char **GetEncodingsFn(int32_t *);
    149 
    150 static GetEncodingsFn *const getEncodingsFns[] = {
    151   getOneEncoding,
    152   getFirstEvenEncodings,
    153   getMiddleEncodings,
    154   getLastEncodings,
    155   getSomeEncodings,
    156   getEveryThirdEncoding,
    157   getAllEncodings
    158 };
    159 
    160 static FILE *fopenOrError(const char *filename) {
    161     int32_t needLen;
    162     FILE *f;
    163     char fnbuf[FILENAME_BUFFER];
    164     const char* directory= ctest_dataSrcDir();
    165     needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
    166     if(needLen > FILENAME_BUFFER) {
    167         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
    168                 filename, needLen, FILENAME_BUFFER);
    169         return NULL;
    170     }
    171 
    172     strcpy(fnbuf, directory);
    173     strcat(fnbuf, TDSRCPATH);
    174     strcat(fnbuf, filename);
    175 
    176     f = fopen(fnbuf, "rb");
    177 
    178     if(f == NULL) {
    179         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
    180     }
    181     return f;
    182 }
    183 
    184 typedef struct TestText {
    185   char *text, *textLimit;
    186   char *limit;
    187   int32_t number;
    188 } TestText;
    189 
    190 static void
    191 text_reset(TestText *tt) {
    192   tt->limit = tt->text;
    193   tt->number = 0;
    194 }
    195 
    196 static char *
    197 text_nextString(TestText *tt, int32_t *pLength) {
    198   char *s = tt->limit;
    199   if (s == tt->textLimit) {
    200     /* we already delivered the last string */
    201     return NULL;
    202   } else if (s == tt->text) {
    203     /* first string */
    204     if ((tt->textLimit - tt->text) >= 3 &&
    205         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
    206     ) {
    207       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
    208     }
    209   } else {
    210     /* skip the string terminator */
    211     ++s;
    212     ++tt->number;
    213   }
    214 
    215   /* find the end of this string */
    216   tt->limit = uprv_strchr(s, 0);
    217   *pLength = (int32_t)(tt->limit - s);
    218   return s;
    219 }
    220 
    221 static UBool
    222 text_open(TestText *tt) {
    223   FILE *f;
    224   char *s;
    225   int32_t length;
    226   uprv_memset(tt, 0, sizeof(TestText));
    227   f = fopenOrError("ConverterSelectorTestUTF8.txt");
    228   if(!f) {
    229     return FALSE;
    230   }
    231   fseek(f, 0, SEEK_END);
    232   length = (int32_t)ftell(f);
    233   fseek(f, 0, SEEK_SET);
    234   tt->text = (char *)uprv_malloc(length + 1);
    235   if (tt->text == NULL) {
    236     fclose(f);
    237     return FALSE;
    238   }
    239   if (length != fread(tt->text, 1, length, f)) {
    240     log_err("error reading %ld bytes from test text file\n", (long)length);
    241     length = 0;
    242     uprv_free(tt->text);
    243   }
    244   fclose(f);
    245   tt->textLimit = tt->text + length;
    246   *tt->textLimit = 0;
    247   /* replace all Unicode '#' (U+0023) with NUL */
    248   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
    249   text_reset(tt);
    250   return TRUE;
    251 }
    252 
    253 static void
    254 text_close(TestText *tt) {
    255   uprv_free(tt->text);
    256 }
    257 
    258 static int32_t findIndex(const char* converterName) {
    259   int32_t i;
    260   for (i = 0 ; i < gCountAvailable; i++) {
    261     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
    262       return i;
    263     }
    264   }
    265   return -1;
    266 }
    267 
    268 static UBool *
    269 getResultsManually(const char** encodings, int32_t num_encodings,
    270                    const char *utf8, int32_t length,
    271                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
    272   UBool* resultsManually;
    273   int32_t i;
    274 
    275   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
    276   uprv_memset(resultsManually, 0, gCountAvailable);
    277 
    278   for(i = 0 ; i < num_encodings ; i++) {
    279     UErrorCode status = U_ZERO_ERROR;
    280     /* get unicode set for that converter */
    281     USet* set;
    282     UConverter* test_converter;
    283     UChar32 cp;
    284     int32_t encIndex, offset;
    285 
    286     set = uset_openEmpty();
    287     test_converter = ucnv_open(encodings[i], &status);
    288     ucnv_getUnicodeSet(test_converter, set,
    289                        whichSet, &status);
    290     if (excludedCodePoints != NULL) {
    291       uset_addAll(set, excludedCodePoints);
    292     }
    293     uset_freeze(set);
    294     offset = 0;
    295     cp = 0;
    296 
    297     encIndex = findIndex(encodings[i]);
    298     /*
    299      * The following is almost, but not entirely, the same as
    300      * resultsManually[encIndex] =
    301      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
    302      * They might be different if the set contains strings,
    303      * or if the utf8 string contains an illegal sequence.
    304      *
    305      * The UConverterSelector does not currently handle strings that can be
    306      * converted, and it treats an illegal sequence as convertible
    307      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
    308      */
    309     resultsManually[encIndex] = TRUE;
    310     while(offset<length) {
    311       U8_NEXT(utf8, offset, length, cp);
    312       if (cp >= 0 && !uset_contains(set, cp)) {
    313         resultsManually[encIndex] = FALSE;
    314         break;
    315       }
    316     }
    317     uset_close(set);
    318     ucnv_close(test_converter);
    319   }
    320   return resultsManually;
    321 }
    322 
    323 /* closes res but does not free resultsManually */
    324 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
    325   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
    326   const char* name;
    327   UErrorCode status = U_ZERO_ERROR;
    328   int32_t i;
    329 
    330   /* fill the bool for the selector results! */
    331   uprv_memset(resultsFromSystem, 0, gCountAvailable);
    332   while ((name = uenum_next(res,NULL, &status)) != NULL) {
    333     resultsFromSystem[findIndex(name)] = TRUE;
    334   }
    335   for(i = 0 ; i < gCountAvailable; i++) {
    336     if(resultsManually[i] != resultsFromSystem[i]) {
    337       log_err("failure in converter selector\n"
    338               "converter %s had conflicting results -- manual: %d, system %d\n",
    339               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
    340     }
    341   }
    342   uprv_free(resultsFromSystem);
    343   uenum_close(res);
    344 }
    345 
    346 static UConverterSelector *
    347 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
    348   char *new_buffer;
    349   int32_t ser_len, ser_len2;
    350   /* preflight */
    351   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
    352   if (*status != U_BUFFER_OVERFLOW_ERROR) {
    353     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
    354     return sel;
    355   }
    356   new_buffer = (char *)uprv_malloc(ser_len);
    357   *status = U_ZERO_ERROR;
    358   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
    359   if (U_FAILURE(*status) || ser_len != ser_len2) {
    360     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
    361     uprv_free(new_buffer);
    362     return sel;
    363   }
    364   ucnvsel_close(sel);
    365   uprv_free(*buffer);
    366   *buffer = new_buffer;
    367   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
    368   if (U_FAILURE(*status)) {
    369     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
    370     return NULL;
    371   }
    372   return sel;
    373 }
    374 
    375 static void TestSelector()
    376 {
    377   TestText text;
    378   USet* excluded_sets[3] = { NULL };
    379   int32_t i, testCaseIdx;
    380 
    381   if (!getAvailableNames()) {
    382     return;
    383   }
    384   if (!text_open(&text)) {
    385     releaseAvailableNames();;
    386   }
    387 
    388   excluded_sets[0] = uset_openEmpty();
    389   for(i = 1 ; i < 3 ; i++) {
    390     excluded_sets[i] = uset_open(i*30, i*30+500);
    391   }
    392 
    393   for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++)
    394   {
    395     int32_t excluded_set_id;
    396     int32_t num_encodings;
    397     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
    398     if (QUICK && num_encodings > 25) {
    399       uprv_free((void *)encodings);
    400       continue;
    401     }
    402 
    403     /*
    404      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
    405      *
    406      * This loop was replaced by the following statement because
    407      * the loop made the test run longer without adding to the code coverage.
    408      * The handling of the exclusion set is independent of the
    409      * set of encodings, so there is no need to test every combination.
    410      */
    411     excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets);
    412     {
    413       UConverterSelector *sel_rt, *sel_fb;
    414       char *buffer_fb = NULL;
    415       UErrorCode status = U_ZERO_ERROR;
    416       sel_rt = ucnvsel_open(encodings, num_encodings,
    417                             excluded_sets[excluded_set_id],
    418                             UCNV_ROUNDTRIP_SET, &status);
    419       if (num_encodings == gCountAvailable) {
    420         /* test the special "all converters" parameter values */
    421         sel_fb = ucnvsel_open(NULL, 0,
    422                               excluded_sets[excluded_set_id],
    423                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    424       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
    425         /* test that a NULL set gives the same results as an empty set */
    426         sel_fb = ucnvsel_open(encodings, num_encodings,
    427                               NULL,
    428                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    429       } else {
    430         sel_fb = ucnvsel_open(encodings, num_encodings,
    431                               excluded_sets[excluded_set_id],
    432                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    433       }
    434       if (U_FAILURE(status)) {
    435         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
    436         ucnvsel_close(sel_rt);
    437         uprv_free((void *)encodings);
    438         continue;
    439       }
    440 
    441       text_reset(&text);
    442       for (;;) {
    443         UBool *manual_rt, *manual_fb;
    444         static UChar utf16[10000];
    445         char *s;
    446         int32_t length8, length16;
    447 
    448         s = text_nextString(&text, &length8);
    449         if (s == NULL || (QUICK && text.number > 3)) {
    450           break;
    451         }
    452 
    453         manual_rt = getResultsManually(encodings, num_encodings,
    454                                        s, length8,
    455                                        excluded_sets[excluded_set_id],
    456                                        UCNV_ROUNDTRIP_SET);
    457         manual_fb = getResultsManually(encodings, num_encodings,
    458                                        s, length8,
    459                                        excluded_sets[excluded_set_id],
    460                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
    461         /* UTF-8 with length */
    462         status = U_ZERO_ERROR;
    463         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
    464         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
    465         /* UTF-8 NUL-terminated */
    466         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
    467         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
    468 
    469         u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status);
    470         if (U_FAILURE(status)) {
    471           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
    472                   (long)text.number, u_errorName(status));
    473         } else {
    474           if (text.number == 0) {
    475             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
    476           }
    477           if (U_SUCCESS(status)) {
    478             /* UTF-16 with length */
    479             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
    480             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
    481             /* UTF-16 NUL-terminated */
    482             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
    483             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
    484           }
    485         }
    486 
    487         uprv_free(manual_rt);
    488         uprv_free(manual_fb);
    489       }
    490       ucnvsel_close(sel_rt);
    491       ucnvsel_close(sel_fb);
    492       uprv_free(buffer_fb);
    493     }
    494     uprv_free((void *)encodings);
    495   }
    496 
    497   releaseAvailableNames();
    498   text_close(&text);
    499   for(i = 0 ; i < 3 ; i++) {
    500     uset_close(excluded_sets[i]);
    501   }
    502 }
    503