Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * Copyright (c) 1997-2010, International Business Machines
      3  * Corporation and others. All Rights Reserved.
      4  ********************************************************************
      5  *
      6  * File UCNVSELTST.C
      7  *
      8  * Modification History:
      9  *        Name                     Description
     10  *     MOHAMED ELDAWY               Creation
     11  ********************************************************************
     12  */
     13 
     14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
     15 
     16 #include "ucnvseltst.h"
     17 
     18 #include <stdio.h>
     19 
     20 #include "unicode/utypes.h"
     21 #include "unicode/ucnvsel.h"
     22 #include "unicode/ustring.h"
     23 #include "cmemory.h"
     24 #include "cstring.h"
     25 #include "propsvec.h"
     26 
     27 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     28 
     29 #define FILENAME_BUFFER 1024
     30 
     31 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
     32 
     33 static void TestSelector(void);
     34 static void TestUPropsVector(void);
     35 void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
     36 
     37 void addCnvSelTest(TestNode** root)
     38 {
     39     addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector");
     40     addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
     41 }
     42 
     43 static const char **gAvailableNames = NULL;
     44 static int32_t gCountAvailable = 0;
     45 
     46 static UBool
     47 getAvailableNames() {
     48   int32_t i;
     49   if (gAvailableNames != NULL) {
     50     return TRUE;
     51   }
     52   gCountAvailable = ucnv_countAvailable();
     53   if (gCountAvailable == 0) {
     54     log_data_err("No converters available.\n");
     55     return FALSE;
     56   }
     57   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
     58   if (gAvailableNames == NULL) {
     59     log_err("unable to allocate memory for %ld available converter names\n",
     60             (long)gCountAvailable);
     61     return FALSE;
     62   }
     63   for (i = 0; i < gCountAvailable; ++i) {
     64     gAvailableNames[i] = ucnv_getAvailableName(i);
     65   }
     66   return TRUE;
     67 }
     68 
     69 static void
     70 releaseAvailableNames() {
     71   uprv_free((void *)gAvailableNames);
     72   gAvailableNames = NULL;
     73   gCountAvailable = 0;
     74 }
     75 
     76 static const char **
     77 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
     78   const char **names;
     79   int32_t i;
     80 
     81   *pCount = 0;
     82   if (count <= 0) {
     83     return NULL;
     84   }
     85   names = (const char **)uprv_malloc(count * sizeof(char *));
     86   if (names == NULL) {
     87     log_err("memory allocation error for %ld pointers\n", (long)count);
     88     return NULL;
     89   }
     90   if (step == 0 && count > 0) {
     91     step = 1;
     92   }
     93   for (i = 0; i < count; ++i) {
     94     if (0 <= start && start < gCountAvailable) {
     95       names[i] = gAvailableNames[start];
     96       start += step;
     97       ++*pCount;
     98     }
     99   }
    100   return names;
    101 }
    102 
    103 #if 0
    104 /*
    105  * ucnvsel_open() does not support "no encodings":
    106  * Given 0 encodings it will open a selector for all available ones.
    107  */
    108 static const char **
    109 getNoEncodings(int32_t *pCount) {
    110   *pCount = 0;
    111   return NULL;
    112 }
    113 #endif
    114 
    115 static const char **
    116 getOneEncoding(int32_t *pCount) {
    117   return getEncodings(1, 0, 1, pCount);
    118 }
    119 
    120 static const char **
    121 getFirstEvenEncodings(int32_t *pCount) {
    122   return getEncodings(0, 2, 25, pCount);
    123 }
    124 
    125 static const char **
    126 getMiddleEncodings(int32_t *pCount) {
    127   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
    128 }
    129 
    130 static const char **
    131 getLastEncodings(int32_t *pCount) {
    132   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
    133 }
    134 
    135 static const char **
    136 getSomeEncodings(int32_t *pCount) {
    137   /* 20 evenly distributed */
    138   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
    139 }
    140 
    141 static const char **
    142 getEveryThirdEncoding(int32_t *pCount) {
    143   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
    144 }
    145 
    146 static const char **
    147 getAllEncodings(int32_t *pCount) {
    148   return getEncodings(0, 1, gCountAvailable, pCount);
    149 }
    150 
    151 typedef const char **GetEncodingsFn(int32_t *);
    152 
    153 static GetEncodingsFn *const getEncodingsFns[] = {
    154   getOneEncoding,
    155   getFirstEvenEncodings,
    156   getMiddleEncodings,
    157   getLastEncodings,
    158   getSomeEncodings,
    159   getEveryThirdEncoding,
    160   getAllEncodings
    161 };
    162 
    163 static FILE *fopenOrError(const char *filename) {
    164     int32_t needLen;
    165     FILE *f;
    166     char fnbuf[FILENAME_BUFFER];
    167     const char* directory= ctest_dataSrcDir();
    168     needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
    169     if(needLen > FILENAME_BUFFER) {
    170         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
    171                 filename, needLen, FILENAME_BUFFER);
    172         return NULL;
    173     }
    174 
    175     strcpy(fnbuf, directory);
    176     strcat(fnbuf, TDSRCPATH);
    177     strcat(fnbuf, filename);
    178 
    179     f = fopen(fnbuf, "rb");
    180 
    181     if(f == NULL) {
    182         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
    183     }
    184     return f;
    185 }
    186 
    187 typedef struct TestText {
    188   char *text, *textLimit;
    189   char *limit;
    190   int32_t number;
    191 } TestText;
    192 
    193 static void
    194 text_reset(TestText *tt) {
    195   tt->limit = tt->text;
    196   tt->number = 0;
    197 }
    198 
    199 static char *
    200 text_nextString(TestText *tt, int32_t *pLength) {
    201   char *s = tt->limit;
    202   if (s == tt->textLimit) {
    203     /* we already delivered the last string */
    204     return NULL;
    205   } else if (s == tt->text) {
    206     /* first string */
    207     if ((tt->textLimit - tt->text) >= 3 &&
    208         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
    209     ) {
    210       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
    211     }
    212   } else {
    213     /* skip the string terminator */
    214     ++s;
    215     ++tt->number;
    216   }
    217 
    218   /* find the end of this string */
    219   tt->limit = uprv_strchr(s, 0);
    220   *pLength = (int32_t)(tt->limit - s);
    221   return s;
    222 }
    223 
    224 static UBool
    225 text_open(TestText *tt) {
    226   FILE *f;
    227   char *s;
    228   int32_t length;
    229   uprv_memset(tt, 0, sizeof(TestText));
    230   f = fopenOrError("ConverterSelectorTestUTF8.txt");
    231   if(!f) {
    232     return FALSE;
    233   }
    234   fseek(f, 0, SEEK_END);
    235   length = (int32_t)ftell(f);
    236   fseek(f, 0, SEEK_SET);
    237   tt->text = (char *)uprv_malloc(length + 1);
    238   if (tt->text == NULL) {
    239     fclose(f);
    240     return FALSE;
    241   }
    242   if (length != fread(tt->text, 1, length, f)) {
    243     log_err("error reading %ld bytes from test text file\n", (long)length);
    244     length = 0;
    245     uprv_free(tt->text);
    246   }
    247   fclose(f);
    248   tt->textLimit = tt->text + length;
    249   *tt->textLimit = 0;
    250   /* replace all Unicode '#' (U+0023) with NUL */
    251   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
    252   text_reset(tt);
    253   return TRUE;
    254 }
    255 
    256 static void
    257 text_close(TestText *tt) {
    258   uprv_free(tt->text);
    259 }
    260 
    261 static int32_t findIndex(const char* converterName) {
    262   int32_t i;
    263   for (i = 0 ; i < gCountAvailable; i++) {
    264     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
    265       return i;
    266     }
    267   }
    268   return -1;
    269 }
    270 
    271 static UBool *
    272 getResultsManually(const char** encodings, int32_t num_encodings,
    273                    const char *utf8, int32_t length,
    274                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
    275   UBool* resultsManually;
    276   int32_t i;
    277 
    278   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
    279   uprv_memset(resultsManually, 0, gCountAvailable);
    280 
    281   for(i = 0 ; i < num_encodings ; i++) {
    282     UErrorCode status = U_ZERO_ERROR;
    283     /* get unicode set for that converter */
    284     USet* set;
    285     UConverter* test_converter;
    286     UChar32 cp;
    287     int32_t encIndex, offset;
    288 
    289     set = uset_openEmpty();
    290     test_converter = ucnv_open(encodings[i], &status);
    291     ucnv_getUnicodeSet(test_converter, set,
    292                        whichSet, &status);
    293     if (excludedCodePoints != NULL) {
    294       uset_addAll(set, excludedCodePoints);
    295     }
    296     uset_freeze(set);
    297     offset = 0;
    298     cp = 0;
    299 
    300     encIndex = findIndex(encodings[i]);
    301     /*
    302      * The following is almost, but not entirely, the same as
    303      * resultsManually[encIndex] =
    304      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
    305      * They might be different if the set contains strings,
    306      * or if the utf8 string contains an illegal sequence.
    307      *
    308      * The UConverterSelector does not currently handle strings that can be
    309      * converted, and it treats an illegal sequence as convertible
    310      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
    311      */
    312     resultsManually[encIndex] = TRUE;
    313     while(offset<length) {
    314       U8_NEXT(utf8, offset, length, cp);
    315       if (cp >= 0 && !uset_contains(set, cp)) {
    316         resultsManually[encIndex] = FALSE;
    317         break;
    318       }
    319     }
    320     uset_close(set);
    321     ucnv_close(test_converter);
    322   }
    323   return resultsManually;
    324 }
    325 
    326 /* closes res but does not free resultsManually */
    327 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
    328   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
    329   const char* name;
    330   UErrorCode status = U_ZERO_ERROR;
    331   int32_t i;
    332 
    333   /* fill the bool for the selector results! */
    334   uprv_memset(resultsFromSystem, 0, gCountAvailable);
    335   while ((name = uenum_next(res,NULL, &status)) != NULL) {
    336     resultsFromSystem[findIndex(name)] = TRUE;
    337   }
    338   for(i = 0 ; i < gCountAvailable; i++) {
    339     if(resultsManually[i] != resultsFromSystem[i]) {
    340       log_err("failure in converter selector\n"
    341               "converter %s had conflicting results -- manual: %d, system %d\n",
    342               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
    343     }
    344   }
    345   uprv_free(resultsFromSystem);
    346   uenum_close(res);
    347 }
    348 
    349 static UConverterSelector *
    350 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
    351   char *new_buffer;
    352   int32_t ser_len, ser_len2;
    353   /* preflight */
    354   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
    355   if (*status != U_BUFFER_OVERFLOW_ERROR) {
    356     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
    357     return sel;
    358   }
    359   new_buffer = (char *)uprv_malloc(ser_len);
    360   *status = U_ZERO_ERROR;
    361   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
    362   if (U_FAILURE(*status) || ser_len != ser_len2) {
    363     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
    364     uprv_free(new_buffer);
    365     return sel;
    366   }
    367   ucnvsel_close(sel);
    368   uprv_free(*buffer);
    369   *buffer = new_buffer;
    370   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
    371   if (U_FAILURE(*status)) {
    372     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
    373     return NULL;
    374   }
    375   return sel;
    376 }
    377 
    378 static void TestSelector()
    379 {
    380   TestText text;
    381   USet* excluded_sets[3] = { NULL };
    382   int32_t i, testCaseIdx;
    383 
    384   if (!getAvailableNames()) {
    385     return;
    386   }
    387   if (!text_open(&text)) {
    388     releaseAvailableNames();;
    389   }
    390 
    391   excluded_sets[0] = uset_openEmpty();
    392   for(i = 1 ; i < 3 ; i++) {
    393     excluded_sets[i] = uset_open(i*30, i*30+500);
    394   }
    395 
    396   for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++)
    397   {
    398     int32_t excluded_set_id;
    399     int32_t num_encodings;
    400     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
    401     if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
    402       uprv_free((void *)encodings);
    403       continue;
    404     }
    405 
    406     /*
    407      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
    408      *
    409      * This loop was replaced by the following statement because
    410      * the loop made the test run longer without adding to the code coverage.
    411      * The handling of the exclusion set is independent of the
    412      * set of encodings, so there is no need to test every combination.
    413      */
    414     excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets);
    415     {
    416       UConverterSelector *sel_rt, *sel_fb;
    417       char *buffer_fb = NULL;
    418       UErrorCode status = U_ZERO_ERROR;
    419       sel_rt = ucnvsel_open(encodings, num_encodings,
    420                             excluded_sets[excluded_set_id],
    421                             UCNV_ROUNDTRIP_SET, &status);
    422       if (num_encodings == gCountAvailable) {
    423         /* test the special "all converters" parameter values */
    424         sel_fb = ucnvsel_open(NULL, 0,
    425                               excluded_sets[excluded_set_id],
    426                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    427       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
    428         /* test that a NULL set gives the same results as an empty set */
    429         sel_fb = ucnvsel_open(encodings, num_encodings,
    430                               NULL,
    431                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    432       } else {
    433         sel_fb = ucnvsel_open(encodings, num_encodings,
    434                               excluded_sets[excluded_set_id],
    435                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
    436       }
    437       if (U_FAILURE(status)) {
    438         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
    439         ucnvsel_close(sel_rt);
    440         uprv_free((void *)encodings);
    441         continue;
    442       }
    443 
    444       text_reset(&text);
    445       for (;;) {
    446         UBool *manual_rt, *manual_fb;
    447         static UChar utf16[10000];
    448         char *s;
    449         int32_t length8, length16;
    450 
    451         s = text_nextString(&text, &length8);
    452         if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
    453           break;
    454         }
    455 
    456         manual_rt = getResultsManually(encodings, num_encodings,
    457                                        s, length8,
    458                                        excluded_sets[excluded_set_id],
    459                                        UCNV_ROUNDTRIP_SET);
    460         manual_fb = getResultsManually(encodings, num_encodings,
    461                                        s, length8,
    462                                        excluded_sets[excluded_set_id],
    463                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
    464         /* UTF-8 with length */
    465         status = U_ZERO_ERROR;
    466         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
    467         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
    468         /* UTF-8 NUL-terminated */
    469         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
    470         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
    471 
    472         u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status);
    473         if (U_FAILURE(status)) {
    474           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
    475                   (long)text.number, u_errorName(status));
    476         } else {
    477           if (text.number == 0) {
    478             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
    479           }
    480           if (U_SUCCESS(status)) {
    481             /* UTF-16 with length */
    482             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
    483             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
    484             /* UTF-16 NUL-terminated */
    485             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
    486             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
    487           }
    488         }
    489 
    490         uprv_free(manual_rt);
    491         uprv_free(manual_fb);
    492       }
    493       ucnvsel_close(sel_rt);
    494       ucnvsel_close(sel_fb);
    495       uprv_free(buffer_fb);
    496     }
    497     uprv_free((void *)encodings);
    498   }
    499 
    500   releaseAvailableNames();
    501   text_close(&text);
    502   for(i = 0 ; i < 3 ; i++) {
    503     uset_close(excluded_sets[i]);
    504   }
    505 }
    506 
    507 /* Improve code coverage of UPropsVectors */
    508 static void TestUPropsVector() {
    509     uint32_t value;
    510     UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    511     UPropsVectors *pv = upvec_open(100, &errorCode);
    512     if (pv != NULL) {
    513         log_err("Should have returned NULL if UErrorCode is an error.");
    514         return;
    515     }
    516     errorCode = U_ZERO_ERROR;
    517     pv = upvec_open(-1, &errorCode);
    518     if (pv != NULL || U_SUCCESS(errorCode)) {
    519         log_err("Should have returned NULL if column is less than 0.\n");
    520         return;
    521     }
    522     errorCode = U_ZERO_ERROR;
    523     pv = upvec_open(100, &errorCode);
    524     if (pv == NULL || U_FAILURE(errorCode)) {
    525         log_err("Unable to open UPropsVectors.\n");
    526         return;
    527     }
    528 
    529     if (upvec_getValue(pv, 0, 1) != 0) {
    530         log_err("upvec_getValue should return 0.\n");
    531     }
    532     if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
    533         log_err("upvec_getRow should not return NULL.\n");
    534     }
    535     if (upvec_getArray(pv, NULL, NULL) != NULL) {
    536         log_err("upvec_getArray should return NULL.\n");
    537     }
    538 
    539     upvec_close(pv);
    540 }
    541