Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2003-2014, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  convtest.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2003jul15
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Test file for data-driven conversion tests.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION is set
 * is slightly unnecessary - it removes tests for Unicode charsets
 * like UTF-8 that should work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to exclude only the
 * test cases for legacy charsets.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
     31 
     32 #include "unicode/ucnv.h"
     33 #include "unicode/unistr.h"
     34 #include "unicode/parsepos.h"
     35 #include "unicode/uniset.h"
     36 #include "unicode/ustring.h"
     37 #include "unicode/ures.h"
     38 #include "convtest.h"
     39 #include "cmemory.h"
     40 #include "unicode/tstdtmod.h"
     41 #include <string.h>
     42 #include <stdlib.h>
     43 
     44 enum {
     45     // characters used in test data for callbacks
     46     SUB_CB='?',
     47     SKIP_CB='0',
     48     STOP_CB='.',
     49     ESC_CB='&'
     50 };
     51 
     52 ConversionTest::ConversionTest() {
     53     UErrorCode errorCode=U_ZERO_ERROR;
     54     utf8Cnv=ucnv_open("UTF-8", &errorCode);
     55     ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
     56     if(U_FAILURE(errorCode)) {
     57         errln("unable to open UTF-8 converter");
     58     }
     59 }
     60 
     61 ConversionTest::~ConversionTest() {
     62     ucnv_close(utf8Cnv);
     63 }
     64 
     65 void
     66 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     67     if (exec) logln("TestSuite ConversionTest: ");
     68     switch (index) {
     69 #if !UCONFIG_NO_FILE_IO
     70         case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
     71         case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
     72         case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
     73         case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break;
     74 #else
     75         case 0:
     76         case 1:
     77         case 2:
     78         case 3: name="skip"; break;
     79 #endif
     80         case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
     81         default: name=""; break; //needed to end loop
     82     }
     83 }
     84 
     85 // test data interface ----------------------------------------------------- ***
     86 
     87 void
     88 ConversionTest::TestToUnicode() {
     89     ConversionCase cc;
     90     char charset[100], cbopt[4];
     91     const char *option;
     92     UnicodeString s, unicode;
     93     int32_t offsetsLength;
     94     UConverterToUCallback callback;
     95 
     96     TestDataModule *dataModule;
     97     TestData *testData;
     98     const DataMap *testCase;
     99     UErrorCode errorCode;
    100     int32_t i;
    101 
    102     errorCode=U_ZERO_ERROR;
    103     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
    104     if(U_SUCCESS(errorCode)) {
    105         testData=dataModule->createTestData("toUnicode", errorCode);
    106         if(U_SUCCESS(errorCode)) {
    107             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
    108                 if(U_FAILURE(errorCode)) {
    109                     errln("error retrieving conversion/toUnicode test case %d - %s",
    110                             i, u_errorName(errorCode));
    111                     errorCode=U_ZERO_ERROR;
    112                     continue;
    113                 }
    114 
    115                 cc.caseNr=i;
    116 
    117                 s=testCase->getString("charset", errorCode);
    118                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
    119                 cc.charset=charset;
    120 
    121                 // BEGIN android-added
    122                 // To save space, Android does not build full ISO-2022-CN tables.
    123                 // We skip the TestGetKeywordValuesForLocale for counting available collations.
    124                 if (strlen(charset) >= 8 &&
    125                     strncmp(charset+4, "2022-CN", 4) == 0) {
    126                     continue;
    127                 }
    128                 // END android-added
    129 
    130                 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
    131                 unicode=testCase->getString("unicode", errorCode);
    132                 cc.unicode=unicode.getBuffer();
    133                 cc.unicodeLength=unicode.length();
    134 
    135                 offsetsLength=0;
    136                 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
    137                 if(offsetsLength==0) {
    138                     cc.offsets=NULL;
    139                 } else if(offsetsLength!=unicode.length()) {
    140                     errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
    141                             i, unicode.length(), offsetsLength);
    142                     errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    143                 }
    144 
    145                 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
    146                 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
    147 
    148                 s=testCase->getString("errorCode", errorCode);
    149                 if(s==UNICODE_STRING("invalid", 7)) {
    150                     cc.outErrorCode=U_INVALID_CHAR_FOUND;
    151                 } else if(s==UNICODE_STRING("illegal", 7)) {
    152                     cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
    153                 } else if(s==UNICODE_STRING("truncated", 9)) {
    154                     cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
    155                 } else if(s==UNICODE_STRING("illesc", 6)) {
    156                     cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
    157                 } else if(s==UNICODE_STRING("unsuppesc", 9)) {
    158                     cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
    159                 } else {
    160                     cc.outErrorCode=U_ZERO_ERROR;
    161                 }
    162 
    163                 s=testCase->getString("callback", errorCode);
    164                 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
    165                 cc.cbopt=cbopt;
    166                 switch(cbopt[0]) {
    167                 case SUB_CB:
    168                     callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
    169                     break;
    170                 case SKIP_CB:
    171                     callback=UCNV_TO_U_CALLBACK_SKIP;
    172                     break;
    173                 case STOP_CB:
    174                     callback=UCNV_TO_U_CALLBACK_STOP;
    175                     break;
    176                 case ESC_CB:
    177                     callback=UCNV_TO_U_CALLBACK_ESCAPE;
    178                     break;
    179                 default:
    180                     callback=NULL;
    181                     break;
    182                 }
    183                 option=callback==NULL ? cbopt : cbopt+1;
    184                 if(*option==0) {
    185                     option=NULL;
    186                 }
    187 
    188                 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
    189 
    190                 if(U_FAILURE(errorCode)) {
    191                     errln("error parsing conversion/toUnicode test case %d - %s",
    192                             i, u_errorName(errorCode));
    193                     errorCode=U_ZERO_ERROR;
    194                 } else {
    195                     logln("TestToUnicode[%d] %s", i, charset);
    196                     ToUnicodeCase(cc, callback, option);
    197                 }
    198             }
    199             delete testData;
    200         }
    201         delete dataModule;
    202     }
    203     else {
    204         dataerrln("Could not load test conversion data");
    205     }
    206 }
    207 
    208 void
    209 ConversionTest::TestFromUnicode() {
    210     ConversionCase cc;
    211     char charset[100], cbopt[4];
    212     const char *option;
    213     UnicodeString s, unicode, invalidUChars;
    214     int32_t offsetsLength, index;
    215     UConverterFromUCallback callback;
    216 
    217     TestDataModule *dataModule;
    218     TestData *testData;
    219     const DataMap *testCase;
    220     const UChar *p;
    221     UErrorCode errorCode;
    222     int32_t i, length;
    223 
    224     errorCode=U_ZERO_ERROR;
    225     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
    226     if(U_SUCCESS(errorCode)) {
    227         testData=dataModule->createTestData("fromUnicode", errorCode);
    228         if(U_SUCCESS(errorCode)) {
    229             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
    230                 if(U_FAILURE(errorCode)) {
    231                     errln("error retrieving conversion/fromUnicode test case %d - %s",
    232                             i, u_errorName(errorCode));
    233                     errorCode=U_ZERO_ERROR;
    234                     continue;
    235                 }
    236 
    237                 cc.caseNr=i;
    238 
    239                 s=testCase->getString("charset", errorCode);
    240                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
    241                 cc.charset=charset;
    242 
    243                 // BEGIN android-added
    244                 // To save space, Android does not build full ISO-2022-CN tables.
    245                 // We skip the TestGetKeywordValuesForLocale for counting available collations.
    246                 if (strlen(charset) >= 8 &&
    247                     strncmp(charset+4, "2022-CN", 4) == 0) {
    248                     continue;
    249                 }
    250                 // END android-added
    251 
    252                 unicode=testCase->getString("unicode", errorCode);
    253                 cc.unicode=unicode.getBuffer();
    254                 cc.unicodeLength=unicode.length();
    255                 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
    256 
    257                 offsetsLength=0;
    258                 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
    259                 if(offsetsLength==0) {
    260                     cc.offsets=NULL;
    261                 } else if(offsetsLength!=cc.bytesLength) {
    262                     errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
    263                             i, cc.bytesLength, offsetsLength);
    264                     errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    265                 }
    266 
    267                 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
    268                 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
    269 
    270                 s=testCase->getString("errorCode", errorCode);
    271                 if(s==UNICODE_STRING("invalid", 7)) {
    272                     cc.outErrorCode=U_INVALID_CHAR_FOUND;
    273                 } else if(s==UNICODE_STRING("illegal", 7)) {
    274                     cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
    275                 } else if(s==UNICODE_STRING("truncated", 9)) {
    276                     cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
    277                 } else {
    278                     cc.outErrorCode=U_ZERO_ERROR;
    279                 }
    280 
    281                 s=testCase->getString("callback", errorCode);
    282                 cc.setSub=0; // default: no subchar
    283 
    284                 if((index=s.indexOf((UChar)0))>0) {
    285                     // read NUL-separated subchar first, if any
    286                     // copy the subchar from Latin-1 characters
    287                     // start after the NUL
    288                     p=s.getTerminatedBuffer();
    289                     length=index+1;
    290                     p+=length;
    291                     length=s.length()-length;
    292                     if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
    293                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    294                     } else {
    295                         int32_t j;
    296 
    297                         for(j=0; j<length; ++j) {
    298                             cc.subchar[j]=(char)p[j];
    299                         }
    300                         // NUL-terminate the subchar
    301                         cc.subchar[j]=0;
    302                         cc.setSub=1;
    303                     }
    304 
    305                     // remove the NUL and subchar from s
    306                     s.truncate(index);
    307                 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
    308                     // read a substitution string, separated by an equal sign
    309                     p=s.getBuffer()+index+1;
    310                     length=s.length()-(index+1);
    311                     if(length<0 || length>=UPRV_LENGTHOF(cc.subString)) {
    312                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    313                     } else {
    314                         u_memcpy(cc.subString, p, length);
    315                         // NUL-terminate the subString
    316                         cc.subString[length]=0;
    317                         cc.setSub=-1;
    318                     }
    319 
    320                     // remove the equal sign and subString from s
    321                     s.truncate(index);
    322                 }
    323 
    324                 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
    325                 cc.cbopt=cbopt;
    326                 switch(cbopt[0]) {
    327                 case SUB_CB:
    328                     callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    329                     break;
    330                 case SKIP_CB:
    331                     callback=UCNV_FROM_U_CALLBACK_SKIP;
    332                     break;
    333                 case STOP_CB:
    334                     callback=UCNV_FROM_U_CALLBACK_STOP;
    335                     break;
    336                 case ESC_CB:
    337                     callback=UCNV_FROM_U_CALLBACK_ESCAPE;
    338                     break;
    339                 default:
    340                     callback=NULL;
    341                     break;
    342                 }
    343                 option=callback==NULL ? cbopt : cbopt+1;
    344                 if(*option==0) {
    345                     option=NULL;
    346                 }
    347 
    348                 invalidUChars=testCase->getString("invalidUChars", errorCode);
    349                 cc.invalidUChars=invalidUChars.getBuffer();
    350                 cc.invalidLength=invalidUChars.length();
    351 
    352                 if(U_FAILURE(errorCode)) {
    353                     errln("error parsing conversion/fromUnicode test case %d - %s",
    354                             i, u_errorName(errorCode));
    355                     errorCode=U_ZERO_ERROR;
    356                 } else {
    357                     logln("TestFromUnicode[%d] %s", i, charset);
    358                     FromUnicodeCase(cc, callback, option);
    359                 }
    360             }
    361             delete testData;
    362         }
    363         delete dataModule;
    364     }
    365     else {
    366         dataerrln("Could not load test conversion data");
    367     }
    368 }
    369 
    370 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
    371 
    372 void
    373 ConversionTest::TestGetUnicodeSet() {
    374     char charset[100];
    375     UnicodeString s, map, mapnot;
    376     int32_t which;
    377 
    378     ParsePosition pos;
    379     UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
    380     UnicodeSet *cnvSetPtr = &cnvSet;
    381     LocalUConverterPointer cnv;
    382 
    383     TestDataModule *dataModule;
    384     TestData *testData;
    385     const DataMap *testCase;
    386     UErrorCode errorCode;
    387     int32_t i;
    388 
    389     errorCode=U_ZERO_ERROR;
    390     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
    391     if(U_SUCCESS(errorCode)) {
    392         testData=dataModule->createTestData("getUnicodeSet", errorCode);
    393         if(U_SUCCESS(errorCode)) {
    394             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
    395                 if(U_FAILURE(errorCode)) {
    396                     errln("error retrieving conversion/getUnicodeSet test case %d - %s",
    397                             i, u_errorName(errorCode));
    398                     errorCode=U_ZERO_ERROR;
    399                     continue;
    400                 }
    401 
    402                 s=testCase->getString("charset", errorCode);
    403                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
    404 
    405                 // BEGIN android-added
    406                 // To save space, Android does not build full ISO-2022-CN tables.
    407                 // We skip the TestGetKeywordValuesForLocale for counting available collations.
    408                 if (strlen(charset) >= 8 &&
    409                     strncmp(charset+4, "2022-CN", 4) == 0) {
    410                     continue;
    411                 }
    412                 // END android-added
    413 
    414                 map=testCase->getString("map", errorCode);
    415                 mapnot=testCase->getString("mapnot", errorCode);
    416 
    417                 which=testCase->getInt28("which", errorCode);
    418 
    419                 if(U_FAILURE(errorCode)) {
    420                     errln("error parsing conversion/getUnicodeSet test case %d - %s",
    421                             i, u_errorName(errorCode));
    422                     errorCode=U_ZERO_ERROR;
    423                     continue;
    424                 }
    425 
    426                 // test this test case
    427                 mapSet.clear();
    428                 mapnotSet.clear();
    429 
    430                 pos.setIndex(0);
    431                 mapSet.applyPattern(map, pos, 0, NULL, errorCode);
    432                 if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {
    433                     errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
    434                           "    error index %d  index %d  U+%04x",
    435                             i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
    436                     errorCode=U_ZERO_ERROR;
    437                     continue;
    438                 }
    439 
    440                 pos.setIndex(0);
    441                 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
    442                 if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {
    443                     errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
    444                           "    error index %d  index %d  U+%04x",
    445                             i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
    446                     errorCode=U_ZERO_ERROR;
    447                     continue;
    448                 }
    449 
    450                 logln("TestGetUnicodeSet[%d] %s", i, charset);
    451 
    452                 cnv.adoptInstead(cnv_open(charset, errorCode));
    453                 if(U_FAILURE(errorCode)) {
    454                     errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
    455                             charset, i, u_errorName(errorCode));
    456                     errorCode=U_ZERO_ERROR;
    457                     continue;
    458                 }
    459 
    460                 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
    461 
    462                 if(U_FAILURE(errorCode)) {
    463                     errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
    464                             charset, i, u_errorName(errorCode));
    465                     errorCode=U_ZERO_ERROR;
    466                     continue;
    467                 }
    468 
    469                 // are there items that must be in cnvSet but are not?
    470                 (diffSet=mapSet).removeAll(cnvSet);
    471                 if(!diffSet.isEmpty()) {
    472                     diffSet.toPattern(s, TRUE);
    473                     if(s.length()>100) {
    474                         s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
    475                     }
    476                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
    477                             charset, i);
    478                     errln(s);
    479                 }
    480 
    481                 // are there items that must not be in cnvSet but are?
    482                 (diffSet=mapnotSet).retainAll(cnvSet);
    483                 if(!diffSet.isEmpty()) {
    484                     diffSet.toPattern(s, TRUE);
    485                     if(s.length()>100) {
    486                         s.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
    487                     }
    488                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
    489                             charset, i);
    490                     errln(s);
    491                 }
    492             }
    493             delete testData;
    494         }
    495         delete dataModule;
    496     }
    497     else {
    498         dataerrln("Could not load test conversion data");
    499     }
    500 }
    501 
    502 U_CDECL_BEGIN
    503 static void U_CALLCONV
    504 getUnicodeSetCallback(const void *context,
    505                       UConverterFromUnicodeArgs * /*fromUArgs*/,
    506                       const UChar* /*codeUnits*/,
    507                       int32_t /*length*/,
    508                       UChar32 codePoint,
    509                       UConverterCallbackReason reason,
    510                       UErrorCode *pErrorCode) {
    511     if(reason<=UCNV_IRREGULAR) {
    512         ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
    513         *pErrorCode=U_ZERO_ERROR;                    // skip
    514     }  // else ignore the reset, close and clone calls.
    515 }
    516 U_CDECL_END
    517 
    518 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
    519 void
    520 ConversionTest::TestGetUnicodeSet2() {
    521     // Build a string with all code points.
    522     UChar32 cpLimit;
    523     int32_t s0Length;
    524     if(quick) {
    525         cpLimit=s0Length=0x10000;  // BMP only
    526     } else {
    527         cpLimit=0x110000;
    528         s0Length=0x10000+0x200000;  // BMP + surrogate pairs
    529     }
    530     UChar *s0=new UChar[s0Length];
    531     if(s0==NULL) {
    532         return;
    533     }
    534     UChar *s=s0;
    535     UChar32 c;
    536     UChar c2;
    537     // low BMP
    538     for(c=0; c<=0xd7ff; ++c) {
    539         *s++=(UChar)c;
    540     }
    541     // trail surrogates
    542     for(c=0xdc00; c<=0xdfff; ++c) {
    543         *s++=(UChar)c;
    544     }
    545     // lead surrogates
    546     // (after trails so that there is not even one surrogate pair in between)
    547     for(c=0xd800; c<=0xdbff; ++c) {
    548         *s++=(UChar)c;
    549     }
    550     // high BMP
    551     for(c=0xe000; c<=0xffff; ++c) {
    552         *s++=(UChar)c;
    553     }
    554     // supplementary code points = surrogate pairs
    555     if(cpLimit==0x110000) {
    556         for(c=0xd800; c<=0xdbff; ++c) {
    557             for(c2=0xdc00; c2<=0xdfff; ++c2) {
    558                 *s++=(UChar)c;
    559                 *s++=c2;
    560             }
    561         }
    562     }
    563 
    564     static const char *const cnvNames[]={
    565         "UTF-8",
    566         "UTF-7",
    567         "UTF-16",
    568         "US-ASCII",
    569         "ISO-8859-1",
    570         "windows-1252",
    571         "Shift-JIS",
    572         "ibm-1390",  // EBCDIC_STATEFUL table
    573         "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
    574         "HZ",
    575         "ISO-2022-JP",
    576         "JIS7",
    577         "ISO-2022-CN",
    578         "ISO-2022-CN-EXT",
    579         "LMBCS"
    580     };
    581     LocalUConverterPointer cnv;
    582     char buffer[1024];
    583     int32_t i;
    584     for(i=0; i<UPRV_LENGTHOF(cnvNames); ++i) {
    585         UErrorCode errorCode=U_ZERO_ERROR;
    586         cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
    587         if(U_FAILURE(errorCode)) {
    588             errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
    589             continue;
    590         }
    591         UnicodeSet expected;
    592         ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
    593         if(U_FAILURE(errorCode)) {
    594             errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
    595             continue;
    596         }
    597         UConverterUnicodeSet which;
    598         for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
    599             if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
    600                 ucnv_setFallback(cnv.getAlias(), TRUE);
    601             }
    602             expected.add(0, cpLimit-1);
    603             s=s0;
    604             UBool flush;
    605             do {
    606                 char *t=buffer;
    607                 flush=(UBool)(s==s0+s0Length);
    608                 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
    609                 if(U_FAILURE(errorCode)) {
    610                     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
    611                         errorCode=U_ZERO_ERROR;
    612                         continue;
    613                     } else {
    614                         break;  // unexpected error, should not occur
    615                     }
    616                 }
    617             } while(!flush);
    618             UnicodeSet set;
    619             ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
    620             if(cpLimit<0x110000) {
    621                 set.remove(cpLimit, 0x10ffff);
    622             }
    623             if(which==UCNV_ROUNDTRIP_SET) {
    624                 // ignore PUA code points because they will be converted even if they
    625                 // are fallbacks and when other fallbacks are turned off,
    626                 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
    627                 expected.remove(0xe000, 0xf8ff);
    628                 expected.remove(0xf0000, 0xffffd);
    629                 expected.remove(0x100000, 0x10fffd);
    630                 set.remove(0xe000, 0xf8ff);
    631                 set.remove(0xf0000, 0xffffd);
    632                 set.remove(0x100000, 0x10fffd);
    633             }
    634             if(set!=expected) {
    635                 // First try to see if we have different sets because ucnv_getUnicodeSet()
    636                 // added strings: The above conversion method does not tell us what strings might be convertible.
    637                 // Remove strings from the set and compare again.
    638                 // Unfortunately, there are no good, direct set methods for finding out whether there are strings
    639                 // in the set, nor for enumerating or removing just them.
    640                 // Intersect all code points with the set. The intersection will not contain strings.
    641                 UnicodeSet temp(0, 0x10ffff);
    642                 temp.retainAll(set);
    643                 set=temp;
    644             }
    645             if(set!=expected) {
    646                 UnicodeSet diffSet;
    647                 UnicodeString out;
    648 
    649                 // are there items that must be in the set but are not?
    650                 (diffSet=expected).removeAll(set);
    651                 if(!diffSet.isEmpty()) {
    652                     diffSet.toPattern(out, TRUE);
    653                     if(out.length()>100) {
    654                         out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
    655                     }
    656                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
    657                             cnvNames[i], which);
    658                     errln(out);
    659                 }
    660 
    661                 // are there items that must not be in the set but are?
    662                 (diffSet=set).removeAll(expected);
    663                 if(!diffSet.isEmpty()) {
    664                     diffSet.toPattern(out, TRUE);
    665                     if(out.length()>100) {
    666                         out.replace(100, 0x7fffffff, ellipsis, UPRV_LENGTHOF(ellipsis));
    667                     }
    668                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
    669                             cnvNames[i], which);
    670                     errln(out);
    671                 }
    672             }
    673         }
    674     }
    675 
    676     delete [] s0;
    677 }
    678 
    679 // Test all codepoints which has the default ignorable Unicode property are ignored if they have no mapping
    680 // If there are any failures, the hard coded list (IS_DEFAULT_IGNORABLE_CODE_POINT) in ucnv_err.c should be updated
    681 void
    682 ConversionTest::TestDefaultIgnorableCallback() {
    683     UErrorCode status = U_ZERO_ERROR;
    684     const char *cnv_name = "euc-jp-2007";
    685     const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
    686     const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
    687 
    688     UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status);
    689     if (U_FAILURE(status)) {
    690         dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
    691         return;
    692     }
    693 
    694     UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status);
    695     if (U_FAILURE(status)) {
    696         dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
    697         return;
    698     }
    699 
    700     UConverter *cnv = cnv_open(cnv_name, status);
    701     if (U_FAILURE(status)) {
    702         dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
    703         return;
    704     }
    705 
    706     // set callback for the converter
    707     ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
    708 
    709     UChar32 input[1];
    710     char output[10];
    711     int32_t outputLength;
    712 
    713     // test default ignorables are ignored
    714     int size = set_ignorable->size();
    715     for (int i = 0; i < size; i++) {
    716         status = U_ZERO_ERROR;
    717         outputLength= 0;
    718 
    719         input[0] = set_ignorable->charAt(i);
    720 
    721         outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
    722         if (U_FAILURE(status) || outputLength != 0) {
    723             errln("Ignorable code point: U+%04X not skipped as expected - %s", input[0], u_errorName(status));
    724         }
    725     }
    726 
    727     // test non-ignorables are not ignored
    728     size = set_not_ignorable->size();
    729     for (int i = 0; i < size; i++) {
    730         status = U_ZERO_ERROR;
    731         outputLength= 0;
    732 
    733         input[0] = set_not_ignorable->charAt(i);
    734 
    735         if (input[0] == 0) {
    736             continue;
    737         }
    738 
    739         outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
    740         if (U_FAILURE(status) || outputLength <= 0) {
    741             errln("Non-ignorable code point: U+%04X skipped unexpectedly - %s", input[0], u_errorName(status));
    742         }
    743     }
    744 
    745     ucnv_close(cnv);
    746     delete set_not_ignorable;
    747     delete set_ignorable;
    748 }
    749 
    750 // open testdata or ICU data converter ------------------------------------- ***
    751 
    752 UConverter *
    753 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
    754     if(name!=NULL && *name=='+') {
    755         // Converter names that start with '+' are ignored in ICU4J tests.
    756         ++name;
    757     }
    758     if(name!=NULL && *name=='*') {
    759         /* loadTestData(): set the data directory */
    760         return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
    761     } else {
    762         return ucnv_open(name, &errorCode);
    763     }
    764 }
    765 
    766 // output helpers ---------------------------------------------------------- ***
    767 
    768 static inline char
    769 hexDigit(uint8_t digit) {
    770     return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
    771 }
    772 
    773 static char *
    774 printBytes(const uint8_t *bytes, int32_t length, char *out) {
    775     uint8_t b;
    776 
    777     if(length>0) {
    778         b=*bytes++;
    779         --length;
    780         *out++=hexDigit((uint8_t)(b>>4));
    781         *out++=hexDigit((uint8_t)(b&0xf));
    782     }
    783 
    784     while(length>0) {
    785         b=*bytes++;
    786         --length;
    787         *out++=' ';
    788         *out++=hexDigit((uint8_t)(b>>4));
    789         *out++=hexDigit((uint8_t)(b&0xf));
    790     }
    791     *out++=0;
    792     return out;
    793 }
    794 
    795 static char *
    796 printUnicode(const UChar *unicode, int32_t length, char *out) {
    797     UChar32 c;
    798     int32_t i;
    799 
    800     for(i=0; i<length;) {
    801         if(i>0) {
    802             *out++=' ';
    803         }
    804         U16_NEXT(unicode, i, length, c);
    805         // write 4..6 digits
    806         if(c>=0x100000) {
    807             *out++='1';
    808         }
    809         if(c>=0x10000) {
    810             *out++=hexDigit((uint8_t)((c>>16)&0xf));
    811         }
    812         *out++=hexDigit((uint8_t)((c>>12)&0xf));
    813         *out++=hexDigit((uint8_t)((c>>8)&0xf));
    814         *out++=hexDigit((uint8_t)((c>>4)&0xf));
    815         *out++=hexDigit((uint8_t)(c&0xf));
    816     }
    817     *out++=0;
    818     return out;
    819 }
    820 
    821 static char *
    822 printOffsets(const int32_t *offsets, int32_t length, char *out) {
    823     int32_t i, o, d;
    824 
    825     if(offsets==NULL) {
    826         length=0;
    827     }
    828 
    829     for(i=0; i<length; ++i) {
    830         if(i>0) {
    831             *out++=' ';
    832         }
    833         o=offsets[i];
    834 
    835         // print all offsets with 2 characters each (-x, -9..99, xx)
    836         if(o<-9) {
    837             *out++='-';
    838             *out++='x';
    839         } else if(o<0) {
    840             *out++='-';
    841             *out++=(char)('0'-o);
    842         } else if(o<=99) {
    843             *out++=(d=o/10)==0 ? ' ' : (char)('0'+d);
    844             *out++=(char)('0'+o%10);
    845         } else /* o>99 */ {
    846             *out++='x';
    847             *out++='x';
    848         }
    849     }
    850     *out++=0;
    851     return out;
    852 }
    853 
    854 // toUnicode test worker functions ----------------------------------------- ***
    855 
    856 static int32_t
    857 stepToUnicode(ConversionCase &cc, UConverter *cnv,
    858               UChar *result, int32_t resultCapacity,
    859               int32_t *resultOffsets, /* also resultCapacity */
    860               int32_t step,
    861               UErrorCode *pErrorCode) {
    862     const char *source, *sourceLimit, *bytesLimit;
    863     UChar *target, *targetLimit, *resultLimit;
    864     UBool flush;
    865 
    866     source=(const char *)cc.bytes;
    867     target=result;
    868     bytesLimit=source+cc.bytesLength;
    869     resultLimit=result+resultCapacity;
    870 
    871     if(step>=0) {
    872         // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
    873         // move only one buffer (in vs. out) at a time to be extra mean
    874         // step==0 performs bulk conversion and generates offsets
    875 
    876         // initialize the partial limits for the loop
    877         if(step==0) {
    878             // use the entire buffers
    879             sourceLimit=bytesLimit;
    880             targetLimit=resultLimit;
    881             flush=cc.finalFlush;
    882         } else {
    883             // start with empty partial buffers
    884             sourceLimit=source;
    885             targetLimit=target;
    886             flush=FALSE;
    887 
    888             // output offsets only for bulk conversion
    889             resultOffsets=NULL;
    890         }
    891 
    892         for(;;) {
    893             // resetting the opposite conversion direction must not affect this one
    894             ucnv_resetFromUnicode(cnv);
    895 
    896             // convert
    897             ucnv_toUnicode(cnv,
    898                 &target, targetLimit,
    899                 &source, sourceLimit,
    900                 resultOffsets,
    901                 flush, pErrorCode);
    902 
    903             // check pointers and errors
    904             if(source>sourceLimit || target>targetLimit) {
    905                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    906                 break;
    907             } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    908                 if(target!=targetLimit) {
    909                     // buffer overflow must only be set when the target is filled
    910                     *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    911                     break;
    912                 } else if(targetLimit==resultLimit) {
    913                     // not just a partial overflow
    914                     break;
    915                 }
    916 
    917                 // the partial target is filled, set a new limit, reset the error and continue
    918                 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
    919                 *pErrorCode=U_ZERO_ERROR;
    920             } else if(U_FAILURE(*pErrorCode)) {
    921                 // some other error occurred, done
    922                 break;
    923             } else {
    924                 if(source!=sourceLimit) {
    925                     // when no error occurs, then the input must be consumed
    926                     *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    927                     break;
    928                 }
    929 
    930                 if(sourceLimit==bytesLimit) {
    931                     // we are done
    932                     break;
    933                 }
    934 
    935                 // the partial conversion succeeded, set a new limit and continue
    936                 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
    937                 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
    938             }
    939         }
    940     } else /* step<0 */ {
    941         /*
    942          * step==-1: call only ucnv_getNextUChar()
    943          * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
    944          *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
    945          *   else give it at most (-step-2)/2 bytes
    946          */
    947         UChar32 c;
    948 
    949         // end the loop by getting an index out of bounds error
    950         for(;;) {
    951             // resetting the opposite conversion direction must not affect this one
    952             ucnv_resetFromUnicode(cnv);
    953 
    954             // convert
    955             if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
    956                 sourceLimit=source; // use sourceLimit not as a real limit
    957                                     // but to remember the pre-getNextUChar source pointer
    958                 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
    959 
    960                 // check pointers and errors
    961                 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    962                     if(source!=bytesLimit) {
    963                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    964                     } else {
    965                         *pErrorCode=U_ZERO_ERROR;
    966                     }
    967                     break;
    968                 } else if(U_FAILURE(*pErrorCode)) {
    969                     break;
    970                 }
    971                 // source may not move if c is from previous overflow
    972 
    973                 if(target==resultLimit) {
    974                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    975                     break;
    976                 }
    977                 if(c<=0xffff) {
    978                     *target++=(UChar)c;
    979                 } else {
    980                     *target++=U16_LEAD(c);
    981                     if(target==resultLimit) {
    982                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    983                         break;
    984                     }
    985                     *target++=U16_TRAIL(c);
    986                 }
    987 
    988                 // alternate between -n-1 and -n but leave -1 alone
    989                 if(step<-1) {
    990                     ++step;
    991                 }
    992             } else /* step is even */ {
    993                 // allow only one UChar output
    994                 targetLimit=target<resultLimit ? target+1 : resultLimit;
    995 
    996                 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
    997                 // and never output offsets
    998                 if(step==-2) {
    999                     sourceLimit=bytesLimit;
   1000                 } else {
   1001                     sourceLimit=source+(-step-2)/2;
   1002                     if(sourceLimit>bytesLimit) {
   1003                         sourceLimit=bytesLimit;
   1004                     }
   1005                 }
   1006 
   1007                 ucnv_toUnicode(cnv,
   1008                     &target, targetLimit,
   1009                     &source, sourceLimit,
   1010                     NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
   1011 
   1012                 // check pointers and errors
   1013                 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   1014                     if(target!=targetLimit) {
   1015                         // buffer overflow must only be set when the target is filled
   1016                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1017                         break;
   1018                     } else if(targetLimit==resultLimit) {
   1019                         // not just a partial overflow
   1020                         break;
   1021                     }
   1022 
   1023                     // the partial target is filled, set a new limit and continue
   1024                     *pErrorCode=U_ZERO_ERROR;
   1025                 } else if(U_FAILURE(*pErrorCode)) {
   1026                     // some other error occurred, done
   1027                     break;
   1028                 } else {
   1029                     if(source!=sourceLimit) {
   1030                         // when no error occurs, then the input must be consumed
   1031                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1032                         break;
   1033                     }
   1034 
   1035                     // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
   1036                 }
   1037 
   1038                 --step;
   1039             }
   1040         }
   1041     }
   1042 
   1043     return (int32_t)(target-result);
   1044 }
   1045 
   1046 UBool
   1047 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
   1048     // open the converter
   1049     IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
   1050     LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
   1051     if(errorCode.isFailure()) {
   1052         errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
   1053                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
   1054         errorCode.reset();
   1055         return FALSE;
   1056     }
   1057 
   1058     // set the callback
   1059     if(callback!=NULL) {
   1060         ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
   1061         if(U_FAILURE(errorCode)) {
   1062             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
   1063                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1064             return FALSE;
   1065         }
   1066     }
   1067 
   1068     int32_t resultOffsets[256];
   1069     UChar result[256];
   1070     int32_t resultLength;
   1071     UBool ok;
   1072 
   1073     static const struct {
   1074         int32_t step;
   1075         const char *name;
   1076     } steps[]={
   1077         { 0, "bulk" }, // must be first for offsets to be checked
   1078         { 1, "step=1" },
   1079         { 3, "step=3" },
   1080         { 7, "step=7" },
   1081         { -1, "getNext" },
   1082         { -2, "toU(bulk)+getNext" },
   1083         { -3, "getNext+toU(bulk)" },
   1084         { -4, "toU(1)+getNext" },
   1085         { -5, "getNext+toU(1)" },
   1086         { -12, "toU(5)+getNext" },
   1087         { -13, "getNext+toU(5)" },
   1088     };
   1089     int32_t i, step;
   1090 
   1091     ok=TRUE;
   1092     for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
   1093         step=steps[i].step;
   1094         if(step<0 && !cc.finalFlush) {
   1095             // skip ucnv_getNextUChar() if !finalFlush because
   1096             // ucnv_getNextUChar() always implies flush
   1097             continue;
   1098         }
   1099         if(step!=0) {
   1100             // bulk test is first, then offsets are not checked any more
   1101             cc.offsets=NULL;
   1102         }
   1103         else {
   1104             memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
   1105         }
   1106         memset(result, -1, UPRV_LENGTHOF(result));
   1107         errorCode.reset();
   1108         resultLength=stepToUnicode(cc, cnv.getAlias(),
   1109                                 result, UPRV_LENGTHOF(result),
   1110                                 step==0 ? resultOffsets : NULL,
   1111                                 step, errorCode);
   1112         ok=checkToUnicode(
   1113                 cc, cnv.getAlias(), steps[i].name,
   1114                 result, resultLength,
   1115                 cc.offsets!=NULL ? resultOffsets : NULL,
   1116                 errorCode);
   1117         if(errorCode.isFailure() || !cc.finalFlush) {
   1118             // reset if an error occurred or we did not flush
   1119             // otherwise do nothing to make sure that flushing resets
   1120             ucnv_resetToUnicode(cnv.getAlias());
   1121         }
   1122         if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
   1123             errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
   1124                 cc.caseNr, cc.charset, resultLength);
   1125         }
   1126         if (result[resultLength] != (UChar)-1) {
   1127             errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
   1128                 cc.caseNr, cc.charset, resultLength);
   1129         }
   1130     }
   1131 
   1132     // not a real loop, just a convenience for breaking out of the block
   1133     while(ok && cc.finalFlush) {
   1134         // test ucnv_toUChars()
   1135         memset(result, 0, sizeof(result));
   1136 
   1137         errorCode.reset();
   1138         resultLength=ucnv_toUChars(cnv.getAlias(),
   1139                         result, UPRV_LENGTHOF(result),
   1140                         (const char *)cc.bytes, cc.bytesLength,
   1141                         errorCode);
   1142         ok=checkToUnicode(
   1143                 cc, cnv.getAlias(), "toUChars",
   1144                 result, resultLength,
   1145                 NULL,
   1146                 errorCode);
   1147         if(!ok) {
   1148             break;
   1149         }
   1150 
   1151         // test preflighting
   1152         // keep the correct result for simple checking
   1153         errorCode.reset();
   1154         resultLength=ucnv_toUChars(cnv.getAlias(),
   1155                         NULL, 0,
   1156                         (const char *)cc.bytes, cc.bytesLength,
   1157                         errorCode);
   1158         if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
   1159             errorCode.reset();
   1160         }
   1161         ok=checkToUnicode(
   1162                 cc, cnv.getAlias(), "preflight toUChars",
   1163                 result, resultLength,
   1164                 NULL,
   1165                 errorCode);
   1166         break;
   1167     }
   1168 
   1169     errorCode.reset();  // all errors have already been reported
   1170     return ok;
   1171 }
   1172 
   1173 UBool
   1174 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
   1175                                const UChar *result, int32_t resultLength,
   1176                                const int32_t *resultOffsets,
   1177                                UErrorCode resultErrorCode) {
   1178     char resultInvalidChars[8];
   1179     int8_t resultInvalidLength;
   1180     UErrorCode errorCode;
   1181 
   1182     const char *msg;
   1183 
   1184     // reset the message; NULL will mean "ok"
   1185     msg=NULL;
   1186 
   1187     errorCode=U_ZERO_ERROR;
   1188     resultInvalidLength=sizeof(resultInvalidChars);
   1189     ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
   1190     if(U_FAILURE(errorCode)) {
   1191         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
   1192                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
   1193         return FALSE;
   1194     }
   1195 
   1196     // check everything that might have gone wrong
   1197     if(cc.unicodeLength!=resultLength) {
   1198         msg="wrong result length";
   1199     } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
   1200         msg="wrong result string";
   1201     } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {
   1202         msg="wrong offsets";
   1203     } else if(cc.outErrorCode!=resultErrorCode) {
   1204         msg="wrong error code";
   1205     } else if(cc.invalidLength!=resultInvalidLength) {
   1206         msg="wrong length of last invalid input";
   1207     } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
   1208         msg="wrong last invalid input";
   1209     }
   1210 
   1211     if(msg==NULL) {
   1212         return TRUE;
   1213     } else {
   1214         char buffer[2000]; // one buffer for all strings
   1215         char *s, *bytesString, *unicodeString, *resultString,
   1216             *offsetsString, *resultOffsetsString,
   1217             *invalidCharsString, *resultInvalidCharsString;
   1218 
   1219         bytesString=s=buffer;
   1220         s=printBytes(cc.bytes, cc.bytesLength, bytesString);
   1221         s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
   1222         s=printUnicode(result, resultLength, resultString=s);
   1223         s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
   1224         s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
   1225         s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
   1226         s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
   1227 
   1228         if((s-buffer)>(int32_t)sizeof(buffer)) {
   1229             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
   1230                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
   1231             exit(1);
   1232         }
   1233 
   1234         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
   1235               "  bytes <%s>[%d]\n"
   1236               " expected <%s>[%d]\n"
   1237               "  result  <%s>[%d]\n"
   1238               " offsets         <%s>\n"
   1239               "  result offsets <%s>\n"
   1240               " error code expected %s got %s\n"
   1241               "  invalidChars expected <%s> got <%s>\n",
   1242               cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
   1243               bytesString, cc.bytesLength,
   1244               unicodeString, cc.unicodeLength,
   1245               resultString, resultLength,
   1246               offsetsString,
   1247               resultOffsetsString,
   1248               u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
   1249               invalidCharsString, resultInvalidCharsString);
   1250 
   1251         return FALSE;
   1252     }
   1253 }
   1254 
   1255 // fromUnicode test worker functions --------------------------------------- ***
   1256 
   1257 static int32_t
   1258 stepFromUTF8(ConversionCase &cc,
   1259              UConverter *utf8Cnv, UConverter *cnv,
   1260              char *result, int32_t resultCapacity,
   1261              int32_t step,
   1262              UErrorCode *pErrorCode) {
   1263     const char *source, *sourceLimit, *utf8Limit;
   1264     UChar pivotBuffer[32];
   1265     UChar *pivotSource, *pivotTarget, *pivotLimit;
   1266     char *target, *targetLimit, *resultLimit;
   1267     UBool flush;
   1268 
   1269     source=cc.utf8;
   1270     pivotSource=pivotTarget=pivotBuffer;
   1271     target=result;
   1272     utf8Limit=source+cc.utf8Length;
   1273     resultLimit=result+resultCapacity;
   1274 
   1275     // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
   1276     // move only one buffer (in vs. out) at a time to be extra mean
   1277     // step==0 performs bulk conversion
   1278 
   1279     // initialize the partial limits for the loop
   1280     if(step==0) {
   1281         // use the entire buffers
   1282         sourceLimit=utf8Limit;
   1283         targetLimit=resultLimit;
   1284         flush=cc.finalFlush;
   1285 
   1286         pivotLimit=pivotBuffer+UPRV_LENGTHOF(pivotBuffer);
   1287     } else {
   1288         // start with empty partial buffers
   1289         sourceLimit=source;
   1290         targetLimit=target;
   1291         flush=FALSE;
   1292 
   1293         // empty pivot is not allowed, make it of length step
   1294         pivotLimit=pivotBuffer+step;
   1295     }
   1296 
   1297     for(;;) {
   1298         // resetting the opposite conversion direction must not affect this one
   1299         ucnv_resetFromUnicode(utf8Cnv);
   1300         ucnv_resetToUnicode(cnv);
   1301 
   1302         // convert
   1303         ucnv_convertEx(cnv, utf8Cnv,
   1304             &target, targetLimit,
   1305             &source, sourceLimit,
   1306             pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
   1307             FALSE, flush, pErrorCode);
   1308 
   1309         // check pointers and errors
   1310         if(source>sourceLimit || target>targetLimit) {
   1311             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1312             break;
   1313         } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   1314             if(target!=targetLimit) {
   1315                 // buffer overflow must only be set when the target is filled
   1316                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1317                 break;
   1318             } else if(targetLimit==resultLimit) {
   1319                 // not just a partial overflow
   1320                 break;
   1321             }
   1322 
   1323             // the partial target is filled, set a new limit, reset the error and continue
   1324             targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
   1325             *pErrorCode=U_ZERO_ERROR;
   1326         } else if(U_FAILURE(*pErrorCode)) {
   1327             if(pivotSource==pivotBuffer) {
   1328                 // toUnicode error, should not occur
   1329                 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
   1330                 break;
   1331             } else {
   1332                 // fromUnicode error
   1333                 // some other error occurred, done
   1334                 break;
   1335             }
   1336         } else {
   1337             if(source!=sourceLimit) {
   1338                 // when no error occurs, then the input must be consumed
   1339                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1340                 break;
   1341             }
   1342 
   1343             if(sourceLimit==utf8Limit) {
   1344                 // we are done
   1345                 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
   1346                     // ucnv_convertEx() warns about not terminating the output
   1347                     // but ucnv_fromUnicode() does not and so
   1348                     // checkFromUnicode() does not expect it
   1349                     *pErrorCode=U_ZERO_ERROR;
   1350                 }
   1351                 break;
   1352             }
   1353 
   1354             // the partial conversion succeeded, set a new limit and continue
   1355             sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
   1356             flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
   1357         }
   1358     }
   1359 
   1360     return (int32_t)(target-result);
   1361 }
   1362 
   1363 static int32_t
   1364 stepFromUnicode(ConversionCase &cc, UConverter *cnv,
   1365                 char *result, int32_t resultCapacity,
   1366                 int32_t *resultOffsets, /* also resultCapacity */
   1367                 int32_t step,
   1368                 UErrorCode *pErrorCode) {
   1369     const UChar *source, *sourceLimit, *unicodeLimit;
   1370     char *target, *targetLimit, *resultLimit;
   1371     UBool flush;
   1372 
   1373     source=cc.unicode;
   1374     target=result;
   1375     unicodeLimit=source+cc.unicodeLength;
   1376     resultLimit=result+resultCapacity;
   1377 
   1378     // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
   1379     // move only one buffer (in vs. out) at a time to be extra mean
   1380     // step==0 performs bulk conversion and generates offsets
   1381 
   1382     // initialize the partial limits for the loop
   1383     if(step==0) {
   1384         // use the entire buffers
   1385         sourceLimit=unicodeLimit;
   1386         targetLimit=resultLimit;
   1387         flush=cc.finalFlush;
   1388     } else {
   1389         // start with empty partial buffers
   1390         sourceLimit=source;
   1391         targetLimit=target;
   1392         flush=FALSE;
   1393 
   1394         // output offsets only for bulk conversion
   1395         resultOffsets=NULL;
   1396     }
   1397 
   1398     for(;;) {
   1399         // resetting the opposite conversion direction must not affect this one
   1400         ucnv_resetToUnicode(cnv);
   1401 
   1402         // convert
   1403         ucnv_fromUnicode(cnv,
   1404             &target, targetLimit,
   1405             &source, sourceLimit,
   1406             resultOffsets,
   1407             flush, pErrorCode);
   1408 
   1409         // check pointers and errors
   1410         if(source>sourceLimit || target>targetLimit) {
   1411             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1412             break;
   1413         } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   1414             if(target!=targetLimit) {
   1415                 // buffer overflow must only be set when the target is filled
   1416                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1417                 break;
   1418             } else if(targetLimit==resultLimit) {
   1419                 // not just a partial overflow
   1420                 break;
   1421             }
   1422 
   1423             // the partial target is filled, set a new limit, reset the error and continue
   1424             targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
   1425             *pErrorCode=U_ZERO_ERROR;
   1426         } else if(U_FAILURE(*pErrorCode)) {
   1427             // some other error occurred, done
   1428             break;
   1429         } else {
   1430             if(source!=sourceLimit) {
   1431                 // when no error occurs, then the input must be consumed
   1432                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1433                 break;
   1434             }
   1435 
   1436             if(sourceLimit==unicodeLimit) {
   1437                 // we are done
   1438                 break;
   1439             }
   1440 
   1441             // the partial conversion succeeded, set a new limit and continue
   1442             sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
   1443             flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
   1444         }
   1445     }
   1446 
   1447     return (int32_t)(target-result);
   1448 }
   1449 
   1450 UBool
   1451 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
   1452     UConverter *cnv;
   1453     UErrorCode errorCode;
   1454 
   1455     // open the converter
   1456     errorCode=U_ZERO_ERROR;
   1457     cnv=cnv_open(cc.charset, errorCode);
   1458     if(U_FAILURE(errorCode)) {
   1459         errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
   1460                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1461         return FALSE;
   1462     }
   1463     ucnv_resetToUnicode(utf8Cnv);
   1464 
   1465     // set the callback
   1466     if(callback!=NULL) {
   1467         ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
   1468         if(U_FAILURE(errorCode)) {
   1469             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
   1470                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1471             ucnv_close(cnv);
   1472             return FALSE;
   1473         }
   1474     }
   1475 
   1476     // set the fallbacks flag
   1477     // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
   1478     ucnv_setFallback(cnv, cc.fallbacks);
   1479 
   1480     // set the subchar
   1481     int32_t length;
   1482 
   1483     if(cc.setSub>0) {
   1484         length=(int32_t)strlen(cc.subchar);
   1485         ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
   1486         if(U_FAILURE(errorCode)) {
   1487             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
   1488                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1489             ucnv_close(cnv);
   1490             return FALSE;
   1491         }
   1492     } else if(cc.setSub<0) {
   1493         ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
   1494         if(U_FAILURE(errorCode)) {
   1495             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
   1496                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1497             ucnv_close(cnv);
   1498             return FALSE;
   1499         }
   1500     }
   1501 
   1502     // convert unicode to utf8
   1503     char utf8[256];
   1504     cc.utf8=utf8;
   1505     u_strToUTF8(utf8, UPRV_LENGTHOF(utf8), &cc.utf8Length,
   1506                 cc.unicode, cc.unicodeLength,
   1507                 &errorCode);
   1508     if(U_FAILURE(errorCode)) {
   1509         // skip UTF-8 testing of a string with an unpaired surrogate,
   1510         // or of one that's too long
   1511         // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
   1512         cc.utf8Length=-1;
   1513     }
   1514 
   1515     int32_t resultOffsets[256];
   1516     char result[256];
   1517     int32_t resultLength;
   1518     UBool ok;
   1519 
   1520     static const struct {
   1521         int32_t step;
   1522         const char *name, *utf8Name;
   1523     } steps[]={
   1524         { 0, "bulk",   "utf8" }, // must be first for offsets to be checked
   1525         { 1, "step=1", "utf8 step=1" },
   1526         { 3, "step=3", "utf8 step=3" },
   1527         { 7, "step=7", "utf8 step=7" }
   1528     };
   1529     int32_t i, step;
   1530 
   1531     ok=TRUE;
   1532     for(i=0; i<UPRV_LENGTHOF(steps) && ok; ++i) {
   1533         step=steps[i].step;
   1534         memset(resultOffsets, -1, UPRV_LENGTHOF(resultOffsets));
   1535         memset(result, -1, UPRV_LENGTHOF(result));
   1536         errorCode=U_ZERO_ERROR;
   1537         resultLength=stepFromUnicode(cc, cnv,
   1538                                 result, UPRV_LENGTHOF(result),
   1539                                 step==0 ? resultOffsets : NULL,
   1540                                 step, &errorCode);
   1541         ok=checkFromUnicode(
   1542                 cc, cnv, steps[i].name,
   1543                 (uint8_t *)result, resultLength,
   1544                 cc.offsets!=NULL ? resultOffsets : NULL,
   1545                 errorCode);
   1546         if(U_FAILURE(errorCode) || !cc.finalFlush) {
   1547             // reset if an error occurred or we did not flush
   1548             // otherwise do nothing to make sure that flushing resets
   1549             ucnv_resetFromUnicode(cnv);
   1550         }
   1551         if (resultOffsets[resultLength] != -1) {
   1552             errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
   1553                 cc.caseNr, cc.charset, resultLength);
   1554         }
   1555         if (result[resultLength] != (char)-1) {
   1556             errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
   1557                 cc.caseNr, cc.charset, resultLength);
   1558         }
   1559 
   1560         // bulk test is first, then offsets are not checked any more
   1561         cc.offsets=NULL;
   1562 
   1563         // test direct conversion from UTF-8
   1564         if(cc.utf8Length>=0) {
   1565             errorCode=U_ZERO_ERROR;
   1566             resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
   1567                                     result, UPRV_LENGTHOF(result),
   1568                                     step, &errorCode);
   1569             ok=checkFromUnicode(
   1570                     cc, cnv, steps[i].utf8Name,
   1571                     (uint8_t *)result, resultLength,
   1572                     NULL,
   1573                     errorCode);
   1574             if(U_FAILURE(errorCode) || !cc.finalFlush) {
   1575                 // reset if an error occurred or we did not flush
   1576                 // otherwise do nothing to make sure that flushing resets
   1577                 ucnv_resetToUnicode(utf8Cnv);
   1578                 ucnv_resetFromUnicode(cnv);
   1579             }
   1580         }
   1581     }
   1582 
   1583     // not a real loop, just a convenience for breaking out of the block
   1584     while(ok && cc.finalFlush) {
   1585         // test ucnv_fromUChars()
   1586         memset(result, 0, sizeof(result));
   1587 
   1588         errorCode=U_ZERO_ERROR;
   1589         resultLength=ucnv_fromUChars(cnv,
   1590                         result, UPRV_LENGTHOF(result),
   1591                         cc.unicode, cc.unicodeLength,
   1592                         &errorCode);
   1593         ok=checkFromUnicode(
   1594                 cc, cnv, "fromUChars",
   1595                 (uint8_t *)result, resultLength,
   1596                 NULL,
   1597                 errorCode);
   1598         if(!ok) {
   1599             break;
   1600         }
   1601 
   1602         // test preflighting
   1603         // keep the correct result for simple checking
   1604         errorCode=U_ZERO_ERROR;
   1605         resultLength=ucnv_fromUChars(cnv,
   1606                         NULL, 0,
   1607                         cc.unicode, cc.unicodeLength,
   1608                         &errorCode);
   1609         if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
   1610             errorCode=U_ZERO_ERROR;
   1611         }
   1612         ok=checkFromUnicode(
   1613                 cc, cnv, "preflight fromUChars",
   1614                 (uint8_t *)result, resultLength,
   1615                 NULL,
   1616                 errorCode);
   1617         break;
   1618     }
   1619 
   1620     ucnv_close(cnv);
   1621     return ok;
   1622 }
   1623 
   1624 UBool
   1625 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
   1626                                  const uint8_t *result, int32_t resultLength,
   1627                                  const int32_t *resultOffsets,
   1628                                  UErrorCode resultErrorCode) {
   1629     UChar resultInvalidUChars[8];
   1630     int8_t resultInvalidLength;
   1631     UErrorCode errorCode;
   1632 
   1633     const char *msg;
   1634 
   1635     // reset the message; NULL will mean "ok"
   1636     msg=NULL;
   1637 
   1638     errorCode=U_ZERO_ERROR;
   1639     resultInvalidLength=UPRV_LENGTHOF(resultInvalidUChars);
   1640     ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
   1641     if(U_FAILURE(errorCode)) {
   1642         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
   1643                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
   1644         return FALSE;
   1645     }
   1646 
   1647     // check everything that might have gone wrong
   1648     if(cc.bytesLength!=resultLength) {
   1649         msg="wrong result length";
   1650     } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
   1651         msg="wrong result string";
   1652     } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {
   1653         msg="wrong offsets";
   1654     } else if(cc.outErrorCode!=resultErrorCode) {
   1655         msg="wrong error code";
   1656     } else if(cc.invalidLength!=resultInvalidLength) {
   1657         msg="wrong length of last invalid input";
   1658     } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
   1659         msg="wrong last invalid input";
   1660     }
   1661 
   1662     if(msg==NULL) {
   1663         return TRUE;
   1664     } else {
   1665         char buffer[2000]; // one buffer for all strings
   1666         char *s, *unicodeString, *bytesString, *resultString,
   1667             *offsetsString, *resultOffsetsString,
   1668             *invalidCharsString, *resultInvalidUCharsString;
   1669 
   1670         unicodeString=s=buffer;
   1671         s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
   1672         s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
   1673         s=printBytes(result, resultLength, resultString=s);
   1674         s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
   1675         s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
   1676         s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
   1677         s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
   1678 
   1679         if((s-buffer)>(int32_t)sizeof(buffer)) {
   1680             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
   1681                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
   1682             exit(1);
   1683         }
   1684 
   1685         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
   1686               "  unicode <%s>[%d]\n"
   1687               " expected <%s>[%d]\n"
   1688               "  result  <%s>[%d]\n"
   1689               " offsets         <%s>\n"
   1690               "  result offsets <%s>\n"
   1691               " error code expected %s got %s\n"
   1692               "  invalidChars expected <%s> got <%s>\n",
   1693               cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
   1694               unicodeString, cc.unicodeLength,
   1695               bytesString, cc.bytesLength,
   1696               resultString, resultLength,
   1697               offsetsString,
   1698               resultOffsetsString,
   1699               u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
   1700               invalidCharsString, resultInvalidUCharsString);
   1701 
   1702         return FALSE;
   1703     }
   1704 }
   1705 
   1706 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   1707