Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2003-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  convtest.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2003jul15
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Test file for data-driven conversion tests.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Note: Turning off all of convtest.cpp if UCONFIG_NO_LEGACY_CONVERSION is set
 * is not strictly necessary - it also removes tests for Unicode charsets
 * like UTF-8 that should still work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to exclude only the
 * test cases for legacy charsets.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
     31 
     32 #include "unicode/ucnv.h"
     33 #include "unicode/unistr.h"
     34 #include "unicode/parsepos.h"
     35 #include "unicode/uniset.h"
     36 #include "unicode/ustring.h"
     37 #include "unicode/ures.h"
     38 #include "convtest.h"
     39 #include "unicode/tstdtmod.h"
     40 #include <string.h>
     41 #include <stdlib.h>
     42 
     43 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     44 
     45 enum {
     46     // characters used in test data for callbacks
     47     SUB_CB='?',
     48     SKIP_CB='0',
     49     STOP_CB='.',
     50     ESC_CB='&'
     51 };
     52 
     53 ConversionTest::ConversionTest() {
     54     UErrorCode errorCode=U_ZERO_ERROR;
     55     utf8Cnv=ucnv_open("UTF-8", &errorCode);
     56     ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
     57     if(U_FAILURE(errorCode)) {
     58         errln("unable to open UTF-8 converter");
     59     }
     60 }
     61 
     62 ConversionTest::~ConversionTest() {
     63     ucnv_close(utf8Cnv);
     64 }
     65 
     66 void
     67 ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     68     if (exec) logln("TestSuite ConversionTest: ");
     69     switch (index) {
     70 #if !UCONFIG_NO_FILE_IO
     71         case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break;
     72         case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break;
     73         case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break;
     74 #else
     75         case 0:
     76         case 1:
     77         case 2: name="skip"; break;
     78 #endif
     79         case 3: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break;
     80         default: name=""; break; //needed to end loop
     81     }
     82 }
     83 
     84 // test data interface ----------------------------------------------------- ***
     85 
     86 void
     87 ConversionTest::TestToUnicode() {
     88     ConversionCase cc;
     89     char charset[100], cbopt[4];
     90     const char *option;
     91     UnicodeString s, unicode;
     92     int32_t offsetsLength;
     93     UConverterToUCallback callback;
     94 
     95     TestDataModule *dataModule;
     96     TestData *testData;
     97     const DataMap *testCase;
     98     UErrorCode errorCode;
     99     int32_t i;
    100 
    101     errorCode=U_ZERO_ERROR;
    102     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
    103     if(U_SUCCESS(errorCode)) {
    104         testData=dataModule->createTestData("toUnicode", errorCode);
    105         if(U_SUCCESS(errorCode)) {
    106             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
    107                 if(U_FAILURE(errorCode)) {
    108                     errln("error retrieving conversion/toUnicode test case %d - %s",
    109                             i, u_errorName(errorCode));
    110                     errorCode=U_ZERO_ERROR;
    111                     continue;
    112                 }
    113 
    114                 cc.caseNr=i;
    115 
    116                 s=testCase->getString("charset", errorCode);
    117                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
    118                 cc.charset=charset;
    119 
    120                 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
    121                 unicode=testCase->getString("unicode", errorCode);
    122                 cc.unicode=unicode.getBuffer();
    123                 cc.unicodeLength=unicode.length();
    124 
    125                 offsetsLength=0;
    126                 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
    127                 if(offsetsLength==0) {
    128                     cc.offsets=NULL;
    129                 } else if(offsetsLength!=unicode.length()) {
    130                     errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length",
    131                             i, unicode.length(), offsetsLength);
    132                     errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    133                 }
    134 
    135                 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
    136                 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
    137 
    138                 s=testCase->getString("errorCode", errorCode);
    139                 if(s==UNICODE_STRING("invalid", 7)) {
    140                     cc.outErrorCode=U_INVALID_CHAR_FOUND;
    141                 } else if(s==UNICODE_STRING("illegal", 7)) {
    142                     cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
    143                 } else if(s==UNICODE_STRING("truncated", 9)) {
    144                     cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
    145                 } else if(s==UNICODE_STRING("illesc", 6)) {
    146                     cc.outErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
    147                 } else if(s==UNICODE_STRING("unsuppesc", 9)) {
    148                     cc.outErrorCode=U_UNSUPPORTED_ESCAPE_SEQUENCE;
    149                 } else {
    150                     cc.outErrorCode=U_ZERO_ERROR;
    151                 }
    152 
    153                 s=testCase->getString("callback", errorCode);
    154                 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
    155                 cc.cbopt=cbopt;
    156                 switch(cbopt[0]) {
    157                 case SUB_CB:
    158                     callback=UCNV_TO_U_CALLBACK_SUBSTITUTE;
    159                     break;
    160                 case SKIP_CB:
    161                     callback=UCNV_TO_U_CALLBACK_SKIP;
    162                     break;
    163                 case STOP_CB:
    164                     callback=UCNV_TO_U_CALLBACK_STOP;
    165                     break;
    166                 case ESC_CB:
    167                     callback=UCNV_TO_U_CALLBACK_ESCAPE;
    168                     break;
    169                 default:
    170                     callback=NULL;
    171                     break;
    172                 }
    173                 option=callback==NULL ? cbopt : cbopt+1;
    174                 if(*option==0) {
    175                     option=NULL;
    176                 }
    177 
    178                 cc.invalidChars=testCase->getBinary(cc.invalidLength, "invalidChars", errorCode);
    179 
    180                 if(U_FAILURE(errorCode)) {
    181                     errln("error parsing conversion/toUnicode test case %d - %s",
    182                             i, u_errorName(errorCode));
    183                     errorCode=U_ZERO_ERROR;
    184                 } else {
    185                     logln("TestToUnicode[%d] %s", i, charset);
    186                     ToUnicodeCase(cc, callback, option);
    187                 }
    188             }
    189             delete testData;
    190         }
    191         delete dataModule;
    192     }
    193     else {
    194         dataerrln("Could not load test conversion data");
    195     }
    196 }
    197 
    198 void
    199 ConversionTest::TestFromUnicode() {
    200     ConversionCase cc;
    201     char charset[100], cbopt[4];
    202     const char *option;
    203     UnicodeString s, unicode, invalidUChars;
    204     int32_t offsetsLength, index;
    205     UConverterFromUCallback callback;
    206 
    207     TestDataModule *dataModule;
    208     TestData *testData;
    209     const DataMap *testCase;
    210     const UChar *p;
    211     UErrorCode errorCode;
    212     int32_t i, length;
    213 
    214     errorCode=U_ZERO_ERROR;
    215     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
    216     if(U_SUCCESS(errorCode)) {
    217         testData=dataModule->createTestData("fromUnicode", errorCode);
    218         if(U_SUCCESS(errorCode)) {
    219             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
    220                 if(U_FAILURE(errorCode)) {
    221                     errln("error retrieving conversion/fromUnicode test case %d - %s",
    222                             i, u_errorName(errorCode));
    223                     errorCode=U_ZERO_ERROR;
    224                     continue;
    225                 }
    226 
    227                 cc.caseNr=i;
    228 
    229                 s=testCase->getString("charset", errorCode);
    230                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
    231                 cc.charset=charset;
    232 
    233                 unicode=testCase->getString("unicode", errorCode);
    234                 cc.unicode=unicode.getBuffer();
    235                 cc.unicodeLength=unicode.length();
    236                 cc.bytes=testCase->getBinary(cc.bytesLength, "bytes", errorCode);
    237 
    238                 offsetsLength=0;
    239                 cc.offsets=testCase->getIntVector(offsetsLength, "offsets", errorCode);
    240                 if(offsetsLength==0) {
    241                     cc.offsets=NULL;
    242                 } else if(offsetsLength!=cc.bytesLength) {
    243                     errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length",
    244                             i, cc.bytesLength, offsetsLength);
    245                     errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    246                 }
    247 
    248                 cc.finalFlush= 0!=testCase->getInt28("flush", errorCode);
    249                 cc.fallbacks= 0!=testCase->getInt28("fallbacks", errorCode);
    250 
    251                 s=testCase->getString("errorCode", errorCode);
    252                 if(s==UNICODE_STRING("invalid", 7)) {
    253                     cc.outErrorCode=U_INVALID_CHAR_FOUND;
    254                 } else if(s==UNICODE_STRING("illegal", 7)) {
    255                     cc.outErrorCode=U_ILLEGAL_CHAR_FOUND;
    256                 } else if(s==UNICODE_STRING("truncated", 9)) {
    257                     cc.outErrorCode=U_TRUNCATED_CHAR_FOUND;
    258                 } else {
    259                     cc.outErrorCode=U_ZERO_ERROR;
    260                 }
    261 
    262                 s=testCase->getString("callback", errorCode);
    263                 cc.setSub=0; // default: no subchar
    264 
    265                 if((index=s.indexOf((UChar)0))>0) {
    266                     // read NUL-separated subchar first, if any
    267                     // copy the subchar from Latin-1 characters
    268                     // start after the NUL
    269                     p=s.getTerminatedBuffer();
    270                     length=index+1;
    271                     p+=length;
    272                     length=s.length()-length;
    273                     if(length<=0 || length>=(int32_t)sizeof(cc.subchar)) {
    274                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    275                     } else {
    276                         int32_t j;
    277 
    278                         for(j=0; j<length; ++j) {
    279                             cc.subchar[j]=(char)p[j];
    280                         }
    281                         // NUL-terminate the subchar
    282                         cc.subchar[j]=0;
    283                         cc.setSub=1;
    284                     }
    285 
    286                     // remove the NUL and subchar from s
    287                     s.truncate(index);
    288                 } else if((index=s.indexOf((UChar)0x3d))>0) /* '=' */ {
    289                     // read a substitution string, separated by an equal sign
    290                     p=s.getBuffer()+index+1;
    291                     length=s.length()-(index+1);
    292                     if(length<0 || length>=LENGTHOF(cc.subString)) {
    293                         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    294                     } else {
    295                         u_memcpy(cc.subString, p, length);
    296                         // NUL-terminate the subString
    297                         cc.subString[length]=0;
    298                         cc.setSub=-1;
    299                     }
    300 
    301                     // remove the equal sign and subString from s
    302                     s.truncate(index);
    303                 }
    304 
    305                 s.extract(0, 0x7fffffff, cbopt, sizeof(cbopt), "");
    306                 cc.cbopt=cbopt;
    307                 switch(cbopt[0]) {
    308                 case SUB_CB:
    309                     callback=UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    310                     break;
    311                 case SKIP_CB:
    312                     callback=UCNV_FROM_U_CALLBACK_SKIP;
    313                     break;
    314                 case STOP_CB:
    315                     callback=UCNV_FROM_U_CALLBACK_STOP;
    316                     break;
    317                 case ESC_CB:
    318                     callback=UCNV_FROM_U_CALLBACK_ESCAPE;
    319                     break;
    320                 default:
    321                     callback=NULL;
    322                     break;
    323                 }
    324                 option=callback==NULL ? cbopt : cbopt+1;
    325                 if(*option==0) {
    326                     option=NULL;
    327                 }
    328 
    329                 invalidUChars=testCase->getString("invalidUChars", errorCode);
    330                 cc.invalidUChars=invalidUChars.getBuffer();
    331                 cc.invalidLength=invalidUChars.length();
    332 
    333                 if(U_FAILURE(errorCode)) {
    334                     errln("error parsing conversion/fromUnicode test case %d - %s",
    335                             i, u_errorName(errorCode));
    336                     errorCode=U_ZERO_ERROR;
    337                 } else {
    338                     logln("TestFromUnicode[%d] %s", i, charset);
    339                     FromUnicodeCase(cc, callback, option);
    340                 }
    341             }
    342             delete testData;
    343         }
    344         delete dataModule;
    345     }
    346     else {
    347         dataerrln("Could not load test conversion data");
    348     }
    349 }
    350 
    351 static const UChar ellipsis[]={ 0x2e, 0x2e, 0x2e };
    352 
    353 void
    354 ConversionTest::TestGetUnicodeSet() {
    355     char charset[100];
    356     UnicodeString s, map, mapnot;
    357     int32_t which;
    358 
    359     ParsePosition pos;
    360     UnicodeSet cnvSet, mapSet, mapnotSet, diffSet;
    361     UnicodeSet *cnvSetPtr = &cnvSet;
    362     LocalUConverterPointer cnv;
    363 
    364     TestDataModule *dataModule;
    365     TestData *testData;
    366     const DataMap *testCase;
    367     UErrorCode errorCode;
    368     int32_t i;
    369 
    370     errorCode=U_ZERO_ERROR;
    371     dataModule=TestDataModule::getTestDataModule("conversion", *this, errorCode);
    372     if(U_SUCCESS(errorCode)) {
    373         testData=dataModule->createTestData("getUnicodeSet", errorCode);
    374         if(U_SUCCESS(errorCode)) {
    375             for(i=0; testData->nextCase(testCase, errorCode); ++i) {
    376                 if(U_FAILURE(errorCode)) {
    377                     errln("error retrieving conversion/getUnicodeSet test case %d - %s",
    378                             i, u_errorName(errorCode));
    379                     errorCode=U_ZERO_ERROR;
    380                     continue;
    381                 }
    382 
    383                 s=testCase->getString("charset", errorCode);
    384                 s.extract(0, 0x7fffffff, charset, sizeof(charset), "");
    385 
    386                 map=testCase->getString("map", errorCode);
    387                 mapnot=testCase->getString("mapnot", errorCode);
    388 
    389                 which=testCase->getInt28("which", errorCode);
    390 
    391                 if(U_FAILURE(errorCode)) {
    392                     errln("error parsing conversion/getUnicodeSet test case %d - %s",
    393                             i, u_errorName(errorCode));
    394                     errorCode=U_ZERO_ERROR;
    395                     continue;
    396                 }
    397 
    398                 // test this test case
    399                 mapSet.clear();
    400                 mapnotSet.clear();
    401 
    402                 pos.setIndex(0);
    403                 mapSet.applyPattern(map, pos, 0, NULL, errorCode);
    404                 if(U_FAILURE(errorCode) || pos.getIndex()!=map.length()) {
    405                     errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n"
    406                           "    error index %d  index %d  U+%04x",
    407                             i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), map.char32At(pos.getIndex()));
    408                     errorCode=U_ZERO_ERROR;
    409                     continue;
    410                 }
    411 
    412                 pos.setIndex(0);
    413                 mapnotSet.applyPattern(mapnot, pos, 0, NULL, errorCode);
    414                 if(U_FAILURE(errorCode) || pos.getIndex()!=mapnot.length()) {
    415                     errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n"
    416                           "    error index %d  index %d  U+%04x",
    417                             i, u_errorName(errorCode), pos.getErrorIndex(), pos.getIndex(), mapnot.char32At(pos.getIndex()));
    418                     errorCode=U_ZERO_ERROR;
    419                     continue;
    420                 }
    421 
    422                 logln("TestGetUnicodeSet[%d] %s", i, charset);
    423 
    424                 cnv.adoptInstead(cnv_open(charset, errorCode));
    425                 if(U_FAILURE(errorCode)) {
    426                     errcheckln(errorCode, "error opening \"%s\" for conversion/getUnicodeSet test case %d - %s",
    427                             charset, i, u_errorName(errorCode));
    428                     errorCode=U_ZERO_ERROR;
    429                     continue;
    430                 }
    431 
    432                 ucnv_getUnicodeSet(cnv.getAlias(), cnvSetPtr->toUSet(), (UConverterUnicodeSet)which, &errorCode);
    433 
    434                 if(U_FAILURE(errorCode)) {
    435                     errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s",
    436                             charset, i, u_errorName(errorCode));
    437                     errorCode=U_ZERO_ERROR;
    438                     continue;
    439                 }
    440 
    441                 // are there items that must be in cnvSet but are not?
    442                 (diffSet=mapSet).removeAll(cnvSet);
    443                 if(!diffSet.isEmpty()) {
    444                     diffSet.toPattern(s, TRUE);
    445                     if(s.length()>100) {
    446                         s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
    447                     }
    448                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d",
    449                             charset, i);
    450                     errln(s);
    451                 }
    452 
    453                 // are there items that must not be in cnvSet but are?
    454                 (diffSet=mapnotSet).retainAll(cnvSet);
    455                 if(!diffSet.isEmpty()) {
    456                     diffSet.toPattern(s, TRUE);
    457                     if(s.length()>100) {
    458                         s.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
    459                     }
    460                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d",
    461                             charset, i);
    462                     errln(s);
    463                 }
    464             }
    465             delete testData;
    466         }
    467         delete dataModule;
    468     }
    469     else {
    470         dataerrln("Could not load test conversion data");
    471     }
    472 }
    473 
    474 U_CDECL_BEGIN
    475 static void U_CALLCONV
    476 getUnicodeSetCallback(const void *context,
    477                       UConverterFromUnicodeArgs * /*fromUArgs*/,
    478                       const UChar* /*codeUnits*/,
    479                       int32_t /*length*/,
    480                       UChar32 codePoint,
    481                       UConverterCallbackReason reason,
    482                       UErrorCode *pErrorCode) {
    483     if(reason<=UCNV_IRREGULAR) {
    484         ((UnicodeSet *)context)->remove(codePoint);  // the converter cannot convert this code point
    485         *pErrorCode=U_ZERO_ERROR;                    // skip
    486     }  // else ignore the reset, close and clone calls.
    487 }
    488 U_CDECL_END
    489 
    490 // Compare ucnv_getUnicodeSet() with the set of characters that can be converted.
    491 void
    492 ConversionTest::TestGetUnicodeSet2() {
    493     // Build a string with all code points.
    494     UChar32 cpLimit;
    495     int32_t s0Length;
    496     if(quick) {
    497         cpLimit=s0Length=0x10000;  // BMP only
    498     } else {
    499         cpLimit=0x110000;
    500         s0Length=0x10000+0x200000;  // BMP + surrogate pairs
    501     }
    502     UChar *s0=new UChar[s0Length];
    503     if(s0==NULL) {
    504         return;
    505     }
    506     UChar *s=s0;
    507     UChar32 c;
    508     UChar c2;
    509     // low BMP
    510     for(c=0; c<=0xd7ff; ++c) {
    511         *s++=(UChar)c;
    512     }
    513     // trail surrogates
    514     for(c=0xdc00; c<=0xdfff; ++c) {
    515         *s++=(UChar)c;
    516     }
    517     // lead surrogates
    518     // (after trails so that there is not even one surrogate pair in between)
    519     for(c=0xd800; c<=0xdbff; ++c) {
    520         *s++=(UChar)c;
    521     }
    522     // high BMP
    523     for(c=0xe000; c<=0xffff; ++c) {
    524         *s++=(UChar)c;
    525     }
    526     // supplementary code points = surrogate pairs
    527     if(cpLimit==0x110000) {
    528         for(c=0xd800; c<=0xdbff; ++c) {
    529             for(c2=0xdc00; c2<=0xdfff; ++c2) {
    530                 *s++=(UChar)c;
    531                 *s++=c2;
    532             }
    533         }
    534     }
    535 
    536     static const char *const cnvNames[]={
    537         "UTF-8",
    538         "UTF-7",
    539         "UTF-16",
    540         "US-ASCII",
    541         "ISO-8859-1",
    542         "windows-1252",
    543         "Shift-JIS",
    544         "ibm-1390",  // EBCDIC_STATEFUL table
    545         "ibm-16684",  // DBCS-only extension table based on EBCDIC_STATEFUL table
    546         "HZ",
    547         "ISO-2022-JP",
    548         "JIS7",
    549         "ISO-2022-CN",
    550         "ISO-2022-CN-EXT",
    551         "LMBCS"
    552     };
    553     LocalUConverterPointer cnv;
    554     char buffer[1024];
    555     int32_t i;
    556     for(i=0; i<LENGTHOF(cnvNames); ++i) {
    557         UErrorCode errorCode=U_ZERO_ERROR;
    558         cnv.adoptInstead(cnv_open(cnvNames[i], errorCode));
    559         if(U_FAILURE(errorCode)) {
    560             errcheckln(errorCode, "failed to open converter %s - %s", cnvNames[i], u_errorName(errorCode));
    561             continue;
    562         }
    563         UnicodeSet expected;
    564         ucnv_setFromUCallBack(cnv.getAlias(), getUnicodeSetCallback, &expected, NULL, NULL, &errorCode);
    565         if(U_FAILURE(errorCode)) {
    566             errln("failed to set the callback on converter %s - %s", cnvNames[i], u_errorName(errorCode));
    567             continue;
    568         }
    569         UConverterUnicodeSet which;
    570         for(which=UCNV_ROUNDTRIP_SET; which<UCNV_SET_COUNT; which=(UConverterUnicodeSet)((int)which+1)) {
    571             if(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
    572                 ucnv_setFallback(cnv.getAlias(), TRUE);
    573             }
    574             expected.add(0, cpLimit-1);
    575             s=s0;
    576             UBool flush;
    577             do {
    578                 char *t=buffer;
    579                 flush=(UBool)(s==s0+s0Length);
    580                 ucnv_fromUnicode(cnv.getAlias(), &t, buffer+sizeof(buffer), (const UChar **)&s, s0+s0Length, NULL, flush, &errorCode);
    581                 if(U_FAILURE(errorCode)) {
    582                     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
    583                         errorCode=U_ZERO_ERROR;
    584                         continue;
    585                     } else {
    586                         break;  // unexpected error, should not occur
    587                     }
    588                 }
    589             } while(!flush);
    590             UnicodeSet set;
    591             ucnv_getUnicodeSet(cnv.getAlias(), set.toUSet(), which, &errorCode);
    592             if(cpLimit<0x110000) {
    593                 set.remove(cpLimit, 0x10ffff);
    594             }
    595             if(which==UCNV_ROUNDTRIP_SET) {
    596                 // ignore PUA code points because they will be converted even if they
    597                 // are fallbacks and when other fallbacks are turned off,
    598                 // but ucnv_getUnicodeSet(UCNV_ROUNDTRIP_SET) delivers true roundtrips
    599                 expected.remove(0xe000, 0xf8ff);
    600                 expected.remove(0xf0000, 0xffffd);
    601                 expected.remove(0x100000, 0x10fffd);
    602                 set.remove(0xe000, 0xf8ff);
    603                 set.remove(0xf0000, 0xffffd);
    604                 set.remove(0x100000, 0x10fffd);
    605             }
    606             if(set!=expected) {
    607                 // First try to see if we have different sets because ucnv_getUnicodeSet()
    608                 // added strings: The above conversion method does not tell us what strings might be convertible.
    609                 // Remove strings from the set and compare again.
    610                 // Unfortunately, there are no good, direct set methods for finding out whether there are strings
    611                 // in the set, nor for enumerating or removing just them.
    612                 // Intersect all code points with the set. The intersection will not contain strings.
    613                 UnicodeSet temp(0, 0x10ffff);
    614                 temp.retainAll(set);
    615                 set=temp;
    616             }
    617             if(set!=expected) {
    618                 UnicodeSet diffSet;
    619                 UnicodeString out;
    620 
    621                 // are there items that must be in the set but are not?
    622                 (diffSet=expected).removeAll(set);
    623                 if(!diffSet.isEmpty()) {
    624                     diffSet.toPattern(out, TRUE);
    625                     if(out.length()>100) {
    626                         out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
    627                     }
    628                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - which set: %d",
    629                             cnvNames[i], which);
    630                     errln(out);
    631                 }
    632 
    633                 // are there items that must not be in the set but are?
    634                 (diffSet=set).removeAll(expected);
    635                 if(!diffSet.isEmpty()) {
    636                     diffSet.toPattern(out, TRUE);
    637                     if(out.length()>100) {
    638                         out.replace(100, 0x7fffffff, ellipsis, LENGTHOF(ellipsis));
    639                     }
    640                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - which set: %d",
    641                             cnvNames[i], which);
    642                     errln(out);
    643                 }
    644             }
    645         }
    646     }
    647 
    648     delete [] s0;
    649 }
    650 
    651 // open testdata or ICU data converter ------------------------------------- ***
    652 
    653 UConverter *
    654 ConversionTest::cnv_open(const char *name, UErrorCode &errorCode) {
    655     if(name!=NULL && *name=='*') {
    656         /* loadTestData(): set the data directory */
    657         return ucnv_openPackage(loadTestData(errorCode), name+1, &errorCode);
    658     } else if(name!=NULL && *name=='+') {
    659         return ucnv_open((name+1), &errorCode);
    660     } else {
    661         return ucnv_open(name, &errorCode);
    662     }
    663 }
    664 
    665 // output helpers ---------------------------------------------------------- ***
    666 
    667 static inline char
    668 hexDigit(uint8_t digit) {
    669     return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
    670 }
    671 
    672 static char *
    673 printBytes(const uint8_t *bytes, int32_t length, char *out) {
    674     uint8_t b;
    675 
    676     if(length>0) {
    677         b=*bytes++;
    678         --length;
    679         *out++=hexDigit((uint8_t)(b>>4));
    680         *out++=hexDigit((uint8_t)(b&0xf));
    681     }
    682 
    683     while(length>0) {
    684         b=*bytes++;
    685         --length;
    686         *out++=' ';
    687         *out++=hexDigit((uint8_t)(b>>4));
    688         *out++=hexDigit((uint8_t)(b&0xf));
    689     }
    690     *out++=0;
    691     return out;
    692 }
    693 
    694 static char *
    695 printUnicode(const UChar *unicode, int32_t length, char *out) {
    696     UChar32 c;
    697     int32_t i;
    698 
    699     for(i=0; i<length;) {
    700         if(i>0) {
    701             *out++=' ';
    702         }
    703         U16_NEXT(unicode, i, length, c);
    704         // write 4..6 digits
    705         if(c>=0x100000) {
    706             *out++='1';
    707         }
    708         if(c>=0x10000) {
    709             *out++=hexDigit((uint8_t)((c>>16)&0xf));
    710         }
    711         *out++=hexDigit((uint8_t)((c>>12)&0xf));
    712         *out++=hexDigit((uint8_t)((c>>8)&0xf));
    713         *out++=hexDigit((uint8_t)((c>>4)&0xf));
    714         *out++=hexDigit((uint8_t)(c&0xf));
    715     }
    716     *out++=0;
    717     return out;
    718 }
    719 
    720 static char *
    721 printOffsets(const int32_t *offsets, int32_t length, char *out) {
    722     int32_t i, o, d;
    723 
    724     if(offsets==NULL) {
    725         length=0;
    726     }
    727 
    728     for(i=0; i<length; ++i) {
    729         if(i>0) {
    730             *out++=' ';
    731         }
    732         o=offsets[i];
    733 
    734         // print all offsets with 2 characters each (-x, -9..99, xx)
    735         if(o<-9) {
    736             *out++='-';
    737             *out++='x';
    738         } else if(o<0) {
    739             *out++='-';
    740             *out++=(char)('0'-o);
    741         } else if(o<=99) {
    742             *out++=(d=o/10)==0 ? ' ' : (char)('0'+d);
    743             *out++=(char)('0'+o%10);
    744         } else /* o>99 */ {
    745             *out++='x';
    746             *out++='x';
    747         }
    748     }
    749     *out++=0;
    750     return out;
    751 }
    752 
    753 // toUnicode test worker functions ----------------------------------------- ***
    754 
    755 static int32_t
    756 stepToUnicode(ConversionCase &cc, UConverter *cnv,
    757               UChar *result, int32_t resultCapacity,
    758               int32_t *resultOffsets, /* also resultCapacity */
    759               int32_t step,
    760               UErrorCode *pErrorCode) {
    761     const char *source, *sourceLimit, *bytesLimit;
    762     UChar *target, *targetLimit, *resultLimit;
    763     UBool flush;
    764 
    765     source=(const char *)cc.bytes;
    766     target=result;
    767     bytesLimit=source+cc.bytesLength;
    768     resultLimit=result+resultCapacity;
    769 
    770     if(step>=0) {
    771         // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time
    772         // move only one buffer (in vs. out) at a time to be extra mean
    773         // step==0 performs bulk conversion and generates offsets
    774 
    775         // initialize the partial limits for the loop
    776         if(step==0) {
    777             // use the entire buffers
    778             sourceLimit=bytesLimit;
    779             targetLimit=resultLimit;
    780             flush=cc.finalFlush;
    781         } else {
    782             // start with empty partial buffers
    783             sourceLimit=source;
    784             targetLimit=target;
    785             flush=FALSE;
    786 
    787             // output offsets only for bulk conversion
    788             resultOffsets=NULL;
    789         }
    790 
    791         for(;;) {
    792             // resetting the opposite conversion direction must not affect this one
    793             ucnv_resetFromUnicode(cnv);
    794 
    795             // convert
    796             ucnv_toUnicode(cnv,
    797                 &target, targetLimit,
    798                 &source, sourceLimit,
    799                 resultOffsets,
    800                 flush, pErrorCode);
    801 
    802             // check pointers and errors
    803             if(source>sourceLimit || target>targetLimit) {
    804                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    805                 break;
    806             } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    807                 if(target!=targetLimit) {
    808                     // buffer overflow must only be set when the target is filled
    809                     *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    810                     break;
    811                 } else if(targetLimit==resultLimit) {
    812                     // not just a partial overflow
    813                     break;
    814                 }
    815 
    816                 // the partial target is filled, set a new limit, reset the error and continue
    817                 targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
    818                 *pErrorCode=U_ZERO_ERROR;
    819             } else if(U_FAILURE(*pErrorCode)) {
    820                 // some other error occurred, done
    821                 break;
    822             } else {
    823                 if(source!=sourceLimit) {
    824                     // when no error occurs, then the input must be consumed
    825                     *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    826                     break;
    827                 }
    828 
    829                 if(sourceLimit==bytesLimit) {
    830                     // we are done
    831                     break;
    832                 }
    833 
    834                 // the partial conversion succeeded, set a new limit and continue
    835                 sourceLimit=(bytesLimit-source)>=step ? source+step : bytesLimit;
    836                 flush=(UBool)(cc.finalFlush && sourceLimit==bytesLimit);
    837             }
    838         }
    839     } else /* step<0 */ {
    840         /*
    841          * step==-1: call only ucnv_getNextUChar()
    842          * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar()
    843          *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input,
    844          *   else give it at most (-step-2)/2 bytes
    845          */
    846         UChar32 c;
    847 
    848         // end the loop by getting an index out of bounds error
    849         for(;;) {
    850             // resetting the opposite conversion direction must not affect this one
    851             ucnv_resetFromUnicode(cnv);
    852 
    853             // convert
    854             if((step&1)!=0 /* odd: -1, -3, -5, ... */) {
    855                 sourceLimit=source; // use sourceLimit not as a real limit
    856                                     // but to remember the pre-getNextUChar source pointer
    857                 c=ucnv_getNextUChar(cnv, &source, bytesLimit, pErrorCode);
    858 
    859                 // check pointers and errors
    860                 if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
    861                     if(source!=bytesLimit) {
    862                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    863                     } else {
    864                         *pErrorCode=U_ZERO_ERROR;
    865                     }
    866                     break;
    867                 } else if(U_FAILURE(*pErrorCode)) {
    868                     break;
    869                 }
    870                 // source may not move if c is from previous overflow
    871 
    872                 if(target==resultLimit) {
    873                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    874                     break;
    875                 }
    876                 if(c<=0xffff) {
    877                     *target++=(UChar)c;
    878                 } else {
    879                     *target++=U16_LEAD(c);
    880                     if(target==resultLimit) {
    881                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    882                         break;
    883                     }
    884                     *target++=U16_TRAIL(c);
    885                 }
    886 
    887                 // alternate between -n-1 and -n but leave -1 alone
    888                 if(step<-1) {
    889                     ++step;
    890                 }
    891             } else /* step is even */ {
    892                 // allow only one UChar output
    893                 targetLimit=target<resultLimit ? target+1 : resultLimit;
    894 
    895                 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit)
    896                 // and never output offsets
    897                 if(step==-2) {
    898                     sourceLimit=bytesLimit;
    899                 } else {
    900                     sourceLimit=source+(-step-2)/2;
    901                     if(sourceLimit>bytesLimit) {
    902                         sourceLimit=bytesLimit;
    903                     }
    904                 }
    905 
    906                 ucnv_toUnicode(cnv,
    907                     &target, targetLimit,
    908                     &source, sourceLimit,
    909                     NULL, (UBool)(sourceLimit==bytesLimit), pErrorCode);
    910 
    911                 // check pointers and errors
    912                 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    913                     if(target!=targetLimit) {
    914                         // buffer overflow must only be set when the target is filled
    915                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    916                         break;
    917                     } else if(targetLimit==resultLimit) {
    918                         // not just a partial overflow
    919                         break;
    920                     }
    921 
    922                     // the partial target is filled, set a new limit and continue
    923                     *pErrorCode=U_ZERO_ERROR;
    924                 } else if(U_FAILURE(*pErrorCode)) {
    925                     // some other error occurred, done
    926                     break;
    927                 } else {
    928                     if(source!=sourceLimit) {
    929                         // when no error occurs, then the input must be consumed
    930                         *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    931                         break;
    932                     }
    933 
    934                     // we are done (flush==TRUE) but we continue, to get the index out of bounds error above
    935                 }
    936 
    937                 --step;
    938             }
    939         }
    940     }
    941 
    942     return (int32_t)(target-result);
    943 }
    944 
    945 UBool
    946 ConversionTest::ToUnicodeCase(ConversionCase &cc, UConverterToUCallback callback, const char *option) {
    947     // open the converter
    948     IcuTestErrorCode errorCode(*this, "ToUnicodeCase");
    949     LocalUConverterPointer cnv(cnv_open(cc.charset, errorCode));
    950     if(errorCode.isFailure()) {
    951         errcheckln(errorCode, "toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
    952                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, errorCode.errorName());
    953         errorCode.reset();
    954         return FALSE;
    955     }
    956 
    957     // set the callback
    958     if(callback!=NULL) {
    959         ucnv_setToUCallBack(cnv.getAlias(), callback, option, NULL, NULL, errorCode);
    960         if(U_FAILURE(errorCode)) {
    961             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s",
    962                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
    963             return FALSE;
    964         }
    965     }
    966 
    967     int32_t resultOffsets[256];
    968     UChar result[256];
    969     int32_t resultLength;
    970     UBool ok;
    971 
    972     static const struct {
    973         int32_t step;
    974         const char *name;
    975     } steps[]={
    976         { 0, "bulk" }, // must be first for offsets to be checked
    977         { 1, "step=1" },
    978         { 3, "step=3" },
    979         { 7, "step=7" },
    980         { -1, "getNext" },
    981         { -2, "toU(bulk)+getNext" },
    982         { -3, "getNext+toU(bulk)" },
    983         { -4, "toU(1)+getNext" },
    984         { -5, "getNext+toU(1)" },
    985         { -12, "toU(5)+getNext" },
    986         { -13, "getNext+toU(5)" },
    987     };
    988     int32_t i, step;
    989 
    990     ok=TRUE;
    991     for(i=0; i<LENGTHOF(steps) && ok; ++i) {
    992         step=steps[i].step;
    993         if(step<0 && !cc.finalFlush) {
    994             // skip ucnv_getNextUChar() if !finalFlush because
    995             // ucnv_getNextUChar() always implies flush
    996             continue;
    997         }
    998         if(step!=0) {
    999             // bulk test is first, then offsets are not checked any more
   1000             cc.offsets=NULL;
   1001         }
   1002         else {
   1003             memset(resultOffsets, -1, LENGTHOF(resultOffsets));
   1004         }
   1005         memset(result, -1, LENGTHOF(result));
   1006         errorCode.reset();
   1007         resultLength=stepToUnicode(cc, cnv.getAlias(),
   1008                                 result, LENGTHOF(result),
   1009                                 step==0 ? resultOffsets : NULL,
   1010                                 step, errorCode);
   1011         ok=checkToUnicode(
   1012                 cc, cnv.getAlias(), steps[i].name,
   1013                 result, resultLength,
   1014                 cc.offsets!=NULL ? resultOffsets : NULL,
   1015                 errorCode);
   1016         if(errorCode.isFailure() || !cc.finalFlush) {
   1017             // reset if an error occurred or we did not flush
   1018             // otherwise do nothing to make sure that flushing resets
   1019             ucnv_resetToUnicode(cnv.getAlias());
   1020         }
   1021         if (cc.offsets != NULL && resultOffsets[resultLength] != -1) {
   1022             errln("toUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
   1023                 cc.caseNr, cc.charset, resultLength);
   1024         }
   1025         if (result[resultLength] != (UChar)-1) {
   1026             errln("toUnicode[%d](%s) Conversion wrote too much to result at index %d",
   1027                 cc.caseNr, cc.charset, resultLength);
   1028         }
   1029     }
   1030 
   1031     // not a real loop, just a convenience for breaking out of the block
   1032     while(ok && cc.finalFlush) {
   1033         // test ucnv_toUChars()
   1034         memset(result, 0, sizeof(result));
   1035 
   1036         errorCode.reset();
   1037         resultLength=ucnv_toUChars(cnv.getAlias(),
   1038                         result, LENGTHOF(result),
   1039                         (const char *)cc.bytes, cc.bytesLength,
   1040                         errorCode);
   1041         ok=checkToUnicode(
   1042                 cc, cnv.getAlias(), "toUChars",
   1043                 result, resultLength,
   1044                 NULL,
   1045                 errorCode);
   1046         if(!ok) {
   1047             break;
   1048         }
   1049 
   1050         // test preflighting
   1051         // keep the correct result for simple checking
   1052         errorCode.reset();
   1053         resultLength=ucnv_toUChars(cnv.getAlias(),
   1054                         NULL, 0,
   1055                         (const char *)cc.bytes, cc.bytesLength,
   1056                         errorCode);
   1057         if(errorCode.get()==U_STRING_NOT_TERMINATED_WARNING || errorCode.get()==U_BUFFER_OVERFLOW_ERROR) {
   1058             errorCode.reset();
   1059         }
   1060         ok=checkToUnicode(
   1061                 cc, cnv.getAlias(), "preflight toUChars",
   1062                 result, resultLength,
   1063                 NULL,
   1064                 errorCode);
   1065         break;
   1066     }
   1067 
   1068     errorCode.reset();  // all errors have already been reported
   1069     return ok;
   1070 }
   1071 
   1072 UBool
   1073 ConversionTest::checkToUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
   1074                                const UChar *result, int32_t resultLength,
   1075                                const int32_t *resultOffsets,
   1076                                UErrorCode resultErrorCode) {
   1077     char resultInvalidChars[8];
   1078     int8_t resultInvalidLength;
   1079     UErrorCode errorCode;
   1080 
   1081     const char *msg;
   1082 
   1083     // reset the message; NULL will mean "ok"
   1084     msg=NULL;
   1085 
   1086     errorCode=U_ZERO_ERROR;
   1087     resultInvalidLength=sizeof(resultInvalidChars);
   1088     ucnv_getInvalidChars(cnv, resultInvalidChars, &resultInvalidLength, &errorCode);
   1089     if(U_FAILURE(errorCode)) {
   1090         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s",
   1091                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
   1092         return FALSE;
   1093     }
   1094 
   1095     // check everything that might have gone wrong
   1096     if(cc.unicodeLength!=resultLength) {
   1097         msg="wrong result length";
   1098     } else if(0!=u_memcmp(cc.unicode, result, cc.unicodeLength)) {
   1099         msg="wrong result string";
   1100     } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.unicodeLength*sizeof(*cc.offsets))) {
   1101         msg="wrong offsets";
   1102     } else if(cc.outErrorCode!=resultErrorCode) {
   1103         msg="wrong error code";
   1104     } else if(cc.invalidLength!=resultInvalidLength) {
   1105         msg="wrong length of last invalid input";
   1106     } else if(0!=memcmp(cc.invalidChars, resultInvalidChars, cc.invalidLength)) {
   1107         msg="wrong last invalid input";
   1108     }
   1109 
   1110     if(msg==NULL) {
   1111         return TRUE;
   1112     } else {
   1113         char buffer[2000]; // one buffer for all strings
   1114         char *s, *bytesString, *unicodeString, *resultString,
   1115             *offsetsString, *resultOffsetsString,
   1116             *invalidCharsString, *resultInvalidCharsString;
   1117 
   1118         bytesString=s=buffer;
   1119         s=printBytes(cc.bytes, cc.bytesLength, bytesString);
   1120         s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString=s);
   1121         s=printUnicode(result, resultLength, resultString=s);
   1122         s=printOffsets(cc.offsets, cc.unicodeLength, offsetsString=s);
   1123         s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
   1124         s=printBytes(cc.invalidChars, cc.invalidLength, invalidCharsString=s);
   1125         s=printBytes((uint8_t *)resultInvalidChars, resultInvalidLength, resultInvalidCharsString=s);
   1126 
   1127         if((s-buffer)>(int32_t)sizeof(buffer)) {
   1128             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n",
   1129                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
   1130             exit(1);
   1131         }
   1132 
   1133         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
   1134               "  bytes <%s>[%d]\n"
   1135               " expected <%s>[%d]\n"
   1136               "  result  <%s>[%d]\n"
   1137               " offsets         <%s>\n"
   1138               "  result offsets <%s>\n"
   1139               " error code expected %s got %s\n"
   1140               "  invalidChars expected <%s> got <%s>\n",
   1141               cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
   1142               bytesString, cc.bytesLength,
   1143               unicodeString, cc.unicodeLength,
   1144               resultString, resultLength,
   1145               offsetsString,
   1146               resultOffsetsString,
   1147               u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
   1148               invalidCharsString, resultInvalidCharsString);
   1149 
   1150         return FALSE;
   1151     }
   1152 }
   1153 
   1154 // fromUnicode test worker functions --------------------------------------- ***
   1155 
   1156 static int32_t
   1157 stepFromUTF8(ConversionCase &cc,
   1158              UConverter *utf8Cnv, UConverter *cnv,
   1159              char *result, int32_t resultCapacity,
   1160              int32_t step,
   1161              UErrorCode *pErrorCode) {
   1162     const char *source, *sourceLimit, *utf8Limit;
   1163     UChar pivotBuffer[32];
   1164     UChar *pivotSource, *pivotTarget, *pivotLimit;
   1165     char *target, *targetLimit, *resultLimit;
   1166     UBool flush;
   1167 
   1168     source=cc.utf8;
   1169     pivotSource=pivotTarget=pivotBuffer;
   1170     target=result;
   1171     utf8Limit=source+cc.utf8Length;
   1172     resultLimit=result+resultCapacity;
   1173 
   1174     // call ucnv_convertEx() with in/out buffers no larger than (step) at a time
   1175     // move only one buffer (in vs. out) at a time to be extra mean
   1176     // step==0 performs bulk conversion
   1177 
   1178     // initialize the partial limits for the loop
   1179     if(step==0) {
   1180         // use the entire buffers
   1181         sourceLimit=utf8Limit;
   1182         targetLimit=resultLimit;
   1183         flush=cc.finalFlush;
   1184 
   1185         pivotLimit=pivotBuffer+LENGTHOF(pivotBuffer);
   1186     } else {
   1187         // start with empty partial buffers
   1188         sourceLimit=source;
   1189         targetLimit=target;
   1190         flush=FALSE;
   1191 
   1192         // empty pivot is not allowed, make it of length step
   1193         pivotLimit=pivotBuffer+step;
   1194     }
   1195 
   1196     for(;;) {
   1197         // resetting the opposite conversion direction must not affect this one
   1198         ucnv_resetFromUnicode(utf8Cnv);
   1199         ucnv_resetToUnicode(cnv);
   1200 
   1201         // convert
   1202         ucnv_convertEx(cnv, utf8Cnv,
   1203             &target, targetLimit,
   1204             &source, sourceLimit,
   1205             pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
   1206             FALSE, flush, pErrorCode);
   1207 
   1208         // check pointers and errors
   1209         if(source>sourceLimit || target>targetLimit) {
   1210             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1211             break;
   1212         } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   1213             if(target!=targetLimit) {
   1214                 // buffer overflow must only be set when the target is filled
   1215                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1216                 break;
   1217             } else if(targetLimit==resultLimit) {
   1218                 // not just a partial overflow
   1219                 break;
   1220             }
   1221 
   1222             // the partial target is filled, set a new limit, reset the error and continue
   1223             targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
   1224             *pErrorCode=U_ZERO_ERROR;
   1225         } else if(U_FAILURE(*pErrorCode)) {
   1226             if(pivotSource==pivotBuffer) {
   1227                 // toUnicode error, should not occur
   1228                 // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
   1229                 break;
   1230             } else {
   1231                 // fromUnicode error
   1232                 // some other error occurred, done
   1233                 break;
   1234             }
   1235         } else {
   1236             if(source!=sourceLimit) {
   1237                 // when no error occurs, then the input must be consumed
   1238                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1239                 break;
   1240             }
   1241 
   1242             if(sourceLimit==utf8Limit) {
   1243                 // we are done
   1244                 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
   1245                     // ucnv_convertEx() warns about not terminating the output
   1246                     // but ucnv_fromUnicode() does not and so
   1247                     // checkFromUnicode() does not expect it
   1248                     *pErrorCode=U_ZERO_ERROR;
   1249                 }
   1250                 break;
   1251             }
   1252 
   1253             // the partial conversion succeeded, set a new limit and continue
   1254             sourceLimit=(utf8Limit-source)>=step ? source+step : utf8Limit;
   1255             flush=(UBool)(cc.finalFlush && sourceLimit==utf8Limit);
   1256         }
   1257     }
   1258 
   1259     return (int32_t)(target-result);
   1260 }
   1261 
   1262 static int32_t
   1263 stepFromUnicode(ConversionCase &cc, UConverter *cnv,
   1264                 char *result, int32_t resultCapacity,
   1265                 int32_t *resultOffsets, /* also resultCapacity */
   1266                 int32_t step,
   1267                 UErrorCode *pErrorCode) {
   1268     const UChar *source, *sourceLimit, *unicodeLimit;
   1269     char *target, *targetLimit, *resultLimit;
   1270     UBool flush;
   1271 
   1272     source=cc.unicode;
   1273     target=result;
   1274     unicodeLimit=source+cc.unicodeLength;
   1275     resultLimit=result+resultCapacity;
   1276 
   1277     // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time
   1278     // move only one buffer (in vs. out) at a time to be extra mean
   1279     // step==0 performs bulk conversion and generates offsets
   1280 
   1281     // initialize the partial limits for the loop
   1282     if(step==0) {
   1283         // use the entire buffers
   1284         sourceLimit=unicodeLimit;
   1285         targetLimit=resultLimit;
   1286         flush=cc.finalFlush;
   1287     } else {
   1288         // start with empty partial buffers
   1289         sourceLimit=source;
   1290         targetLimit=target;
   1291         flush=FALSE;
   1292 
   1293         // output offsets only for bulk conversion
   1294         resultOffsets=NULL;
   1295     }
   1296 
   1297     for(;;) {
   1298         // resetting the opposite conversion direction must not affect this one
   1299         ucnv_resetToUnicode(cnv);
   1300 
   1301         // convert
   1302         ucnv_fromUnicode(cnv,
   1303             &target, targetLimit,
   1304             &source, sourceLimit,
   1305             resultOffsets,
   1306             flush, pErrorCode);
   1307 
   1308         // check pointers and errors
   1309         if(source>sourceLimit || target>targetLimit) {
   1310             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1311             break;
   1312         } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
   1313             if(target!=targetLimit) {
   1314                 // buffer overflow must only be set when the target is filled
   1315                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1316                 break;
   1317             } else if(targetLimit==resultLimit) {
   1318                 // not just a partial overflow
   1319                 break;
   1320             }
   1321 
   1322             // the partial target is filled, set a new limit, reset the error and continue
   1323             targetLimit=(resultLimit-target)>=step ? target+step : resultLimit;
   1324             *pErrorCode=U_ZERO_ERROR;
   1325         } else if(U_FAILURE(*pErrorCode)) {
   1326             // some other error occurred, done
   1327             break;
   1328         } else {
   1329             if(source!=sourceLimit) {
   1330                 // when no error occurs, then the input must be consumed
   1331                 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1332                 break;
   1333             }
   1334 
   1335             if(sourceLimit==unicodeLimit) {
   1336                 // we are done
   1337                 break;
   1338             }
   1339 
   1340             // the partial conversion succeeded, set a new limit and continue
   1341             sourceLimit=(unicodeLimit-source)>=step ? source+step : unicodeLimit;
   1342             flush=(UBool)(cc.finalFlush && sourceLimit==unicodeLimit);
   1343         }
   1344     }
   1345 
   1346     return (int32_t)(target-result);
   1347 }
   1348 
   1349 UBool
   1350 ConversionTest::FromUnicodeCase(ConversionCase &cc, UConverterFromUCallback callback, const char *option) {
   1351     UConverter *cnv;
   1352     UErrorCode errorCode;
   1353 
   1354     // open the converter
   1355     errorCode=U_ZERO_ERROR;
   1356     cnv=cnv_open(cc.charset, errorCode);
   1357     if(U_FAILURE(errorCode)) {
   1358         errcheckln(errorCode, "fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s",
   1359                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1360         return FALSE;
   1361     }
   1362     ucnv_resetToUnicode(utf8Cnv);
   1363 
   1364     // set the callback
   1365     if(callback!=NULL) {
   1366         ucnv_setFromUCallBack(cnv, callback, option, NULL, NULL, &errorCode);
   1367         if(U_FAILURE(errorCode)) {
   1368             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s",
   1369                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1370             ucnv_close(cnv);
   1371             return FALSE;
   1372         }
   1373     }
   1374 
   1375     // set the fallbacks flag
   1376     // TODO change with Jitterbug 2401, then add a similar call for toUnicode too
   1377     ucnv_setFallback(cnv, cc.fallbacks);
   1378 
   1379     // set the subchar
   1380     int32_t length;
   1381 
   1382     if(cc.setSub>0) {
   1383         length=(int32_t)strlen(cc.subchar);
   1384         ucnv_setSubstChars(cnv, cc.subchar, (int8_t)length, &errorCode);
   1385         if(U_FAILURE(errorCode)) {
   1386             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstChars() failed - %s",
   1387                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1388             ucnv_close(cnv);
   1389             return FALSE;
   1390         }
   1391     } else if(cc.setSub<0) {
   1392         ucnv_setSubstString(cnv, cc.subString, -1, &errorCode);
   1393         if(U_FAILURE(errorCode)) {
   1394             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubstString() failed - %s",
   1395                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, u_errorName(errorCode));
   1396             ucnv_close(cnv);
   1397             return FALSE;
   1398         }
   1399     }
   1400 
   1401     // convert unicode to utf8
   1402     char utf8[256];
   1403     cc.utf8=utf8;
   1404     u_strToUTF8(utf8, LENGTHOF(utf8), &cc.utf8Length,
   1405                 cc.unicode, cc.unicodeLength,
   1406                 &errorCode);
   1407     if(U_FAILURE(errorCode)) {
   1408         // skip UTF-8 testing of a string with an unpaired surrogate,
   1409         // or of one that's too long
   1410         // toUnicode errors are tested in cintltst TestConvertExFromUTF8()
   1411         cc.utf8Length=-1;
   1412     }
   1413 
   1414     int32_t resultOffsets[256];
   1415     char result[256];
   1416     int32_t resultLength;
   1417     UBool ok;
   1418 
   1419     static const struct {
   1420         int32_t step;
   1421         const char *name, *utf8Name;
   1422     } steps[]={
   1423         { 0, "bulk",   "utf8" }, // must be first for offsets to be checked
   1424         { 1, "step=1", "utf8 step=1" },
   1425         { 3, "step=3", "utf8 step=3" },
   1426         { 7, "step=7", "utf8 step=7" }
   1427     };
   1428     int32_t i, step;
   1429 
   1430     ok=TRUE;
   1431     for(i=0; i<LENGTHOF(steps) && ok; ++i) {
   1432         step=steps[i].step;
   1433         memset(resultOffsets, -1, LENGTHOF(resultOffsets));
   1434         memset(result, -1, LENGTHOF(result));
   1435         errorCode=U_ZERO_ERROR;
   1436         resultLength=stepFromUnicode(cc, cnv,
   1437                                 result, LENGTHOF(result),
   1438                                 step==0 ? resultOffsets : NULL,
   1439                                 step, &errorCode);
   1440         ok=checkFromUnicode(
   1441                 cc, cnv, steps[i].name,
   1442                 (uint8_t *)result, resultLength,
   1443                 cc.offsets!=NULL ? resultOffsets : NULL,
   1444                 errorCode);
   1445         if(U_FAILURE(errorCode) || !cc.finalFlush) {
   1446             // reset if an error occurred or we did not flush
   1447             // otherwise do nothing to make sure that flushing resets
   1448             ucnv_resetFromUnicode(cnv);
   1449         }
   1450         if (resultOffsets[resultLength] != -1) {
   1451             errln("fromUnicode[%d](%s) Conversion wrote too much to offsets at index %d",
   1452                 cc.caseNr, cc.charset, resultLength);
   1453         }
   1454         if (result[resultLength] != (char)-1) {
   1455             errln("fromUnicode[%d](%s) Conversion wrote too much to result at index %d",
   1456                 cc.caseNr, cc.charset, resultLength);
   1457         }
   1458 
   1459         // bulk test is first, then offsets are not checked any more
   1460         cc.offsets=NULL;
   1461 
   1462         // test direct conversion from UTF-8
   1463         if(cc.utf8Length>=0) {
   1464             errorCode=U_ZERO_ERROR;
   1465             resultLength=stepFromUTF8(cc, utf8Cnv, cnv,
   1466                                     result, LENGTHOF(result),
   1467                                     step, &errorCode);
   1468             ok=checkFromUnicode(
   1469                     cc, cnv, steps[i].utf8Name,
   1470                     (uint8_t *)result, resultLength,
   1471                     NULL,
   1472                     errorCode);
   1473             if(U_FAILURE(errorCode) || !cc.finalFlush) {
   1474                 // reset if an error occurred or we did not flush
   1475                 // otherwise do nothing to make sure that flushing resets
   1476                 ucnv_resetToUnicode(utf8Cnv);
   1477                 ucnv_resetFromUnicode(cnv);
   1478             }
   1479         }
   1480     }
   1481 
   1482     // not a real loop, just a convenience for breaking out of the block
   1483     while(ok && cc.finalFlush) {
   1484         // test ucnv_fromUChars()
   1485         memset(result, 0, sizeof(result));
   1486 
   1487         errorCode=U_ZERO_ERROR;
   1488         resultLength=ucnv_fromUChars(cnv,
   1489                         result, LENGTHOF(result),
   1490                         cc.unicode, cc.unicodeLength,
   1491                         &errorCode);
   1492         ok=checkFromUnicode(
   1493                 cc, cnv, "fromUChars",
   1494                 (uint8_t *)result, resultLength,
   1495                 NULL,
   1496                 errorCode);
   1497         if(!ok) {
   1498             break;
   1499         }
   1500 
   1501         // test preflighting
   1502         // keep the correct result for simple checking
   1503         errorCode=U_ZERO_ERROR;
   1504         resultLength=ucnv_fromUChars(cnv,
   1505                         NULL, 0,
   1506                         cc.unicode, cc.unicodeLength,
   1507                         &errorCode);
   1508         if(errorCode==U_STRING_NOT_TERMINATED_WARNING || errorCode==U_BUFFER_OVERFLOW_ERROR) {
   1509             errorCode=U_ZERO_ERROR;
   1510         }
   1511         ok=checkFromUnicode(
   1512                 cc, cnv, "preflight fromUChars",
   1513                 (uint8_t *)result, resultLength,
   1514                 NULL,
   1515                 errorCode);
   1516         break;
   1517     }
   1518 
   1519     ucnv_close(cnv);
   1520     return ok;
   1521 }
   1522 
   1523 UBool
   1524 ConversionTest::checkFromUnicode(ConversionCase &cc, UConverter *cnv, const char *name,
   1525                                  const uint8_t *result, int32_t resultLength,
   1526                                  const int32_t *resultOffsets,
   1527                                  UErrorCode resultErrorCode) {
   1528     UChar resultInvalidUChars[8];
   1529     int8_t resultInvalidLength;
   1530     UErrorCode errorCode;
   1531 
   1532     const char *msg;
   1533 
   1534     // reset the message; NULL will mean "ok"
   1535     msg=NULL;
   1536 
   1537     errorCode=U_ZERO_ERROR;
   1538     resultInvalidLength=LENGTHOF(resultInvalidUChars);
   1539     ucnv_getInvalidUChars(cnv, resultInvalidUChars, &resultInvalidLength, &errorCode);
   1540     if(U_FAILURE(errorCode)) {
   1541         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s",
   1542                 cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, u_errorName(errorCode));
   1543         return FALSE;
   1544     }
   1545 
   1546     // check everything that might have gone wrong
   1547     if(cc.bytesLength!=resultLength) {
   1548         msg="wrong result length";
   1549     } else if(0!=memcmp(cc.bytes, result, cc.bytesLength)) {
   1550         msg="wrong result string";
   1551     } else if(cc.offsets!=NULL && 0!=memcmp(cc.offsets, resultOffsets, cc.bytesLength*sizeof(*cc.offsets))) {
   1552         msg="wrong offsets";
   1553     } else if(cc.outErrorCode!=resultErrorCode) {
   1554         msg="wrong error code";
   1555     } else if(cc.invalidLength!=resultInvalidLength) {
   1556         msg="wrong length of last invalid input";
   1557     } else if(0!=u_memcmp(cc.invalidUChars, resultInvalidUChars, cc.invalidLength)) {
   1558         msg="wrong last invalid input";
   1559     }
   1560 
   1561     if(msg==NULL) {
   1562         return TRUE;
   1563     } else {
   1564         char buffer[2000]; // one buffer for all strings
   1565         char *s, *unicodeString, *bytesString, *resultString,
   1566             *offsetsString, *resultOffsetsString,
   1567             *invalidCharsString, *resultInvalidUCharsString;
   1568 
   1569         unicodeString=s=buffer;
   1570         s=printUnicode(cc.unicode, cc.unicodeLength, unicodeString);
   1571         s=printBytes(cc.bytes, cc.bytesLength, bytesString=s);
   1572         s=printBytes(result, resultLength, resultString=s);
   1573         s=printOffsets(cc.offsets, cc.bytesLength, offsetsString=s);
   1574         s=printOffsets(resultOffsets, resultLength, resultOffsetsString=s);
   1575         s=printUnicode(cc.invalidUChars, cc.invalidLength, invalidCharsString=s);
   1576         s=printUnicode(resultInvalidUChars, resultInvalidLength, resultInvalidUCharsString=s);
   1577 
   1578         if((s-buffer)>(int32_t)sizeof(buffer)) {
   1579             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n",
   1580                     cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, (int)(s-buffer));
   1581             exit(1);
   1582         }
   1583 
   1584         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n"
   1585               "  unicode <%s>[%d]\n"
   1586               " expected <%s>[%d]\n"
   1587               "  result  <%s>[%d]\n"
   1588               " offsets         <%s>\n"
   1589               "  result offsets <%s>\n"
   1590               " error code expected %s got %s\n"
   1591               "  invalidChars expected <%s> got <%s>\n",
   1592               cc.caseNr, cc.charset, cc.cbopt, cc.fallbacks, cc.finalFlush, name, msg,
   1593               unicodeString, cc.unicodeLength,
   1594               bytesString, cc.bytesLength,
   1595               resultString, resultLength,
   1596               offsetsString,
   1597               resultOffsetsString,
   1598               u_errorName(cc.outErrorCode), u_errorName(resultErrorCode),
   1599               invalidCharsString, resultInvalidUCharsString);
   1600 
   1601         return FALSE;
   1602     }
   1603 }
   1604 
   1605 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
   1606