Home | History | Annotate | Download | only in cintltst
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 1997-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /*****************************************************************************
      9 *
     10 * File ncnvtst.c
     11 *
     12 * Modification History:
     13 *        Name                     Description
     14 *   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
     15 ******************************************************************************
     16 */
     17 #include <stdio.h>
     18 #include <stdlib.h>
     19 #include <string.h>
     20 #include "unicode/uloc.h"
     21 #include "unicode/ucnv.h"
     22 #include "unicode/utypes.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/uset.h"
     25 #include "unicode/utf8.h"
     26 #include "unicode/utf16.h"
     27 #include "cintltst.h"
     28 #include "cmemory.h"
     29 
/* General length limit for this test file; its users are outside this excerpt. */
#define MAX_LENGTH 999

/* Highest Unicode code point, and the first/last code units of the surrogate range. */
#define UNICODE_LIMIT 0x10FFFF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_LOW_END       0xDFFF

/* Input/output buffer sizes; setNuConvTestName() prints them in the test label. */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Receives the label formatted by setNuConvTestName(). */
static char     gNuConvTestName[1024];
     39 
     40 #define nct_min(x,y)  ((x<y) ? x : y)
     41 
     42 static void printSeq(const unsigned char* a, int len);
     43 static void printSeqErr(const unsigned char* a, int len);
     44 static void printUSeq(const UChar* a, int len);
     45 static void printUSeqErr(const UChar* a, int len);
     46 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     47                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     48 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
     49                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     50 
     51 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     52                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
     53 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
     54                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
     55 
     56 static void setNuConvTestName(const char *codepage, const char *direction)
     57 {
     58     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
     59         codepage,
     60         direction,
     61         (int)gInBufferSize,
     62         (int)gOutBufferSize);
     63 }
     64 
     65 
     66 static void TestSurrogateBehaviour(void);
     67 static void TestErrorBehaviour(void);
     68 
     69 #if !UCONFIG_NO_LEGACY_CONVERSION
     70 static void TestToUnicodeErrorBehaviour(void);
     71 static void TestGetNextErrorBehaviour(void);
     72 #endif
     73 
     74 static void TestRegressionUTF8(void);
     75 static void TestRegressionUTF32(void);
     76 static void TestAvailableConverters(void);
     77 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
     78 static void TestResetBehaviour(void);
     79 static void TestTruncated(void);
     80 static void TestUnicodeSet(void);
     81 
     82 static void TestWithBufferSize(int32_t osize, int32_t isize);
     83 
     84 
     85 static void printSeq(const unsigned char* a, int len)
     86 {
     87     int i=0;
     88     log_verbose("\n{");
     89     while (i<len)
     90         log_verbose("0x%02X ", a[i++]);
     91     log_verbose("}\n");
     92 }
     93 
     94 static void printUSeq(const UChar* a, int len)
     95 {
     96     int i=0;
     97     log_verbose("\n{");
     98     while (i<len)
     99         log_verbose("%0x04X ", a[i++]);
    100     log_verbose("}\n");
    101 }
    102 
    103 static void printSeqErr(const unsigned char* a, int len)
    104 {
    105     int i=0;
    106     fprintf(stderr, "\n{");
    107     while (i<len)  fprintf(stderr, "0x%02X ", a[i++]);
    108     fprintf(stderr, "}\n");
    109 }
    110 
    111 static void printUSeqErr(const UChar* a, int len)
    112 {
    113     int i=0;
    114     fprintf(stderr, "\n{");
    115     while (i<len)
    116         fprintf(stderr, "0x%04X ", a[i++]);
    117     fprintf(stderr,"}\n");
    118 }
    119 
    120 void addExtraTests(TestNode** root);
    121 
    122 void addExtraTests(TestNode** root)
    123 {
    124      addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
    125      addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
    126 
    127 #if !UCONFIG_NO_LEGACY_CONVERSION
    128      addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
    129      addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
    130 #endif
    131 
    132      addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
    133      addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
    134      addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
    135      addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
    136      addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
    137      addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
    138      addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
    139 }
    140 
    141 /*test surrogate behaviour*/
    142 static void TestSurrogateBehaviour(){
    143     log_verbose("Testing for SBCS and LATIN_1\n");
    144     {
    145         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
    146         const uint8_t expected[] = {0x31, 0x1a, 0x32};
    147 
    148 #if !UCONFIG_NO_LEGACY_CONVERSION
    149         /*SBCS*/
    150         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    151                 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
    152             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
    153 #endif
    154 
    155         /*LATIN_1*/
    156         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    157                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
    158             log_err("u-> LATIN_1 not match.\n");
    159 
    160     }
    161 
    162 #if !UCONFIG_NO_LEGACY_CONVERSION
    163     log_verbose("Testing for DBCS and MBCS\n");
    164     {
    165         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
    166         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    167         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
    168 
    169         /*DBCS*/
    170         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    171                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    172             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    173         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    174                 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
    175             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    176         /*MBCS*/
    177         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    178                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    179             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    180         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    181                 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
    182             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    183     }
    184 
    185     log_verbose("Testing for ISO-2022-jp\n");
    186     {
    187         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    188 
    189         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
    190                                     0x31,0x1A, 0x32};
    191 
    192 
    193         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
    194 
    195         /*iso-2022-jp*/
    196         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    197                 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
    198             log_err("u-> not match.\n");
    199         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    200                 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
    201             log_err("u->  not match.\n");
    202     }
    203 
    204    /* BEGIN android-removed */
    205    /* To save space, Android does not build full ISO-2022-CN tables.
    206       We skip the tests for ISO-2022-CN. */
    207    /*
    208     log_verbose("Testing for ISO-2022-cn\n");
    209     {
    210         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    211 
    212         static const uint8_t expected[] = {
    213                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
    214                                     0x36, 0x21,
    215                                     0x0F, 0x31,
    216                                     0x1A,
    217                                     0x32
    218                                     };
    219 
    220 
    221 
    222         static const int32_t offsets[] = {
    223                                     0,    0,    0,    0,    0,    0,    0,
    224                                     1,    1,
    225                                     2,    2,
    226                                     3,
    227                                     5,  };
    228 
    229         // iso-2022-CN  android-change
    230         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    231                 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
    232             log_err("u-> not match.\n");
    233         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    234                 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
    235             log_err("u-> not match.\n");
    236     }
    237     */
    238     /* END android-removed */
    239 
    240         log_verbose("Testing for ISO-2022-kr\n");
    241     {
    242         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    243 
    244         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
    245                                     0x0E, 0x6C, 0x69,
    246                                     0x0f, 0x1A,
    247                                     0x0e, 0x6F, 0x4B,
    248                                     0x0F, 0x31,
    249                                     0x1A,
    250                                     0x32 };
    251 
    252         static const int32_t offsets[] = {-1, -1, -1, -1,
    253                               0, 0, 0,
    254                               1, 1,
    255                               3, 3, 3,
    256                               4, 4,
    257                               5,
    258                               7,
    259                             };
    260 
    261         /*iso-2022-kr*/
    262         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    263                 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
    264             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    265         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    266                 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
    267             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    268     }
    269 
    270         log_verbose("Testing for HZ\n");
    271     {
    272         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    273 
    274         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
    275                                     0x7E, 0x7D, 0x1A,
    276                                     0x7E, 0x7B, 0x36, 0x21,
    277                                     0x7E, 0x7D, 0x31,
    278                                     0x1A,
    279                                     0x32 };
    280 
    281 
    282         static const int32_t offsets[] = {0,0,0,0,
    283                              1,1,1,
    284                              3,3,3,3,
    285                              4,4,4,
    286                              5,
    287                              7,};
    288 
    289         /*hz*/
    290         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    291                 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
    292             log_err("u-> HZ not match.\n");
    293         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    294                 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
    295             log_err("u-> HZ not match.\n");
    296     }
    297 #endif
    298 
    299     /*UTF-8*/
    300      log_verbose("Testing for UTF8\n");
    301     {
    302         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
    303         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
    304                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
    305                            0x04, 0x06 };
    306         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
    307             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
    308 
    309 
    310         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
    311         /*UTF-8*/
    312         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    313             expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
    314             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    315         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    316             expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    317             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    318         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    319             expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
    320             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    321         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    322             expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    323             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    324 
    325         if(!convertToU(expected, sizeof(expected),
    326             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    327             log_err("UTF8 -> u did not match.\n");
    328         if(!convertToU(expected, sizeof(expected),
    329             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    330             log_err("UTF8 -> u did not match.\n");
    331         if(!convertToU(expected, sizeof(expected),
    332             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
    333             log_err("UTF8 ->u  did not match.\n");
    334         if(!convertToU(expected, sizeof(expected),
    335             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
    336             log_err("UTF8 -> u did not match.\n");
    337 
    338     }
    339 }
    340 
    341 /*test various error behaviours*/
    342 static void TestErrorBehaviour(){
    343     log_verbose("Testing for SBCS and LATIN_1\n");
    344     {
    345         static const UChar    sampleText[] =   { 0x0031, 0xd801};
    346         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
    347         static const uint8_t expected0[] =          { 0x31};
    348         static const uint8_t expected[] =          { 0x31, 0x1a};
    349         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
    350 
    351 #if !UCONFIG_NO_LEGACY_CONVERSION
    352         /*SBCS*/
    353         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    354                 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    355             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    356         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    357                 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
    358             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    359         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    360                 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    361             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
    362 #endif
    363 
    364         /*LATIN_1*/
    365         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    366                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    367             log_err("u-> LATIN_1 is supposed to fail\n");
    368         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    369                 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
    370             log_err("u-> LATIN_1 is supposed to fail\n");
    371 
    372         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    373                 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    374             log_err("u-> LATIN_1 did not match\n");
    375     }
    376 
    377 #if !UCONFIG_NO_LEGACY_CONVERSION
    378     log_verbose("Testing for DBCS and MBCS\n");
    379     {
    380         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
    381         static const uint8_t expected[] = { 0xa2, 0xae};
    382         static const int32_t offsets[]        = { 0x00, 0x00};
    383         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
    384         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
    385 
    386         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
    387         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    388         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
    389 
    390         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
    391         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
    392         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
    393 
    394         static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
    395         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
    396         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
    397 
    398         /*DBCS*/
    399         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    400                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    401             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    402         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    403                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    404             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    405 
    406         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    407                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
    408             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    409         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    410                 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    411             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    412 
    413 
    414         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    415                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    416             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    417         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    418                 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
    419             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    420 
    421         /*MBCS*/
    422         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    423                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    424             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    425         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    426                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    427             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    428 
    429         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    430                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    431             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    432         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    433                 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    434             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    435         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    436                 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
    437             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    438 
    439         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    440                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
    441             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    442         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    443                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
    444             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    445 
    446         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    447                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR))
    448             log_err("u-> euc-jp [UCNV_MBCS] \n");
    449         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    450                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR))
    451             log_err("u-> euc-jp [UCNV_MBCS] \n");
    452     }
    453 
    454     /*iso-2022-jp*/
    455     log_verbose("Testing for iso-2022-jp\n");
    456     {
    457         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    458         static const uint8_t expected[] = {  0x31};
    459         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
    460         static const int32_t offsets[]        = { 0x00, 1};
    461 
    462         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    463         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
    464         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
    465 
    466         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    467         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
    468         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
    469         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    470                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
    471             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    472         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    473                 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    474             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    475 
    476         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    477                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
    478             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
    479         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    480                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    481             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    482         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    483                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    484             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    485 
    486         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    487                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
    488             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    489         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    490                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
    491             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    492     }
    493 
    494     /* BEGIN android-removed */
    495     /* To save space, Android does not build full ISO-2022-CN tables.
    496        We skip the tests for ISO-2022-CN. */
    497     /*iso-2022-cn*/
    498     /*
    499     log_verbose("Testing for iso-2022-cn\n");
    500     {
    501         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    502         static const uint8_t expected[] = { 0x31};
    503         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
    504         static const int32_t offsets[]        = { 0x00, 1};
    505 
    506         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    507         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
    508         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
    509 
    510         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    511         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
    512         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
    513 
    514         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    515         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
    516         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
    517         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    518                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
    519             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    520         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    521                 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
    522             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    523 
    524         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    525                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
    526             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
    527         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    528                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    529             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    530         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    531                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    532             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    533 
    534         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    535                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
    536             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
    537         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    538                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
    539             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
    540 
    541         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    542                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
    543             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    544         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    545                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
    546             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    547     }
    548     */
    549     /* END android-removed */
    550 
    551     /*iso-2022-kr*/
    552     log_verbose("Testing for iso-2022-kr\n");
    553     {
    554         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    555         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
    556         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
    557         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
    558 
    559         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    560         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
    561         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
    562 
    563         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    564         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
    565         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
    566 
    567         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    568                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
    569             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
    570         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    571                 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
    572             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    573 
    574         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    575                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
    576             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
    577         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    578                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    579             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    580         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    581                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    582             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    583 
    584         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    585                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
    586             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
    587         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    588                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
    589             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
    590     }
    591 
    592     /*HZ*/
    593     log_verbose("Testing for HZ\n");
    594     {
    595         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    596         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
    597         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
    598         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
    599 
    600         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    601         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
    602         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
    603 
    604         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    605         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
    606         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
    607 
    608         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    609         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
    610         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
    611         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    612                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
    613             log_err("u-> HZ [UCNV_MBCS] \n");
    614         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    615                 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
    616             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    617 
    618         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    619                 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
    620             log_err("u->HZ[UCNV_DBCS] did not match\n");
    621         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    622                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    623             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    624         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
    625                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    626             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    627 
    628         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    629                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
    630             log_err("u->HZ [UCNV_MBCS] \n");
    631         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
    632                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
    633             log_err("u-> HZ[UCNV_MBCS] \n");
    634 
    635         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    636                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
    637             log_err("u-> HZ [UCNV_MBCS] \n");
    638         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
    639                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
    640             log_err("u-> HZ [UCNV_MBCS] \n");
    641     }
    642 #endif
    643 }
    644 
    645 #if !UCONFIG_NO_LEGACY_CONVERSION
    646 /*test different convertToUnicode error behaviours*/
    647 static void TestToUnicodeErrorBehaviour()
    648 {
    649     log_verbose("Testing error conditions for DBCS\n");
    650     {
    651         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
    652         const UChar expected[] = { 0x00a1 };
    653 
    654         if(!convertToU(sampleText, sizeof(sampleText),
    655                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING ))
    656             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
    657         if(!convertToU(sampleText, sizeof(sampleText),
    658                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING ))
    659             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
    660     }
    661     log_verbose("Testing error conditions for SBCS\n");
    662     {
    663         uint8_t sampleText[] = { 0xa2, 0xFF};
    664         const UChar expected[] = { 0x00c2 };
    665 
    666       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
    667         const UChar expected2[] = { 0x0073 };*/
    668 
    669         if(!convertToU(sampleText, sizeof(sampleText),
    670                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
    671             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
    672         if(!convertToU(sampleText, sizeof(sampleText),
    673                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
    674             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
    675 
    676     }
    677 }
    678 
    679 static void TestGetNextErrorBehaviour(){
    680    /*Test for unassigned character*/
    681 #define INPUT_SIZE 1
    682     static const char input1[INPUT_SIZE]={ 0x70 };
    683     const char* source=(const char*)input1;
    684     UErrorCode err=U_ZERO_ERROR;
    685     UChar32 c=0;
    686     UConverter *cnv=ucnv_open("ibm-424", &err);
    687     if(U_FAILURE(err)) {
    688         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
    689         return;
    690     }
    691     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
    692     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
    693         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
    694     }
    695     ucnv_close(cnv);
    696 }
    697 #endif
    698 
    699 #define MAX_UTF16_LEN 2
    700 #define MAX_UTF8_LEN 4
    701 
    702 /*Regression test for utf8 converter*/
    703 static void TestRegressionUTF8(){
    704     UChar32 currCh = 0;
    705     int32_t offset8;
    706     int32_t offset16;
    707     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    708     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
    709 
    710     while (currCh <= UNICODE_LIMIT) {
    711         offset16 = 0;
    712         offset8 = 0;
    713         while(currCh <= UNICODE_LIMIT
    714             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    715             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
    716         {
    717             if (currCh == SURROGATE_HIGH_START) {
    718                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    719             }
    720             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
    721             U8_APPEND_UNSAFE(utf8, offset8, currCh);
    722             currCh++;
    723         }
    724         if(!convertFromU(standardForm, offset16,
    725             utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    726             log_err("Unicode->UTF8 did not match.\n");
    727         }
    728         if(!convertToU(utf8, offset8,
    729             standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    730             log_err("UTF8->Unicode did not match.\n");
    731         }
    732     }
    733 
    734     free(standardForm);
    735     free(utf8);
    736 
    737     {
    738         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
    739         static const UChar expected[] = { 0x0301, 0x0300 };
    740         UConverter *conv8;
    741         UErrorCode err = U_ZERO_ERROR;
    742         UChar pivotBuffer[100];
    743         const UChar* const pivEnd = pivotBuffer + 100;
    744         const char* srcBeg;
    745         const char* srcEnd;
    746         UChar* pivBeg;
    747 
    748         conv8 = ucnv_open("UTF-8", &err);
    749 
    750         srcBeg = src8;
    751         pivBeg = pivotBuffer;
    752         srcEnd = src8 + 3;
    753         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    754         if (srcBeg != srcEnd) {
    755             log_err("Did not consume whole buffer on first call.\n");
    756         }
    757 
    758         srcEnd = src8 + 4;
    759         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    760         if (srcBeg != srcEnd) {
    761             log_err("Did not consume whole buffer on second call.\n");
    762         }
    763 
    764         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    765             log_err("Did not get expected results for UTF-8.\n");
    766         }
    767         ucnv_close(conv8);
    768     }
    769 }
    770 
    771 #define MAX_UTF32_LEN 1
    772 
    773 static void TestRegressionUTF32(){
    774 #if !UCONFIG_ONLY_HTML_CONVERSION
    775     UChar32 currCh = 0;
    776     int32_t offset32;
    777     int32_t offset16;
    778     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    779     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
    780 
    781     while (currCh <= UNICODE_LIMIT) {
    782         offset16 = 0;
    783         offset32 = 0;
    784         while(currCh <= UNICODE_LIMIT
    785             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    786             && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
    787         {
    788             if (currCh == SURROGATE_HIGH_START) {
    789                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    790             }
    791             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
    792             utf32[offset32++] = currCh;
    793             currCh++;
    794         }
    795         if(!convertFromU(standardForm, offset16,
    796             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    797             log_err("Unicode->UTF32 did not match.\n");
    798         }
    799         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
    800             standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    801             log_err("UTF32->Unicode did not match.\n");
    802         }
    803     }
    804     free(standardForm);
    805     free(utf32);
    806 
    807     {
    808         /* Check for lone surrogate error handling. */
    809         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
    810         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
    811         static const uint8_t expectedUTF32BE[] = {
    812             0x00, 0x00, 0x00, 0x31,
    813             0x00, 0x00, 0xff, 0xfd,
    814             0x00, 0x00, 0x00, 0x32
    815         };
    816         static const uint8_t expectedUTF32LE[] = {
    817             0x31, 0x00, 0x00, 0x00,
    818             0xfd, 0xff, 0x00, 0x00,
    819             0x32, 0x00, 0x00, 0x00
    820         };
    821         static const int32_t offsetsUTF32[] = {
    822             0x00, 0x00, 0x00, 0x00,
    823             0x01, 0x01, 0x01, 0x01,
    824             0x02, 0x02, 0x02, 0x02
    825         };
    826 
    827         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
    828                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    829             log_err("u->UTF-32BE\n");
    830         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
    831                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    832             log_err("u->UTF-32BE\n");
    833 
    834         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
    835                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    836             log_err("u->UTF-32LE\n");
    837         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
    838                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    839             log_err("u->UTF-32LE\n");
    840     }
    841 
    842     {
    843         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
    844         static const UChar expected[] = { 0x0031, 0x0030 };
    845         UConverter *convBE;
    846         UErrorCode err = U_ZERO_ERROR;
    847         UChar pivotBuffer[100];
    848         const UChar* const pivEnd = pivotBuffer + 100;
    849         const char* srcBeg;
    850         const char* srcEnd;
    851         UChar* pivBeg;
    852 
    853         convBE = ucnv_open("UTF-32BE", &err);
    854 
    855         srcBeg = srcBE;
    856         pivBeg = pivotBuffer;
    857         srcEnd = srcBE + 5;
    858         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    859         if (srcBeg != srcEnd) {
    860             log_err("Did not consume whole buffer on first call.\n");
    861         }
    862 
    863         srcEnd = srcBE + 8;
    864         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    865         if (srcBeg != srcEnd) {
    866             log_err("Did not consume whole buffer on second call.\n");
    867         }
    868 
    869         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    870             log_err("Did not get expected results for UTF-32BE.\n");
    871         }
    872         ucnv_close(convBE);
    873     }
    874     {
    875         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
    876         static const UChar expected[] = { 0x0031, 0x0030 };
    877         UConverter *convLE;
    878         UErrorCode err = U_ZERO_ERROR;
    879         UChar pivotBuffer[100];
    880         const UChar* const pivEnd = pivotBuffer + 100;
    881         const char* srcBeg;
    882         const char* srcEnd;
    883         UChar* pivBeg;
    884 
    885         convLE = ucnv_open("UTF-32LE", &err);
    886 
    887         srcBeg = srcLE;
    888         pivBeg = pivotBuffer;
    889         srcEnd = srcLE + 5;
    890         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    891         if (srcBeg != srcEnd) {
    892             log_err("Did not consume whole buffer on first call.\n");
    893         }
    894 
    895         srcEnd = srcLE + 8;
    896         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    897         if (srcBeg != srcEnd) {
    898             log_err("Did not consume whole buffer on second call.\n");
    899         }
    900 
    901         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    902             log_err("Did not get expected results for UTF-32LE.\n");
    903         }
    904         ucnv_close(convLE);
    905     }
    906 #endif
    907 }
    908 
    909 /*Walk through the available converters*/
    910 static void TestAvailableConverters(){
    911     UErrorCode status=U_ZERO_ERROR;
    912     UConverter *conv=NULL;
    913     int32_t i=0;
    914     for(i=0; i < ucnv_countAvailable(); i++){
    915         status=U_ZERO_ERROR;
    916         conv=ucnv_open(ucnv_getAvailableName(i), &status);
    917         if(U_FAILURE(status)){
    918             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
    919                         ucnv_getAvailableName(i), myErrorName(status));
    920             continue;
    921         }
    922         ucnv_close(conv);
    923     }
    924 
    925 }
    926 
    927 static void TestFlushInternalBuffer(){
    928     TestWithBufferSize(MAX_LENGTH, 1);
    929     TestWithBufferSize(1, 1);
    930     TestWithBufferSize(1, MAX_LENGTH);
    931     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
    932 }
    933 
    934 static void TestWithBufferSize(int32_t insize, int32_t outsize){
    935 
    936     gInBufferSize =insize;
    937     gOutBufferSize = outsize;
    938 
    939      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    940     {
    941         UChar    sampleText[] =
    942             { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
    943         const uint8_t expectedUTF8[] =
    944             { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
    945         int32_t  toUTF8Offs[] =
    946             { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
    947        /* int32_t fmUTF8Offs[] =
    948             { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
    949 
    950         /*UTF-8*/
    951         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
    952             expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
    953              log_err("u-> UTF8 did not match.\n");
    954     }
    955 
    956 #if !UCONFIG_NO_LEGACY_CONVERSION
    957      log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
    958     {
    959         UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
    960         const uint8_t toIBM943[]= { 0x61,
    961             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    962             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
    963             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    964             0x61 };
    965         int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
    966 
    967         if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest),
    968                 toIBM943, sizeof(toIBM943), "ibm-943",
    969                 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
    970             log_err("u-> ibm-943 with subst with value did not match.\n");
    971     }
    972 #endif
    973 
    974      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    975     {
    976         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
    977             0xe0, 0x80,  0x61};
    978         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
    979         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
    980 
    981         if(!testConvertToU(sampleText1, sizeof(sampleText1),
    982                  expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
    983             log_err("utf8->u with substitute did not match.\n");;
    984     }
    985 
    986 #if !UCONFIG_NO_LEGACY_CONVERSION
    987     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
    988     /*to Unicode*/
    989     {
    990         const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
    991             0x81, 0xad, /*unassigned*/
    992             0x89, 0xd3 };
    993         UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
    994             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
    995             0x7B87};
    996         int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
    997 
    998         if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
    999                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
   1000                 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
   1001             log_err("ibm-943->u with substitute with value did not match.\n");
   1002 
   1003     }
   1004 #endif
   1005 }
   1006 
   1007 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1008                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1009 {
   1010 
   1011     int32_t i=0;
   1012     char *p=0;
   1013     const UChar *src;
   1014     char buffer[MAX_LENGTH];
   1015     int32_t offsetBuffer[MAX_LENGTH];
   1016     int32_t *offs=0;
   1017     char *targ;
   1018     char *targetLimit;
   1019     UChar *sourceLimit=0;
   1020     UErrorCode status = U_ZERO_ERROR;
   1021     UConverter *conv = 0;
   1022     conv = ucnv_open(codepage, &status);
   1023     if(U_FAILURE(status))
   1024     {
   1025         log_data_err("Couldn't open converter %s\n",codepage);
   1026         return TRUE;
   1027     }
   1028     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1029 
   1030     for(i=0; i<MAX_LENGTH; i++){
   1031         buffer[i]=(char)0xF0;
   1032         offsetBuffer[i]=0xFF;
   1033     }
   1034 
   1035     src=source;
   1036     sourceLimit=(UChar*)src+(sourceLen);
   1037     targ=buffer;
   1038     targetLimit=targ+MAX_LENGTH;
   1039     offs=offsetBuffer;
   1040     ucnv_fromUnicode (conv,
   1041                   (char **)&targ,
   1042                   (const char *)targetLimit,
   1043                   &src,
   1044                   sourceLimit,
   1045                   expectOffsets ? offs : NULL,
   1046                   doFlush,
   1047                   &status);
   1048     ucnv_close(conv);
   1049     if(status != expectedStatus){
   1050           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1051           return FALSE;
   1052     }
   1053 
   1054     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1055         sourceLen, targ-buffer);
   1056 
   1057     if(expectLen != targ-buffer)
   1058     {
   1059         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1060         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1061         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
   1062         printSeqErr((const unsigned char*)expect, expectLen);
   1063         return FALSE;
   1064     }
   1065 
   1066     if(memcmp(buffer, expect, expectLen)){
   1067         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
   1068         log_info("\nGot:");
   1069         printSeqErr((const unsigned char *)buffer, expectLen);
   1070         log_info("\nExpected:");
   1071         printSeqErr((const unsigned char *)expect, expectLen);
   1072         return FALSE;
   1073     }
   1074     else {
   1075         log_verbose("Matches!\n");
   1076     }
   1077 
   1078     if (expectOffsets != 0){
   1079         log_verbose("comparing %d offsets..\n", targ-buffer);
   1080         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
   1081             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
   1082             log_info("\nGot  : ");
   1083             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
   1084             for(p=buffer;p<targ;p++)
   1085                 log_info("%d, ", offsetBuffer[p-buffer]);
   1086             log_info("\nExpected: ");
   1087             for(i=0; i< (targ-buffer); i++)
   1088                 log_info("%d,", expectOffsets[i]);
   1089         }
   1090     }
   1091 
   1092     return TRUE;
   1093 }
   1094 
   1095 
   1096 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
   1097                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1098 {
   1099     UErrorCode status = U_ZERO_ERROR;
   1100     UConverter *conv = 0;
   1101     int32_t i=0;
   1102     UChar *p=0;
   1103     const char* src;
   1104     UChar buffer[MAX_LENGTH];
   1105     int32_t offsetBuffer[MAX_LENGTH];
   1106     int32_t *offs=0;
   1107     UChar *targ;
   1108     UChar *targetLimit;
   1109     uint8_t *sourceLimit=0;
   1110 
   1111 
   1112 
   1113     conv = ucnv_open(codepage, &status);
   1114     if(U_FAILURE(status))
   1115     {
   1116         log_data_err("Couldn't open converter %s\n",codepage);
   1117         return TRUE;
   1118     }
   1119     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1120 
   1121 
   1122 
   1123     for(i=0; i<MAX_LENGTH; i++){
   1124         buffer[i]=0xFFFE;
   1125         offsetBuffer[i]=-1;
   1126     }
   1127 
   1128     src=(const char *)source;
   1129     sourceLimit=(uint8_t*)(src+(sourceLen));
   1130     targ=buffer;
   1131     targetLimit=targ+MAX_LENGTH;
   1132     offs=offsetBuffer;
   1133 
   1134 
   1135 
   1136     ucnv_toUnicode (conv,
   1137                 &targ,
   1138                 targetLimit,
   1139                 (const char **)&src,
   1140                 (const char *)sourceLimit,
   1141                 expectOffsets ? offs : NULL,
   1142                 doFlush,
   1143                 &status);
   1144 
   1145     ucnv_close(conv);
   1146     if(status != expectedStatus){
   1147           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1148           return FALSE;
   1149     }
   1150     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1151         sourceLen, targ-buffer);
   1152 
   1153 
   1154 
   1155 
   1156     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
   1157 
   1158     if (expectOffsets != 0) {
   1159         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
   1160 
   1161             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
   1162             log_info("\nGot : ");
   1163             for(p=buffer;p<targ;p++)
   1164                 log_info("%d, ", offsetBuffer[p-buffer]);
   1165             log_info("\nExpected: ");
   1166             for(i=0; i<(targ-buffer); i++)
   1167                 log_info("%d, ", expectOffsets[i]);
   1168             log_info("\nGot result:");
   1169             for(i=0; i<(targ-buffer); i++)
   1170                 log_info("0x%04X,", buffer[i]);
   1171             log_info("\nFrom Input:");
   1172             for(i=0; i<(src-(const char *)source); i++)
   1173                 log_info("0x%02X,", (unsigned char)source[i]);
   1174             log_info("\n");
   1175         }
   1176     }
   1177     if(memcmp(buffer, expect, expectLen*2)){
   1178         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
   1179         log_info("\nGot:");
   1180         printUSeqErr(buffer, expectLen);
   1181         log_info("\nExpected:");
   1182         printUSeqErr(expect, expectLen);
   1183         return FALSE;
   1184     }
   1185     else {
   1186         log_verbose("Matches!\n");
   1187     }
   1188 
   1189     return TRUE;
   1190 }
   1191 
   1192 
   1193 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1194                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
   1195 {
   1196     UErrorCode status = U_ZERO_ERROR;
   1197     UConverter *conv = 0;
   1198     char    junkout[MAX_LENGTH]; /* FIX */
   1199     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1200     char *p;
   1201     const UChar *src;
   1202     char *end;
   1203     char *targ;
   1204     int32_t *offs;
   1205     int i;
   1206     int32_t   realBufferSize;
   1207     char *realBufferEnd;
   1208     const UChar *realSourceEnd;
   1209     const UChar *sourceLimit;
   1210     UBool checkOffsets = TRUE;
   1211     UBool doFlush;
   1212 
   1213     UConverterFromUCallback oldAction = NULL;
   1214     const void* oldContext = NULL;
   1215 
   1216     for(i=0;i<MAX_LENGTH;i++)
   1217         junkout[i] = (char)0xF0;
   1218     for(i=0;i<MAX_LENGTH;i++)
   1219         junokout[i] = 0xFF;
   1220 
   1221     setNuConvTestName(codepage, "FROM");
   1222 
   1223     log_verbose("\n=========  %s\n", gNuConvTestName);
   1224 
   1225     conv = ucnv_open(codepage, &status);
   1226     if(U_FAILURE(status))
   1227     {
   1228         log_data_err("Couldn't open converter %s\n",codepage);
   1229         return TRUE;
   1230     }
   1231 
   1232     log_verbose("Converter opened..\n");
   1233     /*----setting the callback routine----*/
   1234     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1235     if (U_FAILURE(status)) {
   1236         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1237     }
   1238     /*------------------------*/
   1239 
   1240     src = source;
   1241     targ = junkout;
   1242     offs = junokout;
   1243 
   1244     realBufferSize = UPRV_LENGTHOF(junkout);
   1245     realBufferEnd = junkout + realBufferSize;
   1246     realSourceEnd = source + sourceLen;
   1247 
   1248     if ( gOutBufferSize != realBufferSize )
   1249       checkOffsets = FALSE;
   1250 
   1251     if( gInBufferSize != MAX_LENGTH )
   1252       checkOffsets = FALSE;
   1253 
   1254     do
   1255     {
   1256         end = nct_min(targ + gOutBufferSize, realBufferEnd);
   1257         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
   1258 
   1259         doFlush = (UBool)(sourceLimit == realSourceEnd);
   1260 
   1261         if(targ == realBufferEnd)
   1262           {
   1263         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
   1264         return FALSE;
   1265           }
   1266         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
   1267 
   1268 
   1269         status = U_ZERO_ERROR;
   1270         if(gInBufferSize ==999 && gOutBufferSize==999)
   1271             doFlush = FALSE;
   1272         ucnv_fromUnicode (conv,
   1273                   (char **)&targ,
   1274                   (const char *)end,
   1275                   &src,
   1276                   sourceLimit,
   1277                   offs,
   1278                   doFlush, /* flush if we're at the end of the input data */
   1279                   &status);
   1280         if(testReset)
   1281             ucnv_resetToUnicode(conv);
   1282         if(gInBufferSize ==999 && gOutBufferSize==999)
   1283             ucnv_resetToUnicode(conv);
   1284 
   1285       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
   1286 
   1287     if(U_FAILURE(status)) {
   1288         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1289         return FALSE;
   1290       }
   1291 
   1292     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1293         sourceLen, targ-junkout);
   1294     if(getTestOption(VERBOSITY_OPTION))
   1295     {
   1296         char junk[999];
   1297         char offset_str[999];
   1298         char *ptr;
   1299 
   1300         junk[0] = 0;
   1301         offset_str[0] = 0;
   1302         for(ptr = junkout;ptr<targ;ptr++)
   1303         {
   1304             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
   1305             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
   1306         }
   1307 
   1308         log_verbose(junk);
   1309         printSeq((const unsigned char *)expect, expectLen);
   1310         if ( checkOffsets )
   1311           {
   1312             log_verbose("\nOffsets:");
   1313             log_verbose(offset_str);
   1314           }
   1315         log_verbose("\n");
   1316     }
   1317     ucnv_close(conv);
   1318 
   1319 
   1320     if(expectLen != targ-junkout)
   1321     {
   1322         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1323         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1324         log_info("\nGot:");
   1325         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1326         log_info("\nExpected:");
   1327         printSeqErr((const unsigned char*)expect, expectLen);
   1328         return FALSE;
   1329     }
   1330 
   1331     if (checkOffsets && (expectOffsets != 0) )
   1332     {
   1333         log_verbose("comparing %d offsets..\n", targ-junkout);
   1334         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
   1335             log_err("did not get the expected offsets. %s", gNuConvTestName);
   1336             log_err("Got  : ");
   1337             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1338             for(p=junkout;p<targ;p++)
   1339                 log_err("%d, ", junokout[p-junkout]);
   1340             log_err("\nExpected: ");
   1341             for(i=0; i<(targ-junkout); i++)
   1342                 log_err("%d,", expectOffsets[i]);
   1343         }
   1344     }
   1345 
   1346     log_verbose("comparing..\n");
   1347     if(!memcmp(junkout, expect, expectLen))
   1348     {
   1349         log_verbose("Matches!\n");
   1350         return TRUE;
   1351     }
   1352     else
   1353     {
   1354         log_err("String does not match. %s\n", gNuConvTestName);
   1355         printUSeqErr(source, sourceLen);
   1356         log_info("\nGot:");
   1357         printSeqErr((const unsigned char *)junkout, expectLen);
   1358         log_info("\nExpected:");
   1359         printSeqErr((const unsigned char *)expect, expectLen);
   1360 
   1361         return FALSE;
   1362     }
   1363 }
   1364 
   1365 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
   1366                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
   1367 {
   1368     UErrorCode status = U_ZERO_ERROR;
   1369     UConverter *conv = 0;
   1370     UChar    junkout[MAX_LENGTH]; /* FIX */
   1371     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1372     const char *src;
   1373     const char *realSourceEnd;
   1374     const char *srcLimit;
   1375     UChar *p;
   1376     UChar *targ;
   1377     UChar *end;
   1378     int32_t *offs;
   1379     int i;
   1380     UBool   checkOffsets = TRUE;
   1381     int32_t   realBufferSize;
   1382     UChar *realBufferEnd;
   1383     UBool doFlush;
   1384 
   1385     UConverterToUCallback oldAction = NULL;
   1386     const void* oldContext = NULL;
   1387 
   1388 
   1389     for(i=0;i<MAX_LENGTH;i++)
   1390         junkout[i] = 0xFFFE;
   1391 
   1392     for(i=0;i<MAX_LENGTH;i++)
   1393         junokout[i] = -1;
   1394 
   1395     setNuConvTestName(codepage, "TO");
   1396 
   1397     log_verbose("\n=========  %s\n", gNuConvTestName);
   1398 
   1399     conv = ucnv_open(codepage, &status);
   1400     if(U_FAILURE(status))
   1401     {
   1402         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
   1403         return TRUE;
   1404     }
   1405 
   1406     log_verbose("Converter opened..\n");
   1407      /*----setting the callback routine----*/
   1408     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1409     if (U_FAILURE(status)) {
   1410         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1411     }
   1412     /*-------------------------------------*/
   1413 
   1414     src = (const char *)source;
   1415     targ = junkout;
   1416     offs = junokout;
   1417 
   1418     realBufferSize = UPRV_LENGTHOF(junkout);
   1419     realBufferEnd = junkout + realBufferSize;
   1420     realSourceEnd = src + sourcelen;
   1421 
   1422     if ( gOutBufferSize != realBufferSize )
   1423       checkOffsets = FALSE;
   1424 
   1425     if( gInBufferSize != MAX_LENGTH )
   1426       checkOffsets = FALSE;
   1427 
   1428     do
   1429       {
   1430         end = nct_min( targ + gOutBufferSize, realBufferEnd);
   1431         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
   1432 
   1433         if(targ == realBufferEnd)
   1434         {
   1435             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
   1436             return FALSE;
   1437         }
   1438         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
   1439 
   1440         /* oldTarg = targ; */
   1441 
   1442         status = U_ZERO_ERROR;
   1443         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
   1444 
   1445         ucnv_toUnicode (conv,
   1446                 &targ,
   1447                 end,
   1448                 (const char **)&src,
   1449                 (const char *)srcLimit,
   1450                 offs,
   1451                 doFlush, /* flush if we're at the end of hte source data */
   1452                 &status);
   1453         if(testReset)
   1454             ucnv_resetFromUnicode(conv);
   1455         if(gInBufferSize ==999 && gOutBufferSize==999)
   1456             ucnv_resetToUnicode(conv);
   1457         /*        offs += (targ-oldTarg); */
   1458 
   1459       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
   1460 
   1461     if(U_FAILURE(status))
   1462     {
   1463         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1464         return FALSE;
   1465     }
   1466 
   1467     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
   1468         sourcelen, targ-junkout);
   1469     if(getTestOption(VERBOSITY_OPTION))
   1470     {
   1471         char junk[999];
   1472         char offset_str[999];
   1473 
   1474         UChar *ptr;
   1475 
   1476         junk[0] = 0;
   1477         offset_str[0] = 0;
   1478 
   1479         for(ptr = junkout;ptr<targ;ptr++)
   1480         {
   1481             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
   1482             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
   1483         }
   1484 
   1485         log_verbose(junk);
   1486 
   1487         if ( checkOffsets )
   1488           {
   1489             log_verbose("\nOffsets:");
   1490             log_verbose(offset_str);
   1491           }
   1492         log_verbose("\n");
   1493     }
   1494     ucnv_close(conv);
   1495 
   1496     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
   1497 
   1498     if (checkOffsets && (expectOffsets != 0))
   1499     {
   1500         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
   1501 
   1502             log_err("did not get the expected offsets. %s",gNuConvTestName);
   1503             for(p=junkout;p<targ;p++)
   1504                 log_err("%d, ", junokout[p-junkout]);
   1505             log_err("\nExpected: ");
   1506             for(i=0; i<(targ-junkout); i++)
   1507                 log_err("%d,", expectOffsets[i]);
   1508             log_err("");
   1509             for(i=0; i<(targ-junkout); i++)
   1510                 log_err("%X,", junkout[i]);
   1511             log_err("");
   1512             for(i=0; i<(src-(const char *)source); i++)
   1513                 log_err("%X,", (unsigned char)source[i]);
   1514         }
   1515     }
   1516 
   1517     if(!memcmp(junkout, expect, expectlen*2))
   1518     {
   1519         log_verbose("Matches!\n");
   1520         return TRUE;
   1521     }
   1522     else
   1523     {
   1524         log_err("String does not match. %s\n", gNuConvTestName);
   1525         log_verbose("String does not match. %s\n", gNuConvTestName);
   1526         log_info("\nGot:");
   1527         printUSeq(junkout, expectlen);
   1528         log_info("\nExpected:");
   1529         printUSeq(expect, expectlen);
   1530         return FALSE;
   1531     }
   1532 }
   1533 
   1534 
   1535 static void TestResetBehaviour(void){
   1536 #if !UCONFIG_NO_LEGACY_CONVERSION
   1537     log_verbose("Testing Reset for DBCS and MBCS\n");
   1538     {
   1539         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
   1540         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
   1541         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
   1542 
   1543 
   1544         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
   1545         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
   1546         static const int32_t offsets1[] =  { 0,2,4,6};
   1547 
   1548         /*DBCS*/
   1549         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1550                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1551             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1552         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1553                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1554             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1555 
   1556         if(!testConvertToU(expected1, sizeof(expected1),
   1557                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1558                 offsets1, TRUE))
   1559            log_err("ibm-1363 -> did not match.\n");
   1560         /*MBCS*/
   1561         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1562                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1563             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1564         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1565                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1566             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1567 
   1568         if(!testConvertToU(expected1, sizeof(expected1),
   1569                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1570                 offsets1, TRUE))
   1571            log_err("ibm-1363 -> did not match.\n");
   1572 
   1573     }
   1574 
   1575     log_verbose("Testing Reset for ISO-2022-jp\n");
   1576     {
   1577         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1578 
   1579         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1580                                     0x31,0x1A, 0x32};
   1581 
   1582 
   1583         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
   1584 
   1585 
   1586         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1587         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1588                                     0x31,0x1A, 0x32};
   1589         static const int32_t offsets1[] =  { 3,5,10,11,12};
   1590 
   1591         /*iso-2022-jp*/
   1592         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1593                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1594             log_err("u-> not match.\n");
   1595         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1596                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1597             log_err("u->  not match.\n");
   1598 
   1599         if(!testConvertToU(expected1, sizeof(expected1),
   1600                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1601                 offsets1, TRUE))
   1602            log_err("iso-2022-jp -> did not match.\n");
   1603 
   1604     }
   1605 
   1606     /* BEGIN android-removed */
   1607     /* To save space, Android does not build full ISO-2022-CN tables.
   1608        We skip the tests for ISO-2022-CN. */
   1609     /*
   1610     log_verbose("Testing Reset for ISO-2022-cn\n");
   1611     {
   1612         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1613 
   1614         static const uint8_t expected[] = {
   1615                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1616                                     0x36, 0x21,
   1617                                     0x0f, 0x31,
   1618                                     0x1A,
   1619                                     0x32
   1620                                     };
   1621 
   1622 
   1623         static const int32_t offsets[] = {
   1624                                     0,    0,    0,    0,    0,    0,    0,
   1625                                     1,    1,
   1626                                     2,    2,
   1627                                     3,
   1628                                     5,  };
   1629 
   1630         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1631         static const uint8_t expected1[] = {
   1632                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1633                                     0x36, 0x21,
   1634                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
   1635                                     0x0f, 0x1A,
   1636                                     0x32
   1637                                     };
   1638         static const int32_t offsets1[] =  { 5,7,13,16,17};
   1639 
   1640         // iso-2022-CN  android-change
   1641         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1642                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1643             log_err("u-> not match.\n");
   1644         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1645                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1646             log_err("u-> not match.\n");
   1647 
   1648         if(!testConvertToU(expected1, sizeof(expected1),
   1649                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1650                 offsets1, TRUE))
   1651            log_err("iso-2022-cn -> did not match.\n");
   1652     }
   1653     */
   1654     /* END android-removed */
   1655 
   1656         log_verbose("Testing Reset for ISO-2022-kr\n");
   1657     {
   1658         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1659 
   1660         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
   1661                                     0x0E, 0x6C, 0x69,
   1662                                     0x0f, 0x1A,
   1663                                     0x0e, 0x6F, 0x4B,
   1664                                     0x0F, 0x31,
   1665                                     0x1A,
   1666                                     0x32 };
   1667 
   1668         static const int32_t offsets[] = {-1, -1, -1, -1,
   1669                               0, 0, 0,
   1670                               1, 1,
   1671                               3, 3, 3,
   1672                               4, 4,
   1673                               5,
   1674                               7,
   1675                             };
   1676         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
   1677 
   1678         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
   1679                                     0x0E, 0x6C, 0x69,
   1680                                     0x0f, 0x41,
   1681                                     0x0e, 0x6F, 0x4B,
   1682                                     0x0F, 0x31,
   1683                                     0x42,
   1684                                     0x32 };
   1685 
   1686         static const int32_t offsets1[] = {
   1687                               5, 8, 10,
   1688                               13, 14, 15
   1689 
   1690                             };
   1691         /*iso-2022-kr*/
   1692         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1693                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1694             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1695         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1696                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1697             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1698         if(!testConvertToU(expected1, sizeof(expected1),
   1699                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1700                 offsets1, TRUE))
   1701            log_err("iso-2022-kr -> did not match.\n");
   1702     }
   1703 
   1704         log_verbose("Testing Reset for HZ\n");
   1705     {
   1706         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1707 
   1708         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
   1709                                     0x7E, 0x7D, 0x1A,
   1710                                     0x7E, 0x7B, 0x36, 0x21,
   1711                                     0x7E, 0x7D, 0x31,
   1712                                     0x1A,
   1713                                     0x32 };
   1714 
   1715 
   1716         static const int32_t offsets[] = {0,0,0,0,
   1717                              1,1,1,
   1718                              3,3,3,3,
   1719                              4,4,4,
   1720                              5,
   1721                              7,};
   1722         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
   1723 
   1724         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
   1725                                     0x7E, 0x7D, 0x35,
   1726                                     0x7E, 0x7B, 0x36, 0x21,
   1727                                     0x7E, 0x7D, 0x31,
   1728                                     0x41,
   1729                                     0x32 };
   1730 
   1731 
   1732         static const int32_t offsets1[] = {2,6,9,13,14,15
   1733                             };
   1734 
   1735         /*hz*/
   1736         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1737                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1738             log_err("u->  not match.\n");
   1739         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1740                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1741             log_err("u->  not match.\n");
   1742         if(!testConvertToU(expected1, sizeof(expected1),
   1743                 sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1744                 offsets1, TRUE))
   1745            log_err("hz -> did not match.\n");
   1746     }
   1747 #endif
   1748 
   1749     /*UTF-8*/
   1750      log_verbose("Testing for UTF8\n");
   1751     {
   1752         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
   1753         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
   1754                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
   1755                            0x04, 0x06 };
   1756         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
   1757             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
   1758 
   1759 
   1760         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
   1761         /*UTF-8*/
   1762         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1763             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1764             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1765         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1766             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1767             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1768         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1769             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1770             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1771         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
   1772             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1773             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1774         if(!testConvertToU(expected, sizeof(expected),
   1775             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1776             log_err("UTF8 -> did not match.\n");
   1777         if(!testConvertToU(expected, sizeof(expected),
   1778             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1779             log_err("UTF8 -> did not match.\n");
   1780         if(!testConvertToU(expected, sizeof(expected),
   1781             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1782             log_err("UTF8 -> did not match.\n");
   1783         if(!testConvertToU(expected, sizeof(expected),
   1784             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1785             log_err("UTF8 -> did not match.\n");
   1786 
   1787     }
   1788 
   1789 }
   1790 
   1791 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
   1792 static void
   1793 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
   1794     UConverter *cnv;
   1795 
   1796     UChar buffer[2];
   1797     UChar *target, *targetLimit;
   1798     const char *source, *sourceLimit;
   1799 
   1800     UErrorCode errorCode;
   1801 
   1802     errorCode=U_ZERO_ERROR;
   1803     cnv=ucnv_open(cnvName, &errorCode);
   1804     if(U_FAILURE(errorCode)) {
   1805         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
   1806         return;
   1807     }
   1808     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
   1809     if(U_FAILURE(errorCode)) {
   1810         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
   1811                     cnvName, u_errorName(errorCode));
   1812         ucnv_close(cnv);
   1813         return;
   1814     }
   1815 
   1816     source=(const char *)bytes;
   1817     sourceLimit=source+length;
   1818     target=buffer;
   1819     targetLimit=buffer+UPRV_LENGTHOF(buffer);
   1820 
   1821     /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
   1822     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
   1823     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
   1824         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
   1825                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1826     }
   1827 
   1828     errorCode=U_ZERO_ERROR;
   1829     source=sourceLimit;
   1830     target=buffer;
   1831     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1832     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
   1833         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
   1834                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
   1835     }
   1836 
   1837     /* 2. input bytes with flush=TRUE */
   1838     ucnv_resetToUnicode(cnv);
   1839 
   1840     errorCode=U_ZERO_ERROR;
   1841     source=(const char *)bytes;
   1842     target=buffer;
   1843     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1844     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
   1845         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
   1846                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1847     }
   1848 
   1849 
   1850     ucnv_close(cnv);
   1851 }
   1852 
   1853 static void
   1854 TestTruncated() {
   1855     static const struct {
   1856         const char *cnvName;
   1857         uint8_t bytes[8]; /* partial input bytes resulting in no output */
   1858         int32_t length;
   1859     } testCases[]={
   1860         { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
   1861         { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
   1862         { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
   1863         { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */
   1864 
   1865         { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
   1866         { "UTF-8",      { 0xd1 }, 1 },
   1867 
   1868         { "UTF-16BE",   { 0x4e }, 1 },
   1869         { "UTF-16LE",   { 0x4e }, 1 },
   1870         { "UTF-16",     { 0x4e }, 1 },
   1871         { "UTF-16",     { 0xff }, 1 },
   1872         { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },
   1873 
   1874         { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
   1875         { "UTF-32LE",   { 0x4e }, 1 },
   1876         { "UTF-32",     { 0, 0, 0x4e }, 3 },
   1877         { "UTF-32",     { 0xff }, 1 },
   1878         { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
   1879         { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */
   1880 
   1881 #if !UCONFIG_NO_LEGACY_CONVERSION
   1882         { "BOCU-1",     { 0xd5 }, 1 },
   1883 
   1884         { "Shift-JIS",  { 0xe0 }, 1 },
   1885 
   1886         { "ibm-939",    { 0x0e, 0x41 }, 2 } /* SO 0x41 */
   1887 #else
   1888         { "BOCU-1",     { 0xd5 }, 1 ,}
   1889 #endif
   1890     };
   1891     int32_t i;
   1892 
   1893     for(i=0; i<UPRV_LENGTHOF(testCases); ++i) {
   1894         doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
   1895     }
   1896 }
   1897 
   1898 typedef struct NameRange {
   1899     const char *name;
   1900     UChar32 start, end, start2, end2, notStart, notEnd;
   1901 } NameRange;
   1902 
   1903 static void
   1904 TestUnicodeSet() {
   1905     UErrorCode errorCode;
   1906     UConverter *cnv;
   1907     USet *set;
   1908     const char *name;
   1909     int32_t i, count;
   1910 
   1911     static const char *const completeSetNames[]={
   1912         "UTF-7",
   1913         "UTF-8",
   1914         "UTF-16",
   1915         "UTF-16BE",
   1916         "UTF-16LE",
   1917         "UTF-32",
   1918         "UTF-32BE",
   1919         "UTF-32LE",
   1920         "SCSU",
   1921         "BOCU-1",
   1922         "CESU-8",
   1923 #if !UCONFIG_NO_LEGACY_CONVERSION
   1924         "gb18030",
   1925 #endif
   1926         "IMAP-mailbox-name"
   1927     };
   1928 #if !UCONFIG_NO_LEGACY_CONVERSION
   1929     static const char *const lmbcsNames[]={
   1930         "LMBCS-1",
   1931         "LMBCS-2",
   1932         "LMBCS-3",
   1933         "LMBCS-4",
   1934         "LMBCS-5",
   1935         "LMBCS-6",
   1936         "LMBCS-8",
   1937         "LMBCS-11",
   1938         "LMBCS-16",
   1939         "LMBCS-17",
   1940         "LMBCS-18",
   1941         "LMBCS-19"
   1942     };
   1943 #endif
   1944 
   1945     static const NameRange nameRanges[]={
   1946         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1947 #if !UCONFIG_NO_LEGACY_CONVERSION
   1948         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1949 #endif
   1950         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
   1951 #if !UCONFIG_NO_LEGACY_CONVERSION
   1952         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
   1953         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
   1954         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
   1955         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
   1956 #else
   1957         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
   1958 #endif
   1959     };
   1960 
   1961     /* open an empty set */
   1962     set=uset_open(1, 0);
   1963 
   1964     count=ucnv_countAvailable();
   1965     for(i=0; i<count; ++i) {
   1966         errorCode=U_ZERO_ERROR;
   1967         name=ucnv_getAvailableName(i);
   1968         cnv=ucnv_open(name, &errorCode);
   1969         if(U_FAILURE(errorCode)) {
   1970             log_data_err("error: unable to open converter %s - %s\n",
   1971                     name, u_errorName(errorCode));
   1972             continue;
   1973         }
   1974 
   1975         uset_clear(set);
   1976         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   1977         if(U_FAILURE(errorCode)) {
   1978             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   1979                     name, u_errorName(errorCode));
   1980         } else if(uset_size(set)==0) {
   1981             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
   1982         }
   1983 
   1984         ucnv_close(cnv);
   1985     }
   1986 
   1987     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
   1988     for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) {
   1989         errorCode=U_ZERO_ERROR;
   1990         name=completeSetNames[i];
   1991         cnv=ucnv_open(name, &errorCode);
   1992         if(U_FAILURE(errorCode)) {
   1993             log_data_err("error: unable to open converter %s - %s\n",
   1994                     name, u_errorName(errorCode));
   1995             continue;
   1996         }
   1997 
   1998         uset_clear(set);
   1999         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2000         if(U_FAILURE(errorCode)) {
   2001             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2002                     name, u_errorName(errorCode));
   2003         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
   2004             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
   2005         }
   2006 
   2007         ucnv_close(cnv);
   2008     }
   2009 
   2010 #if !UCONFIG_NO_LEGACY_CONVERSION
   2011     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
   2012     for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) {
   2013         errorCode=U_ZERO_ERROR;
   2014         name=lmbcsNames[i];
   2015         cnv=ucnv_open(name, &errorCode);
   2016         if(U_FAILURE(errorCode)) {
   2017             log_data_err("error: unable to open converter %s - %s\n",
   2018                     name, u_errorName(errorCode));
   2019             continue;
   2020         }
   2021 
   2022         uset_clear(set);
   2023         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2024         if(U_FAILURE(errorCode)) {
   2025             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2026                     name, u_errorName(errorCode));
   2027         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
   2028             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
   2029         }
   2030 
   2031         ucnv_close(cnv);
   2032     }
   2033 #endif
   2034 
   2035     /* test specific sets */
   2036     for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) {
   2037         errorCode=U_ZERO_ERROR;
   2038         name=nameRanges[i].name;
   2039         cnv=ucnv_open(name, &errorCode);
   2040         if(U_FAILURE(errorCode)) {
   2041             log_data_err("error: unable to open converter %s - %s\n",
   2042                          name, u_errorName(errorCode));
   2043             continue;
   2044         }
   2045 
   2046         uset_clear(set);
   2047         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2048         if(U_FAILURE(errorCode)) {
   2049             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2050                     name, u_errorName(errorCode));
   2051         } else if(
   2052             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
   2053             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
   2054         ) {
   2055             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
   2056         } else if(nameRanges[i].notStart>=0) {
   2057             /* simulate containsAny() with the C API */
   2058             uset_complement(set);
   2059             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
   2060                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
   2061             }
   2062         }
   2063 
   2064         ucnv_close(cnv);
   2065     }
   2066 
   2067     errorCode = U_ZERO_ERROR;
   2068     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2069     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
   2070         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2071     }
   2072     errorCode = U_PARSE_ERROR;
   2073     /* Make sure that it does nothing if an error is passed in. Difficult to test for properly. */
   2074     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
   2075     if (errorCode != U_PARSE_ERROR) {
   2076         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2077     }
   2078 
   2079     uset_close(set);
   2080 }
   2081