Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*****************************************************************************
      7 *
      8 * File CCONVTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
     13 ******************************************************************************
     14 */
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/uloc.h"
     19 #include "unicode/ucnv.h"
     20 #include "unicode/utypes.h"
     21 #include "unicode/ustring.h"
     22 #include "unicode/uset.h"
     23 #include "cintltst.h"
     24 
     25 #define MAX_LENGTH 999
     26 
     27 #define UNICODE_LIMIT 0x10FFFF
     28 #define SURROGATE_HIGH_START    0xD800
     29 #define SURROGATE_LOW_END       0xDFFF
     30 
     31 static int32_t  gInBufferSize = 0;
     32 static int32_t  gOutBufferSize = 0;
     33 static char     gNuConvTestName[1024];
     34 
     35 #define nct_min(x,y)  ((x<y) ? x : y)
     36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     37 
     38 static void printSeq(const unsigned char* a, int len);
     39 static void printSeqErr(const unsigned char* a, int len);
     40 static void printUSeq(const UChar* a, int len);
     41 static void printUSeqErr(const UChar* a, int len);
     42 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     43                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     44 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
     45                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     46 
     47 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     48                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
     49 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
     50                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
     51 
     52 static void setNuConvTestName(const char *codepage, const char *direction)
     53 {
     54     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
     55         codepage,
     56         direction,
     57         (int)gInBufferSize,
     58         (int)gOutBufferSize);
     59 }
     60 
     61 
     62 static void TestSurrogateBehaviour(void);
     63 static void TestErrorBehaviour(void);
     64 
     65 #if !UCONFIG_NO_LEGACY_CONVERSION
     66 static void TestToUnicodeErrorBehaviour(void);
     67 static void TestGetNextErrorBehaviour(void);
     68 #endif
     69 
     70 static void TestRegressionUTF8(void);
     71 static void TestRegressionUTF32(void);
     72 static void TestAvailableConverters(void);
     73 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
     74 static void TestResetBehaviour(void);
     75 static void TestTruncated(void);
     76 static void TestUnicodeSet(void);
     77 
     78 static void TestWithBufferSize(int32_t osize, int32_t isize);
     79 
     80 
     81 static void printSeq(const unsigned char* a, int len)
     82 {
     83     int i=0;
     84     log_verbose("\n{");
     85     while (i<len)
     86         log_verbose("0x%02X ", a[i++]);
     87     log_verbose("}\n");
     88 }
     89 
     90 static void printUSeq(const UChar* a, int len)
     91 {
     92     int i=0;
     93     log_verbose("\n{");
     94     while (i<len)
     95         log_verbose("%0x04X ", a[i++]);
     96     log_verbose("}\n");
     97 }
     98 
     99 static void printSeqErr(const unsigned char* a, int len)
    100 {
    101     int i=0;
    102     fprintf(stderr, "\n{");
    103     while (i<len)  fprintf(stderr, "0x%02X ", a[i++]);
    104     fprintf(stderr, "}\n");
    105 }
    106 
    107 static void printUSeqErr(const UChar* a, int len)
    108 {
    109     int i=0;
    110     fprintf(stderr, "\n{");
    111     while (i<len)
    112         fprintf(stderr, "0x%04X ", a[i++]);
    113     fprintf(stderr,"}\n");
    114 }
    115 
    116 void addExtraTests(TestNode** root);
    117 
    118 void addExtraTests(TestNode** root)
    119 {
    120      addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
    121      addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
    122 
    123 #if !UCONFIG_NO_LEGACY_CONVERSION
    124      addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
    125      addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
    126 #endif
    127 
    128      addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
    129      addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
    130      addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
    131      addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
    132      addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
    133      addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
    134      addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
    135 }
    136 
    137 /*test surrogate behaviour*/
    138 static void TestSurrogateBehaviour(){
    139     log_verbose("Testing for SBCS and LATIN_1\n");
    140     {
    141         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
    142         const uint8_t expected[] = {0x31, 0x1a, 0x32};
    143 
    144 #if !UCONFIG_NO_LEGACY_CONVERSION
    145         /*SBCS*/
    146         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    147                 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
    148             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
    149 #endif
    150 
    151         /*LATIN_1*/
    152         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    153                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
    154             log_err("u-> LATIN_1 not match.\n");
    155 
    156     }
    157 
    158 #if !UCONFIG_NO_LEGACY_CONVERSION
    159     log_verbose("Testing for DBCS and MBCS\n");
    160     {
    161         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
    162         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    163         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
    164 
    165         /*DBCS*/
    166         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    167                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    168             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    169         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    170                 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
    171             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    172         /*MBCS*/
    173         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    174                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    175             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    176         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    177                 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
    178             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    179     }
    180 
    181     log_verbose("Testing for ISO-2022-jp\n");
    182     {
    183         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    184 
    185         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
    186                                     0x31,0x1A, 0x32};
    187 
    188 
    189         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
    190 
    191         /*iso-2022-jp*/
    192         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    193                 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
    194             log_err("u-> not match.\n");
    195         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    196                 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
    197             log_err("u->  not match.\n");
    198     }
    199 
    200     log_verbose("Testing for ISO-2022-cn\n");
    201     {
    202         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    203 
    204         static const uint8_t expected[] = {
    205                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
    206                                     0x36, 0x21,
    207                                     0x0F, 0x31,
    208                                     0x1A,
    209                                     0x32
    210                                     };
    211 
    212 
    213 
    214         static const int32_t offsets[] = {
    215                                     0,    0,    0,    0,    0,    0,    0,
    216                                     1,    1,
    217                                     2,    2,
    218                                     3,
    219                                     5,  };
    220 
    221         /*iso-2022-CN*/
    222         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    223                 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
    224             log_err("u-> not match.\n");
    225         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    226                 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
    227             log_err("u-> not match.\n");
    228     }
    229 
    230         log_verbose("Testing for ISO-2022-kr\n");
    231     {
    232         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    233 
    234         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
    235                                     0x0E, 0x6C, 0x69,
    236                                     0x0f, 0x1A,
    237                                     0x0e, 0x6F, 0x4B,
    238                                     0x0F, 0x31,
    239                                     0x1A,
    240                                     0x32 };
    241 
    242         static const int32_t offsets[] = {-1, -1, -1, -1,
    243                               0, 0, 0,
    244                               1, 1,
    245                               3, 3, 3,
    246                               4, 4,
    247                               5,
    248                               7,
    249                             };
    250 
    251         /*iso-2022-kr*/
    252         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    253                 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
    254             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    255         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    256                 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
    257             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    258     }
    259 
    260         log_verbose("Testing for HZ\n");
    261     {
    262         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    263 
    264         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
    265                                     0x7E, 0x7D, 0x1A,
    266                                     0x7E, 0x7B, 0x36, 0x21,
    267                                     0x7E, 0x7D, 0x31,
    268                                     0x1A,
    269                                     0x32 };
    270 
    271 
    272         static const int32_t offsets[] = {0,0,0,0,
    273                              1,1,1,
    274                              3,3,3,3,
    275                              4,4,4,
    276                              5,
    277                              7,};
    278 
    279         /*hz*/
    280         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    281                 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
    282             log_err("u-> HZ not match.\n");
    283         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    284                 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
    285             log_err("u-> HZ not match.\n");
    286     }
    287 #endif
    288 
    289     /*UTF-8*/
    290      log_verbose("Testing for UTF8\n");
    291     {
    292         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
    293         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
    294                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
    295                            0x04, 0x06 };
    296         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
    297             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
    298 
    299 
    300         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
    301         /*UTF-8*/
    302         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    303             expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
    304             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    305         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    306             expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    307             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    308         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    309             expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
    310             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    311         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    312             expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    313             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    314 
    315         if(!convertToU(expected, sizeof(expected),
    316             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    317             log_err("UTF8 -> u did not match.\n");
    318         if(!convertToU(expected, sizeof(expected),
    319             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    320             log_err("UTF8 -> u did not match.\n");
    321         if(!convertToU(expected, sizeof(expected),
    322             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
    323             log_err("UTF8 ->u  did not match.\n");
    324         if(!convertToU(expected, sizeof(expected),
    325             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
    326             log_err("UTF8 -> u did not match.\n");
    327 
    328     }
    329 }
    330 
    331 /*test various error behaviours*/
    332 static void TestErrorBehaviour(){
    333     log_verbose("Testing for SBCS and LATIN_1\n");
    334     {
    335         static const UChar    sampleText[] =   { 0x0031, 0xd801};
    336         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
    337         static const uint8_t expected0[] =          { 0x31};
    338         static const uint8_t expected[] =          { 0x31, 0x1a};
    339         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
    340 
    341 #if !UCONFIG_NO_LEGACY_CONVERSION
    342         /*SBCS*/
    343         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    344                 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    345             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    346         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    347                 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
    348             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    349         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    350                 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    351             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
    352 #endif
    353 
    354         /*LATIN_1*/
    355         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    356                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    357             log_err("u-> LATIN_1 is supposed to fail\n");
    358         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    359                 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
    360             log_err("u-> LATIN_1 is supposed to fail\n");
    361 
    362         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    363                 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    364             log_err("u-> LATIN_1 did not match\n");
    365     }
    366 
    367 #if !UCONFIG_NO_LEGACY_CONVERSION
    368     log_verbose("Testing for DBCS and MBCS\n");
    369     {
    370         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
    371         static const uint8_t expected[] = { 0xa2, 0xae};
    372         static const int32_t offsets[]        = { 0x00, 0x00};
    373         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
    374         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
    375 
    376         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
    377         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    378         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
    379 
    380         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
    381         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
    382         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
    383 
    384         /* BEGIN android-changed */
    385         /* Android uses a different EUC-JP table. We change one character,
    386          * choosing a mapping that is common to both tables. */
    387         static const UChar       sampleText4MBCS[] = { 0x0061, 0x9ED1, 0xdc01};
    388         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xf4, 0xf8, 0xf4, 0xfe};
    389         /* static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; */
    390         /* static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; */
    391         /* END android-changed */
    392         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
    393 
    394         /*DBCS*/
    395         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    396                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    397             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    398         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    399                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    400             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    401 
    402         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    403                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
    404             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    405         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    406                 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR))
    407             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    408 
    409 
    410         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    411                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    412             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    413         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    414                 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
    415             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    416 
    417         /*MBCS*/
    418         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    419                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    420             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    421         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    422                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    423             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    424 
    425         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    426                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    427             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    428         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    429                 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    430             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    431         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    432                 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
    433             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    434 
    435         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    436                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
    437             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    438         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    439                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
    440             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    441 
    442         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    443                 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
    444             log_err("u-> euc-jp [UCNV_MBCS] \n");
    445         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    446                 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
    447             log_err("u-> euc-jp [UCNV_MBCS] \n");
    448     }
    449 
    450     /*iso-2022-jp*/
    451     log_verbose("Testing for iso-2022-jp\n");
    452     {
    453         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    454         static const uint8_t expected[] = {  0x31};
    455         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
    456         static const int32_t offsets[]        = { 0x00, 1};
    457 
    458         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    459         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
    460         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
    461 
    462         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    463         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
    464         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
    465         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    466                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
    467             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    468         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    469                 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR))
    470             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    471 
    472         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    473                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
    474             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
    475         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    476                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    477             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    478         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    479                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    480             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    481 
    482         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    483                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
    484             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    485         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    486                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
    487             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    488     }
    489 
    490     /*iso-2022-cn*/
    491     log_verbose("Testing for iso-2022-cn\n");
    492     {
    493         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    494         static const uint8_t expected[] = { 0x31};
    495         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
    496         static const int32_t offsets[]        = { 0x00, 1};
    497 
    498         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    499         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
    500         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
    501 
    502         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    503         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
    504         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
    505 
    506         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    507         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
    508         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
    509         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    510                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
    511             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    512         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    513                 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
    514             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    515 
    516         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    517                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
    518             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
    519         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    520                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    521             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    522         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    523                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    524             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    525 
    526         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    527                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
    528             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
    529         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    530                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
    531             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
    532 
    533         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    534                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
    535             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    536         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    537                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
    538             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    539     }
    540 
    541     /*iso-2022-kr*/
    542     log_verbose("Testing for iso-2022-kr\n");
    543     {
    544         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    545         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
    546         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
    547         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
    548 
    549         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    550         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
    551         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
    552 
    553         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    554         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
    555         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
    556 
    557         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    558                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
    559             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
    560         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    561                 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
    562             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    563 
    564         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    565                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
    566             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
    567         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    568                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    569             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    570         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    571                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    572             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    573 
    574         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    575                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
    576             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
    577         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    578                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
    579             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
    580     }
    581 
    582     /*HZ*/
    583     log_verbose("Testing for HZ\n");
    584     {
    585         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    586         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
    587         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
    588         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
    589 
    590         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    591         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
    592         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
    593 
    594         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    595         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
    596         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
    597 
    598         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    599         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
    600         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
    601         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    602                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
    603             log_err("u-> HZ [UCNV_MBCS] \n");
    604         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    605                 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
    606             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    607 
    608         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    609                 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
    610             log_err("u->HZ[UCNV_DBCS] did not match\n");
    611         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    612                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    613             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    614         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    615                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    616             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    617 
    618         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    619                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
    620             log_err("u->HZ [UCNV_MBCS] \n");
    621         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    622                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
    623             log_err("u-> HZ[UCNV_MBCS] \n");
    624 
    625         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    626                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
    627             log_err("u-> HZ [UCNV_MBCS] \n");
    628         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    629                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
    630             log_err("u-> HZ [UCNV_MBCS] \n");
    631     }
    632 #endif
    633 }
    634 
    635 #if !UCONFIG_NO_LEGACY_CONVERSION
    636 /*test different convertToUnicode error behaviours*/
    637 static void TestToUnicodeErrorBehaviour()
    638 {
    639     log_verbose("Testing error conditions for DBCS\n");
    640     {
    641         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
    642         const UChar expected[] = { 0x00a1 };
    643 
    644         if(!convertToU(sampleText, sizeof(sampleText),
    645                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR ))
    646             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
    647         if(!convertToU(sampleText, sizeof(sampleText),
    648                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR ))
    649             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
    650     }
    651     log_verbose("Testing error conditions for SBCS\n");
    652     {
    653         uint8_t sampleText[] = { 0xa2, 0xFF};
    654         const UChar expected[] = { 0x00c2 };
    655 
    656       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
    657         const UChar expected2[] = { 0x0073 };*/
    658 
    659         if(!convertToU(sampleText, sizeof(sampleText),
    660                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
    661             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
    662         if(!convertToU(sampleText, sizeof(sampleText),
    663                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
    664             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
    665 
    666     }
    667 }
    668 
    669 static void TestGetNextErrorBehaviour(){
    670    /*Test for unassigned character*/
    671 #define INPUT_SIZE 1
    672     static const char input1[INPUT_SIZE]={ 0x70 };
    673     const char* source=(const char*)input1;
    674     UErrorCode err=U_ZERO_ERROR;
    675     UChar32 c=0;
    676     UConverter *cnv=ucnv_open("ibm-424", &err);
    677     if(U_FAILURE(err)) {
    678         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
    679         return;
    680     }
    681     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
    682     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
    683         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
    684     }
    685     ucnv_close(cnv);
    686 }
    687 #endif
    688 
    689 #define MAX_UTF16_LEN 2
    690 #define MAX_UTF8_LEN 4
    691 
    692 /*Regression test for utf8 converter*/
    693 static void TestRegressionUTF8(){
    694     UChar32 currCh = 0;
    695     int32_t offset8;
    696     int32_t offset16;
    697     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    698     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
    699 
    700     while (currCh <= UNICODE_LIMIT) {
    701         offset16 = 0;
    702         offset8 = 0;
    703         while(currCh <= UNICODE_LIMIT
    704             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    705             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
    706         {
    707             if (currCh == SURROGATE_HIGH_START) {
    708                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    709             }
    710             UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
    711             UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh);
    712             currCh++;
    713         }
    714         if(!convertFromU(standardForm, offset16,
    715             utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    716             log_err("Unicode->UTF8 did not match.\n");
    717         }
    718         if(!convertToU(utf8, offset8,
    719             standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    720             log_err("UTF8->Unicode did not match.\n");
    721         }
    722     }
    723 
    724     free(standardForm);
    725     free(utf8);
    726 
    727     {
    728         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
    729         static const UChar expected[] = { 0x0301, 0x0300 };
    730         UConverter *conv8;
    731         UErrorCode err = U_ZERO_ERROR;
    732         UChar pivotBuffer[100];
    733         const UChar* const pivEnd = pivotBuffer + 100;
    734         const char* srcBeg;
    735         const char* srcEnd;
    736         UChar* pivBeg;
    737 
    738         conv8 = ucnv_open("UTF-8", &err);
    739 
    740         srcBeg = src8;
    741         pivBeg = pivotBuffer;
    742         srcEnd = src8 + 3;
    743         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    744         if (srcBeg != srcEnd) {
    745             log_err("Did not consume whole buffer on first call.\n");
    746         }
    747 
    748         srcEnd = src8 + 4;
    749         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    750         if (srcBeg != srcEnd) {
    751             log_err("Did not consume whole buffer on second call.\n");
    752         }
    753 
    754         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    755             log_err("Did not get expected results for UTF-8.\n");
    756         }
    757         ucnv_close(conv8);
    758     }
    759 }
    760 
    761 #define MAX_UTF32_LEN 1
    762 
    763 static void TestRegressionUTF32(){
    764     UChar32 currCh = 0;
    765     int32_t offset32;
    766     int32_t offset16;
    767     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    768     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
    769 
    770     while (currCh <= UNICODE_LIMIT) {
    771         offset16 = 0;
    772         offset32 = 0;
    773         while(currCh <= UNICODE_LIMIT
    774             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    775             && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
    776         {
    777             if (currCh == SURROGATE_HIGH_START) {
    778                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    779             }
    780             UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
    781             UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh);
    782             currCh++;
    783         }
    784         if(!convertFromU(standardForm, offset16,
    785             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    786             log_err("Unicode->UTF32 did not match.\n");
    787         }
    788         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
    789             standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    790             log_err("UTF32->Unicode did not match.\n");
    791         }
    792     }
    793     free(standardForm);
    794     free(utf32);
    795 
    796     {
    797         /* Check for lone surrogate error handling. */
    798         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
    799         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
    800         static const uint8_t expectedUTF32BE[] = {
    801             0x00, 0x00, 0x00, 0x31,
    802             0x00, 0x00, 0xff, 0xfd,
    803             0x00, 0x00, 0x00, 0x32
    804         };
    805         static const uint8_t expectedUTF32LE[] = {
    806             0x31, 0x00, 0x00, 0x00,
    807             0xfd, 0xff, 0x00, 0x00,
    808             0x32, 0x00, 0x00, 0x00
    809         };
    810         static const int32_t offsetsUTF32[] = {
    811             0x00, 0x00, 0x00, 0x00,
    812             0x01, 0x01, 0x01, 0x01,
    813             0x02, 0x02, 0x02, 0x02
    814         };
    815 
    816         if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
    817                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    818             log_err("u->UTF-32BE\n");
    819         if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
    820                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    821             log_err("u->UTF-32BE\n");
    822 
    823         if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
    824                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    825             log_err("u->UTF-32LE\n");
    826         if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
    827                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    828             log_err("u->UTF-32LE\n");
    829     }
    830 
    831     {
    832         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
    833         static const UChar expected[] = { 0x0031, 0x0030 };
    834         UConverter *convBE;
    835         UErrorCode err = U_ZERO_ERROR;
    836         UChar pivotBuffer[100];
    837         const UChar* const pivEnd = pivotBuffer + 100;
    838         const char* srcBeg;
    839         const char* srcEnd;
    840         UChar* pivBeg;
    841 
    842         convBE = ucnv_open("UTF-32BE", &err);
    843 
    844         srcBeg = srcBE;
    845         pivBeg = pivotBuffer;
    846         srcEnd = srcBE + 5;
    847         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    848         if (srcBeg != srcEnd) {
    849             log_err("Did not consume whole buffer on first call.\n");
    850         }
    851 
    852         srcEnd = srcBE + 8;
    853         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    854         if (srcBeg != srcEnd) {
    855             log_err("Did not consume whole buffer on second call.\n");
    856         }
    857 
    858         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    859             log_err("Did not get expected results for UTF-32BE.\n");
    860         }
    861         ucnv_close(convBE);
    862     }
    863     {
    864         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
    865         static const UChar expected[] = { 0x0031, 0x0030 };
    866         UConverter *convLE;
    867         UErrorCode err = U_ZERO_ERROR;
    868         UChar pivotBuffer[100];
    869         const UChar* const pivEnd = pivotBuffer + 100;
    870         const char* srcBeg;
    871         const char* srcEnd;
    872         UChar* pivBeg;
    873 
    874         convLE = ucnv_open("UTF-32LE", &err);
    875 
    876         srcBeg = srcLE;
    877         pivBeg = pivotBuffer;
    878         srcEnd = srcLE + 5;
    879         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    880         if (srcBeg != srcEnd) {
    881             log_err("Did not consume whole buffer on first call.\n");
    882         }
    883 
    884         srcEnd = srcLE + 8;
    885         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    886         if (srcBeg != srcEnd) {
    887             log_err("Did not consume whole buffer on second call.\n");
    888         }
    889 
    890         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    891             log_err("Did not get expected results for UTF-32LE.\n");
    892         }
    893         ucnv_close(convLE);
    894     }
    895 }
    896 
    897 /*Walk through the available converters*/
    898 static void TestAvailableConverters(){
    899     UErrorCode status=U_ZERO_ERROR;
    900     UConverter *conv=NULL;
    901     int32_t i=0;
    902     for(i=0; i < ucnv_countAvailable(); i++){
    903         status=U_ZERO_ERROR;
    904         conv=ucnv_open(ucnv_getAvailableName(i), &status);
    905         if(U_FAILURE(status)){
    906             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
    907                         ucnv_getAvailableName(i), myErrorName(status));
    908             continue;
    909         }
    910         ucnv_close(conv);
    911     }
    912 
    913 }
    914 
    915 static void TestFlushInternalBuffer(){
    916     TestWithBufferSize(MAX_LENGTH, 1);
    917     TestWithBufferSize(1, 1);
    918     TestWithBufferSize(1, MAX_LENGTH);
    919     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
    920 }
    921 
    922 static void TestWithBufferSize(int32_t insize, int32_t outsize){
    923 
    924     gInBufferSize =insize;
    925     gOutBufferSize = outsize;
    926 
    927      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    928     {
    929         UChar    sampleText[] =
    930             { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
    931         const uint8_t expectedUTF8[] =
    932             { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
    933         int32_t  toUTF8Offs[] =
    934             { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
    935        /* int32_t fmUTF8Offs[] =
    936             { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
    937 
    938         /*UTF-8*/
    939         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    940             expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
    941              log_err("u-> UTF8 did not match.\n");
    942     }
    943 
    944 #if !UCONFIG_NO_LEGACY_CONVERSION
    945      log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
    946     {
    947         UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
    948         const uint8_t toIBM943[]= { 0x61,
    949             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    950             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
    951             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    952             0x61 };
    953         int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
    954 
    955         if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
    956                 toIBM943, sizeof(toIBM943), "ibm-943",
    957                 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
    958             log_err("u-> ibm-943 with subst with value did not match.\n");
    959     }
    960 #endif
    961 
    962      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    963     {
    964         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
    965             0xe0, 0x80,  0x61};
    966         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
    967         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
    968 
    969         if(!testConvertToU(sampleText1, sizeof(sampleText1),
    970                  expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
    971             log_err("utf8->u with substitute did not match.\n");;
    972     }
    973 
    974 #if !UCONFIG_NO_LEGACY_CONVERSION
    975     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
    976     /*to Unicode*/
    977     {
    978         const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
    979             0x81, 0xad, /*unassigned*/
    980             0x89, 0xd3 };
    981         UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
    982             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
    983             0x7B87};
    984         int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
    985 
    986         if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
    987                  IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
    988                 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
    989             log_err("ibm-943->u with substitute with value did not match.\n");
    990 
    991     }
    992 #endif
    993 }
    994 
    995 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
    996                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
    997 {
    998 
    999     int32_t i=0;
   1000     char *p=0;
   1001     const UChar *src;
   1002     char buffer[MAX_LENGTH];
   1003     int32_t offsetBuffer[MAX_LENGTH];
   1004     int32_t *offs=0;
   1005     char *targ;
   1006     char *targetLimit;
   1007     UChar *sourceLimit=0;
   1008     UErrorCode status = U_ZERO_ERROR;
   1009     UConverter *conv = 0;
   1010     conv = ucnv_open(codepage, &status);
   1011     if(U_FAILURE(status))
   1012     {
   1013         log_data_err("Couldn't open converter %s\n",codepage);
   1014         return TRUE;
   1015     }
   1016     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1017 
   1018     for(i=0; i<MAX_LENGTH; i++){
   1019         buffer[i]=(char)0xF0;
   1020         offsetBuffer[i]=0xFF;
   1021     }
   1022 
   1023     src=source;
   1024     sourceLimit=(UChar*)src+(sourceLen);
   1025     targ=buffer;
   1026     targetLimit=targ+MAX_LENGTH;
   1027     offs=offsetBuffer;
   1028     ucnv_fromUnicode (conv,
   1029                   (char **)&targ,
   1030                   (const char *)targetLimit,
   1031                   &src,
   1032                   sourceLimit,
   1033                   expectOffsets ? offs : NULL,
   1034                   doFlush,
   1035                   &status);
   1036     ucnv_close(conv);
   1037     if(status != expectedStatus){
   1038           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1039           return FALSE;
   1040     }
   1041 
   1042     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1043         sourceLen, targ-buffer);
   1044 
   1045     if(expectLen != targ-buffer)
   1046     {
   1047         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1048         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1049         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
   1050         printSeqErr((const unsigned char*)expect, expectLen);
   1051         return FALSE;
   1052     }
   1053 
   1054     if(memcmp(buffer, expect, expectLen)){
   1055         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
   1056         log_info("\nGot:");
   1057         printSeqErr((const unsigned char *)buffer, expectLen);
   1058         log_info("\nExpected:");
   1059         printSeqErr((const unsigned char *)expect, expectLen);
   1060         return FALSE;
   1061     }
   1062     else {
   1063         log_verbose("Matches!\n");
   1064     }
   1065 
   1066     if (expectOffsets != 0){
   1067         log_verbose("comparing %d offsets..\n", targ-buffer);
   1068         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
   1069             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
   1070             log_info("\nGot  : ");
   1071             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
   1072             for(p=buffer;p<targ;p++)
   1073                 log_info("%d, ", offsetBuffer[p-buffer]);
   1074             log_info("\nExpected: ");
   1075             for(i=0; i< (targ-buffer); i++)
   1076                 log_info("%d,", expectOffsets[i]);
   1077         }
   1078     }
   1079 
   1080     return TRUE;
   1081 }
   1082 
   1083 
   1084 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
   1085                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1086 {
   1087     UErrorCode status = U_ZERO_ERROR;
   1088     UConverter *conv = 0;
   1089     int32_t i=0;
   1090     UChar *p=0;
   1091     const char* src;
   1092     UChar buffer[MAX_LENGTH];
   1093     int32_t offsetBuffer[MAX_LENGTH];
   1094     int32_t *offs=0;
   1095     UChar *targ;
   1096     UChar *targetLimit;
   1097     uint8_t *sourceLimit=0;
   1098 
   1099 
   1100 
   1101     conv = ucnv_open(codepage, &status);
   1102     if(U_FAILURE(status))
   1103     {
   1104         log_data_err("Couldn't open converter %s\n",codepage);
   1105         return TRUE;
   1106     }
   1107     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1108 
   1109 
   1110 
   1111     for(i=0; i<MAX_LENGTH; i++){
   1112         buffer[i]=0xFFFE;
   1113         offsetBuffer[i]=-1;
   1114     }
   1115 
   1116     src=(const char *)source;
   1117     sourceLimit=(uint8_t*)(src+(sourceLen));
   1118     targ=buffer;
   1119     targetLimit=targ+MAX_LENGTH;
   1120     offs=offsetBuffer;
   1121 
   1122 
   1123 
   1124     ucnv_toUnicode (conv,
   1125                 &targ,
   1126                 targetLimit,
   1127                 (const char **)&src,
   1128                 (const char *)sourceLimit,
   1129                 expectOffsets ? offs : NULL,
   1130                 doFlush,
   1131                 &status);
   1132 
   1133     ucnv_close(conv);
   1134     if(status != expectedStatus){
   1135           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1136           return FALSE;
   1137     }
   1138     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1139         sourceLen, targ-buffer);
   1140 
   1141 
   1142 
   1143 
   1144     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
   1145 
   1146     if (expectOffsets != 0) {
   1147         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
   1148 
   1149             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
   1150             log_info("\nGot : ");
   1151             for(p=buffer;p<targ;p++)
   1152                 log_info("%d, ", offsetBuffer[p-buffer]);
   1153             log_info("\nExpected: ");
   1154             for(i=0; i<(targ-buffer); i++)
   1155                 log_info("%d, ", expectOffsets[i]);
   1156             log_info("\nGot result:");
   1157             for(i=0; i<(targ-buffer); i++)
   1158                 log_info("0x%04X,", buffer[i]);
   1159             log_info("\nFrom Input:");
   1160             for(i=0; i<(src-(const char *)source); i++)
   1161                 log_info("0x%02X,", (unsigned char)source[i]);
   1162             log_info("\n");
   1163         }
   1164     }
   1165     if(memcmp(buffer, expect, expectLen*2)){
   1166         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
   1167         log_info("\nGot:");
   1168         printUSeqErr(buffer, expectLen);
   1169         log_info("\nExpected:");
   1170         printUSeqErr(expect, expectLen);
   1171         return FALSE;
   1172     }
   1173     else {
   1174         log_verbose("Matches!\n");
   1175     }
   1176 
   1177     return TRUE;
   1178 }
   1179 
   1180 
   1181 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1182                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
   1183 {
   1184     UErrorCode status = U_ZERO_ERROR;
   1185     UConverter *conv = 0;
   1186     char    junkout[MAX_LENGTH]; /* FIX */
   1187     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1188     char *p;
   1189     const UChar *src;
   1190     char *end;
   1191     char *targ;
   1192     int32_t *offs;
   1193     int i;
   1194     int32_t   realBufferSize;
   1195     char *realBufferEnd;
   1196     const UChar *realSourceEnd;
   1197     const UChar *sourceLimit;
   1198     UBool checkOffsets = TRUE;
   1199     UBool doFlush;
   1200 
   1201     UConverterFromUCallback oldAction = NULL;
   1202     const void* oldContext = NULL;
   1203 
   1204     for(i=0;i<MAX_LENGTH;i++)
   1205         junkout[i] = (char)0xF0;
   1206     for(i=0;i<MAX_LENGTH;i++)
   1207         junokout[i] = 0xFF;
   1208 
   1209     setNuConvTestName(codepage, "FROM");
   1210 
   1211     log_verbose("\n=========  %s\n", gNuConvTestName);
   1212 
   1213     conv = ucnv_open(codepage, &status);
   1214     if(U_FAILURE(status))
   1215     {
   1216         log_data_err("Couldn't open converter %s\n",codepage);
   1217         return TRUE;
   1218     }
   1219 
   1220     log_verbose("Converter opened..\n");
   1221     /*----setting the callback routine----*/
   1222     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1223     if (U_FAILURE(status)) {
   1224         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1225     }
   1226     /*------------------------*/
   1227 
   1228     src = source;
   1229     targ = junkout;
   1230     offs = junokout;
   1231 
   1232     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
   1233     realBufferEnd = junkout + realBufferSize;
   1234     realSourceEnd = source + sourceLen;
   1235 
   1236     if ( gOutBufferSize != realBufferSize )
   1237       checkOffsets = FALSE;
   1238 
   1239     if( gInBufferSize != MAX_LENGTH )
   1240       checkOffsets = FALSE;
   1241 
   1242     do
   1243     {
   1244         end = nct_min(targ + gOutBufferSize, realBufferEnd);
   1245         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
   1246 
   1247         doFlush = (UBool)(sourceLimit == realSourceEnd);
   1248 
   1249         if(targ == realBufferEnd)
   1250           {
   1251         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
   1252         return FALSE;
   1253           }
   1254         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
   1255 
   1256 
   1257         status = U_ZERO_ERROR;
   1258         if(gInBufferSize ==999 && gOutBufferSize==999)
   1259             doFlush = FALSE;
   1260         ucnv_fromUnicode (conv,
   1261                   (char **)&targ,
   1262                   (const char *)end,
   1263                   &src,
   1264                   sourceLimit,
   1265                   offs,
   1266                   doFlush, /* flush if we're at the end of the input data */
   1267                   &status);
   1268         if(testReset)
   1269             ucnv_resetToUnicode(conv);
   1270         if(gInBufferSize ==999 && gOutBufferSize==999)
   1271             ucnv_resetToUnicode(conv);
   1272 
   1273       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
   1274 
   1275     if(U_FAILURE(status)) {
   1276         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1277         return FALSE;
   1278       }
   1279 
   1280     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1281         sourceLen, targ-junkout);
   1282     if(VERBOSITY)
   1283     {
   1284         char junk[999];
   1285         char offset_str[999];
   1286         char *ptr;
   1287 
   1288         junk[0] = 0;
   1289         offset_str[0] = 0;
   1290         for(ptr = junkout;ptr<targ;ptr++)
   1291         {
   1292             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
   1293             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
   1294         }
   1295 
   1296         log_verbose(junk);
   1297         printSeq((const unsigned char *)expect, expectLen);
   1298         if ( checkOffsets )
   1299           {
   1300             log_verbose("\nOffsets:");
   1301             log_verbose(offset_str);
   1302           }
   1303         log_verbose("\n");
   1304     }
   1305     ucnv_close(conv);
   1306 
   1307 
   1308     if(expectLen != targ-junkout)
   1309     {
   1310         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1311         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1312         log_info("\nGot:");
   1313         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1314         log_info("\nExpected:");
   1315         printSeqErr((const unsigned char*)expect, expectLen);
   1316         return FALSE;
   1317     }
   1318 
   1319     if (checkOffsets && (expectOffsets != 0) )
   1320     {
   1321         log_verbose("comparing %d offsets..\n", targ-junkout);
   1322         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
   1323             log_err("did not get the expected offsets. %s", gNuConvTestName);
   1324             log_err("Got  : ");
   1325             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1326             for(p=junkout;p<targ;p++)
   1327                 log_err("%d, ", junokout[p-junkout]);
   1328             log_err("\nExpected: ");
   1329             for(i=0; i<(targ-junkout); i++)
   1330                 log_err("%d,", expectOffsets[i]);
   1331         }
   1332     }
   1333 
   1334     log_verbose("comparing..\n");
   1335     if(!memcmp(junkout, expect, expectLen))
   1336     {
   1337         log_verbose("Matches!\n");
   1338         return TRUE;
   1339     }
   1340     else
   1341     {
   1342         log_err("String does not match. %s\n", gNuConvTestName);
   1343         printUSeqErr(source, sourceLen);
   1344         log_info("\nGot:");
   1345         printSeqErr((const unsigned char *)junkout, expectLen);
   1346         log_info("\nExpected:");
   1347         printSeqErr((const unsigned char *)expect, expectLen);
   1348 
   1349         return FALSE;
   1350     }
   1351 }
   1352 
   1353 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
   1354                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
   1355 {
   1356     UErrorCode status = U_ZERO_ERROR;
   1357     UConverter *conv = 0;
   1358     UChar    junkout[MAX_LENGTH]; /* FIX */
   1359     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1360     const char *src;
   1361     const char *realSourceEnd;
   1362     const char *srcLimit;
   1363     UChar *p;
   1364     UChar *targ;
   1365     UChar *end;
   1366     int32_t *offs;
   1367     int i;
   1368     UBool   checkOffsets = TRUE;
   1369     int32_t   realBufferSize;
   1370     UChar *realBufferEnd;
   1371     UBool doFlush;
   1372 
   1373     UConverterToUCallback oldAction = NULL;
   1374     const void* oldContext = NULL;
   1375 
   1376 
   1377     for(i=0;i<MAX_LENGTH;i++)
   1378         junkout[i] = 0xFFFE;
   1379 
   1380     for(i=0;i<MAX_LENGTH;i++)
   1381         junokout[i] = -1;
   1382 
   1383     setNuConvTestName(codepage, "TO");
   1384 
   1385     log_verbose("\n=========  %s\n", gNuConvTestName);
   1386 
   1387     conv = ucnv_open(codepage, &status);
   1388     if(U_FAILURE(status))
   1389     {
   1390         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
   1391         return TRUE;
   1392     }
   1393 
   1394     log_verbose("Converter opened..\n");
   1395      /*----setting the callback routine----*/
   1396     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1397     if (U_FAILURE(status)) {
   1398         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1399     }
   1400     /*-------------------------------------*/
   1401 
   1402     src = (const char *)source;
   1403     targ = junkout;
   1404     offs = junokout;
   1405 
   1406     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
   1407     realBufferEnd = junkout + realBufferSize;
   1408     realSourceEnd = src + sourcelen;
   1409 
   1410     if ( gOutBufferSize != realBufferSize )
   1411       checkOffsets = FALSE;
   1412 
   1413     if( gInBufferSize != MAX_LENGTH )
   1414       checkOffsets = FALSE;
   1415 
   1416     do
   1417       {
   1418         end = nct_min( targ + gOutBufferSize, realBufferEnd);
   1419         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
   1420 
   1421         if(targ == realBufferEnd)
   1422         {
   1423             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
   1424             return FALSE;
   1425         }
   1426         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
   1427 
   1428         /* oldTarg = targ; */
   1429 
   1430         status = U_ZERO_ERROR;
   1431         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
   1432 
   1433         ucnv_toUnicode (conv,
   1434                 &targ,
   1435                 end,
   1436                 (const char **)&src,
   1437                 (const char *)srcLimit,
   1438                 offs,
   1439                 doFlush, /* flush if we're at the end of hte source data */
   1440                 &status);
   1441         if(testReset)
   1442             ucnv_resetFromUnicode(conv);
   1443         if(gInBufferSize ==999 && gOutBufferSize==999)
   1444             ucnv_resetToUnicode(conv);
   1445         /*        offs += (targ-oldTarg); */
   1446 
   1447       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
   1448 
   1449     if(U_FAILURE(status))
   1450     {
   1451         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1452         return FALSE;
   1453     }
   1454 
   1455     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
   1456         sourcelen, targ-junkout);
   1457     if(VERBOSITY)
   1458     {
   1459         char junk[999];
   1460         char offset_str[999];
   1461 
   1462         UChar *ptr;
   1463 
   1464         junk[0] = 0;
   1465         offset_str[0] = 0;
   1466 
   1467         for(ptr = junkout;ptr<targ;ptr++)
   1468         {
   1469             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
   1470             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
   1471         }
   1472 
   1473         log_verbose(junk);
   1474 
   1475         if ( checkOffsets )
   1476           {
   1477             log_verbose("\nOffsets:");
   1478             log_verbose(offset_str);
   1479           }
   1480         log_verbose("\n");
   1481     }
   1482     ucnv_close(conv);
   1483 
   1484     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
   1485 
   1486     if (checkOffsets && (expectOffsets != 0))
   1487     {
   1488         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
   1489 
   1490             log_err("did not get the expected offsets. %s",gNuConvTestName);
   1491             for(p=junkout;p<targ;p++)
   1492                 log_err("%d, ", junokout[p-junkout]);
   1493             log_err("\nExpected: ");
   1494             for(i=0; i<(targ-junkout); i++)
   1495                 log_err("%d,", expectOffsets[i]);
   1496             log_err("");
   1497             for(i=0; i<(targ-junkout); i++)
   1498                 log_err("%X,", junkout[i]);
   1499             log_err("");
   1500             for(i=0; i<(src-(const char *)source); i++)
   1501                 log_err("%X,", (unsigned char)source[i]);
   1502         }
   1503     }
   1504 
   1505     if(!memcmp(junkout, expect, expectlen*2))
   1506     {
   1507         log_verbose("Matches!\n");
   1508         return TRUE;
   1509     }
   1510     else
   1511     {
   1512         log_err("String does not match. %s\n", gNuConvTestName);
   1513         log_verbose("String does not match. %s\n", gNuConvTestName);
   1514         log_info("\nGot:");
   1515         printUSeq(junkout, expectlen);
   1516         log_info("\nExpected:");
   1517         printUSeq(expect, expectlen);
   1518         return FALSE;
   1519     }
   1520 }
   1521 
   1522 
   1523 static void TestResetBehaviour(void){
   1524 #if !UCONFIG_NO_LEGACY_CONVERSION
   1525     log_verbose("Testing Reset for DBCS and MBCS\n");
   1526     {
   1527         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
   1528         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
   1529         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
   1530 
   1531 
   1532         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
   1533         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
   1534         static const int32_t offsets1[] =  { 0,2,4,6};
   1535 
   1536         /*DBCS*/
   1537         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1538                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1539             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1540         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1541                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1542             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1543 
   1544         if(!testConvertToU(expected1, sizeof(expected1),
   1545                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1546                 offsets1, TRUE))
   1547            log_err("ibm-1363 -> did not match.\n");
   1548         /*MBCS*/
   1549         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1550                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1551             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1552         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1553                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1554             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1555 
   1556         if(!testConvertToU(expected1, sizeof(expected1),
   1557                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1558                 offsets1, TRUE))
   1559            log_err("ibm-1363 -> did not match.\n");
   1560 
   1561     }
   1562 
   1563     log_verbose("Testing Reset for ISO-2022-jp\n");
   1564     {
   1565         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1566 
   1567         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1568                                     0x31,0x1A, 0x32};
   1569 
   1570 
   1571         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
   1572 
   1573 
   1574         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1575         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1576                                     0x31,0x1A, 0x32};
   1577         static const int32_t offsets1[] =  { 3,5,10,11,12};
   1578 
   1579         /*iso-2022-jp*/
   1580         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1581                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1582             log_err("u-> not match.\n");
   1583         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1584                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1585             log_err("u->  not match.\n");
   1586 
   1587         if(!testConvertToU(expected1, sizeof(expected1),
   1588                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1589                 offsets1, TRUE))
   1590            log_err("iso-2022-jp -> did not match.\n");
   1591 
   1592     }
   1593 
   1594     log_verbose("Testing Reset for ISO-2022-cn\n");
   1595     {
   1596         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1597 
   1598         static const uint8_t expected[] = {
   1599                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1600                                     0x36, 0x21,
   1601                                     0x0f, 0x31,
   1602                                     0x1A,
   1603                                     0x32
   1604                                     };
   1605 
   1606 
   1607         static const int32_t offsets[] = {
   1608                                     0,    0,    0,    0,    0,    0,    0,
   1609                                     1,    1,
   1610                                     2,    2,
   1611                                     3,
   1612                                     5,  };
   1613 
   1614         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1615         static const uint8_t expected1[] = {
   1616                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1617                                     0x36, 0x21,
   1618                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
   1619                                     0x0f, 0x1A,
   1620                                     0x32
   1621                                     };
   1622         static const int32_t offsets1[] =  { 5,7,13,16,17};
   1623 
   1624         /*iso-2022-CN*/
   1625         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1626                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1627             log_err("u-> not match.\n");
   1628         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1629                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1630             log_err("u-> not match.\n");
   1631 
   1632         if(!testConvertToU(expected1, sizeof(expected1),
   1633                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1634                 offsets1, TRUE))
   1635            log_err("iso-2022-cn -> did not match.\n");
   1636     }
   1637 
   1638         log_verbose("Testing Reset for ISO-2022-kr\n");
   1639     {
   1640         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1641 
   1642         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
   1643                                     0x0E, 0x6C, 0x69,
   1644                                     0x0f, 0x1A,
   1645                                     0x0e, 0x6F, 0x4B,
   1646                                     0x0F, 0x31,
   1647                                     0x1A,
   1648                                     0x32 };
   1649 
   1650         static const int32_t offsets[] = {-1, -1, -1, -1,
   1651                               0, 0, 0,
   1652                               1, 1,
   1653                               3, 3, 3,
   1654                               4, 4,
   1655                               5,
   1656                               7,
   1657                             };
   1658         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
   1659 
   1660         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
   1661                                     0x0E, 0x6C, 0x69,
   1662                                     0x0f, 0x41,
   1663                                     0x0e, 0x6F, 0x4B,
   1664                                     0x0F, 0x31,
   1665                                     0x42,
   1666                                     0x32 };
   1667 
   1668         static const int32_t offsets1[] = {
   1669                               5, 8, 10,
   1670                               13, 14, 15
   1671 
   1672                             };
   1673         /*iso-2022-kr*/
   1674         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1675                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1676             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1677         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1678                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1679             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1680         if(!testConvertToU(expected1, sizeof(expected1),
   1681                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1682                 offsets1, TRUE))
   1683            log_err("iso-2022-kr -> did not match.\n");
   1684     }
   1685 
   1686         log_verbose("Testing Reset for HZ\n");
   1687     {
   1688         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1689 
   1690         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
   1691                                     0x7E, 0x7D, 0x1A,
   1692                                     0x7E, 0x7B, 0x36, 0x21,
   1693                                     0x7E, 0x7D, 0x31,
   1694                                     0x1A,
   1695                                     0x32 };
   1696 
   1697 
   1698         static const int32_t offsets[] = {0,0,0,0,
   1699                              1,1,1,
   1700                              3,3,3,3,
   1701                              4,4,4,
   1702                              5,
   1703                              7,};
   1704         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
   1705 
   1706         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
   1707                                     0x7E, 0x7D, 0x35,
   1708                                     0x7E, 0x7B, 0x36, 0x21,
   1709                                     0x7E, 0x7D, 0x31,
   1710                                     0x41,
   1711                                     0x32 };
   1712 
   1713 
   1714         static const int32_t offsets1[] = {2,6,9,13,14,15
   1715                             };
   1716 
   1717         /*hz*/
   1718         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1719                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1720             log_err("u->  not match.\n");
   1721         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1722                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1723             log_err("u->  not match.\n");
   1724         if(!testConvertToU(expected1, sizeof(expected1),
   1725                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1726                 offsets1, TRUE))
   1727            log_err("hz -> did not match.\n");
   1728     }
   1729 #endif
   1730 
   1731     /*UTF-8*/
   1732      log_verbose("Testing for UTF8\n");
   1733     {
   1734         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
   1735         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
   1736                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
   1737                            0x04, 0x06 };
   1738         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
   1739             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
   1740 
   1741 
   1742         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
   1743         /*UTF-8*/
   1744         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1745             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1746             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1747         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1748             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1749             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1750         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1751             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1752             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1753         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1754             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1755             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1756         if(!testConvertToU(expected, sizeof(expected),
   1757             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1758             log_err("UTF8 -> did not match.\n");
   1759         if(!testConvertToU(expected, sizeof(expected),
   1760             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1761             log_err("UTF8 -> did not match.\n");
   1762         if(!testConvertToU(expected, sizeof(expected),
   1763             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1764             log_err("UTF8 -> did not match.\n");
   1765         if(!testConvertToU(expected, sizeof(expected),
   1766             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1767             log_err("UTF8 -> did not match.\n");
   1768 
   1769     }
   1770 
   1771 }
   1772 
   1773 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
   1774 static void
   1775 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
   1776     UConverter *cnv;
   1777 
   1778     UChar buffer[2];
   1779     UChar *target, *targetLimit;
   1780     const char *source, *sourceLimit;
   1781 
   1782     UErrorCode errorCode;
   1783 
   1784     errorCode=U_ZERO_ERROR;
   1785     cnv=ucnv_open(cnvName, &errorCode);
   1786     if(U_FAILURE(errorCode)) {
   1787         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
   1788         return;
   1789     }
   1790     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
   1791     if(U_FAILURE(errorCode)) {
   1792         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
   1793                     cnvName, u_errorName(errorCode));
   1794         ucnv_close(cnv);
   1795         return;
   1796     }
   1797 
   1798     source=(const char *)bytes;
   1799     sourceLimit=source+length;
   1800     target=buffer;
   1801     targetLimit=buffer+LENGTHOF(buffer);
   1802 
   1803     /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
   1804     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
   1805     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
   1806         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
   1807                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1808     }
   1809 
   1810     errorCode=U_ZERO_ERROR;
   1811     source=sourceLimit;
   1812     target=buffer;
   1813     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1814     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
   1815         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
   1816                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
   1817     }
   1818 
   1819     /* 2. input bytes with flush=TRUE */
   1820     ucnv_resetToUnicode(cnv);
   1821 
   1822     errorCode=U_ZERO_ERROR;
   1823     source=(const char *)bytes;
   1824     target=buffer;
   1825     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1826     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
   1827         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
   1828                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1829     }
   1830 
   1831 
   1832     ucnv_close(cnv);
   1833 }
   1834 
   1835 static void
   1836 TestTruncated() {
   1837     static const struct {
   1838         const char *cnvName;
   1839         uint8_t bytes[8]; /* partial input bytes resulting in no output */
   1840         int32_t length;
   1841     } testCases[]={
   1842         { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
   1843         { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
   1844         { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
   1845         { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */
   1846 
   1847         { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
   1848         { "UTF-8",      { 0xd1 }, 1 },
   1849 
   1850         { "UTF-16BE",   { 0x4e }, 1 },
   1851         { "UTF-16LE",   { 0x4e }, 1 },
   1852         { "UTF-16",     { 0x4e }, 1 },
   1853         { "UTF-16",     { 0xff }, 1 },
   1854         { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },
   1855 
   1856         { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
   1857         { "UTF-32LE",   { 0x4e }, 1 },
   1858         { "UTF-32",     { 0, 0, 0x4e }, 3 },
   1859         { "UTF-32",     { 0xff }, 1 },
   1860         { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
   1861         { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */
   1862 
   1863 #if !UCONFIG_NO_LEGACY_CONVERSION
   1864         { "BOCU-1",     { 0xd5 }, 1 },
   1865 
   1866         { "Shift-JIS",  { 0xe0 }, 1 },
   1867 
   1868         { "ibm-939",    { 0x0e, 0x41 }, 2 } /* SO 0x41 */
   1869 #else
   1870         { "BOCU-1",     { 0xd5 }, 1 ,}
   1871 #endif
   1872     };
   1873     int32_t i;
   1874 
   1875     for(i=0; i<LENGTHOF(testCases); ++i) {
   1876         doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
   1877     }
   1878 }
   1879 
   1880 typedef struct NameRange {
   1881     const char *name;
   1882     UChar32 start, end, start2, end2, notStart, notEnd;
   1883 } NameRange;
   1884 
   1885 static void
   1886 TestUnicodeSet() {
   1887     UErrorCode errorCode;
   1888     UConverter *cnv;
   1889     USet *set;
   1890     const char *name;
   1891     int32_t i, count;
   1892 
   1893     static const char *const completeSetNames[]={
   1894         "UTF-7",
   1895         "UTF-8",
   1896         "UTF-16",
   1897         "UTF-16BE",
   1898         "UTF-16LE",
   1899         "UTF-32",
   1900         "UTF-32BE",
   1901         "UTF-32LE",
   1902         "SCSU",
   1903         "BOCU-1",
   1904         "CESU-8",
   1905 #if !UCONFIG_NO_LEGACY_CONVERSION
   1906         "gb18030",
   1907 #endif
   1908         "IMAP-mailbox-name"
   1909     };
   1910 
   1911     static const char *const lmbcsNames[]={
   1912 #if !UCONFIG_NO_LEGACY_CONVERSION
   1913         "LMBCS-1",
   1914         "LMBCS-2",
   1915         "LMBCS-3",
   1916         "LMBCS-4",
   1917         "LMBCS-5",
   1918         "LMBCS-6",
   1919         "LMBCS-8",
   1920         "LMBCS-11",
   1921         "LMBCS-16",
   1922         "LMBCS-17",
   1923         "LMBCS-18",
   1924         "LMBCS-19"
   1925 #endif
   1926     };
   1927 
   1928     static const NameRange nameRanges[]={
   1929         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1930 #if !UCONFIG_NO_LEGACY_CONVERSION
   1931         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1932 #endif
   1933         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
   1934 #if !UCONFIG_NO_LEGACY_CONVERSION
   1935         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
   1936         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
   1937         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
   1938         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
   1939 #else
   1940         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
   1941 #endif
   1942     };
   1943 
   1944     /* open an empty set */
   1945     set=uset_open(1, 0);
   1946 
   1947     count=ucnv_countAvailable();
   1948     for(i=0; i<count; ++i) {
   1949         errorCode=U_ZERO_ERROR;
   1950         name=ucnv_getAvailableName(i);
   1951         cnv=ucnv_open(name, &errorCode);
   1952         if(U_FAILURE(errorCode)) {
   1953             log_data_err("error: unable to open converter %s - %s\n",
   1954                     name, u_errorName(errorCode));
   1955             continue;
   1956         }
   1957 
   1958         uset_clear(set);
   1959         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   1960         if(U_FAILURE(errorCode)) {
   1961             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   1962                     name, u_errorName(errorCode));
   1963         } else if(uset_size(set)==0) {
   1964             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
   1965         }
   1966 
   1967         ucnv_close(cnv);
   1968     }
   1969 
   1970     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
   1971     for(i=0; i<LENGTHOF(completeSetNames); ++i) {
   1972         errorCode=U_ZERO_ERROR;
   1973         name=completeSetNames[i];
   1974         cnv=ucnv_open(name, &errorCode);
   1975         if(U_FAILURE(errorCode)) {
   1976             log_data_err("error: unable to open converter %s - %s\n",
   1977                     name, u_errorName(errorCode));
   1978             continue;
   1979         }
   1980 
   1981         uset_clear(set);
   1982         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   1983         if(U_FAILURE(errorCode)) {
   1984             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   1985                     name, u_errorName(errorCode));
   1986         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
   1987             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
   1988         }
   1989 
   1990         ucnv_close(cnv);
   1991     }
   1992 
   1993     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
   1994     for(i=0; i<LENGTHOF(lmbcsNames); ++i) {
   1995         errorCode=U_ZERO_ERROR;
   1996         name=lmbcsNames[i];
   1997         cnv=ucnv_open(name, &errorCode);
   1998         if(U_FAILURE(errorCode)) {
   1999             log_data_err("error: unable to open converter %s - %s\n",
   2000                     name, u_errorName(errorCode));
   2001             continue;
   2002         }
   2003 
   2004         uset_clear(set);
   2005         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2006         if(U_FAILURE(errorCode)) {
   2007             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2008                     name, u_errorName(errorCode));
   2009         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
   2010             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
   2011         }
   2012 
   2013         ucnv_close(cnv);
   2014     }
   2015 
   2016     /* test specific sets */
   2017     for(i=0; i<LENGTHOF(nameRanges); ++i) {
   2018         errorCode=U_ZERO_ERROR;
   2019         name=nameRanges[i].name;
   2020         cnv=ucnv_open(name, &errorCode);
   2021         if(U_FAILURE(errorCode)) {
   2022             log_data_err("error: unable to open converter %s - %s\n",
   2023                          name, u_errorName(errorCode));
   2024             continue;
   2025         }
   2026 
   2027         uset_clear(set);
   2028         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2029         if(U_FAILURE(errorCode)) {
   2030             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2031                     name, u_errorName(errorCode));
   2032         } else if(
   2033             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
   2034             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
   2035         ) {
   2036             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
   2037         } else if(nameRanges[i].notStart>=0) {
   2038             /* simulate containsAny() with the C API */
   2039             uset_complement(set);
   2040             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
   2041                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
   2042             }
   2043         }
   2044 
   2045         ucnv_close(cnv);
   2046     }
   2047 
   2048     errorCode = U_ZERO_ERROR;
   2049     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2050     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
   2051         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2052     }
   2053     errorCode = U_PARSE_ERROR;
   2054     /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
   2055     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
   2056     if (errorCode != U_PARSE_ERROR) {
   2057         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2058     }
   2059 
   2060     uset_close(set);
   2061 }
   2062