Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2009, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*****************************************************************************
      7 *
      8 * File CCONVTST.C
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
     13 ******************************************************************************
     14 */
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/uloc.h"
     19 #include "unicode/ucnv.h"
     20 #include "unicode/utypes.h"
     21 #include "unicode/ustring.h"
     22 #include "unicode/uset.h"
     23 #include "cintltst.h"
     24 
     25 #define MAX_LENGTH 999
     26 
     27 #define UNICODE_LIMIT 0x10FFFF
     28 #define SURROGATE_HIGH_START    0xD800
     29 #define SURROGATE_LOW_END       0xDFFF
     30 
     31 static int32_t  gInBufferSize = 0;
     32 static int32_t  gOutBufferSize = 0;
     33 static char     gNuConvTestName[1024];
     34 
     35 #define nct_min(x,y)  ((x<y) ? x : y)
     36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     37 
     38 static void printSeq(const unsigned char* a, int len);
     39 static void printSeqErr(const unsigned char* a, int len);
     40 static void printUSeq(const UChar* a, int len);
     41 static void printUSeqErr(const UChar* a, int len);
     42 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     43                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     44 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
     45                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     46 
     47 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     48                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
     49 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
     50                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
     51 
     52 static void setNuConvTestName(const char *codepage, const char *direction)
     53 {
     54     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
     55         codepage,
     56         direction,
     57         (int)gInBufferSize,
     58         (int)gOutBufferSize);
     59 }
     60 
     61 
     62 static void TestSurrogateBehaviour(void);
     63 static void TestErrorBehaviour(void);
     64 
     65 #if !UCONFIG_NO_LEGACY_CONVERSION
     66 static void TestToUnicodeErrorBehaviour(void);
     67 static void TestGetNextErrorBehaviour(void);
     68 #endif
     69 
     70 static void TestRegressionUTF8(void);
     71 static void TestRegressionUTF32(void);
     72 static void TestAvailableConverters(void);
     73 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
     74 static void TestResetBehaviour(void);
     75 static void TestTruncated(void);
     76 static void TestUnicodeSet(void);
     77 
     78 static void TestWithBufferSize(int32_t osize, int32_t isize);
     79 
     80 
     81 static void printSeq(const unsigned char* a, int len)
     82 {
     83     int i=0;
     84     log_verbose("\n{");
     85     while (i<len)
     86         log_verbose("0x%02X ", a[i++]);
     87     log_verbose("}\n");
     88 }
     89 
     90 static void printUSeq(const UChar* a, int len)
     91 {
     92     int i=0;
     93     log_verbose("\n{");
     94     while (i<len)
     95         log_verbose("%0x04X ", a[i++]);
     96     log_verbose("}\n");
     97 }
     98 
     99 static void printSeqErr(const unsigned char* a, int len)
    100 {
    101     int i=0;
    102     fprintf(stderr, "\n{");
    103     while (i<len)  fprintf(stderr, "0x%02X ", a[i++]);
    104     fprintf(stderr, "}\n");
    105 }
    106 
    107 static void printUSeqErr(const UChar* a, int len)
    108 {
    109     int i=0;
    110     fprintf(stderr, "\n{");
    111     while (i<len)
    112         fprintf(stderr, "0x%04X ", a[i++]);
    113     fprintf(stderr,"}\n");
    114 }
    115 
    116 void addExtraTests(TestNode** root);
    117 
    118 void addExtraTests(TestNode** root)
    119 {
    120      addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
    121      addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
    122 
    123 #if !UCONFIG_NO_LEGACY_CONVERSION
    124      addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
    125      addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
    126 #endif
    127 
    128      addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
    129      addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
    130      addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
    131      addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
    132      addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
    133      addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
    134      addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
    135 }
    136 
    137 /*test surrogate behaviour*/
    138 static void TestSurrogateBehaviour(){
    139     log_verbose("Testing for SBCS and LATIN_1\n");
    140     {
    141         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
    142         const uint8_t expected[] = {0x31, 0x1a, 0x32};
    143 
    144 #if !UCONFIG_NO_LEGACY_CONVERSION
    145         /*SBCS*/
    146         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    147                 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
    148             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
    149 #endif
    150 
    151         /*LATIN_1*/
    152         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    153                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
    154             log_err("u-> LATIN_1 not match.\n");
    155 
    156     }
    157 
    158 #if !UCONFIG_NO_LEGACY_CONVERSION
    159     log_verbose("Testing for DBCS and MBCS\n");
    160     {
    161         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
    162         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    163         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
    164 
    165         /*DBCS*/
    166         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    167                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    168             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    169         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    170                 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
    171             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    172         /*MBCS*/
    173         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    174                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    175             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    176         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    177                 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
    178             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    179     }
    180 
    181 
    182    /* BEGIN android-removed */
    183    /* To save space, Android does not build full ISO2022 CJK tables.
    184       We skip the tests for ISO-2022. */
    185    /*
    186     log_verbose("Testing for ISO-2022-jp\n");
    187     {
    188         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    189 
    190         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
    191                                     0x31,0x1A, 0x32};
    192 
    193 
    194         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
    195 
    196         // iso-2022-jp
    197         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    198                 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
    199             log_err("u-> not match.\n");
    200         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    201                 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
    202             log_err("u->  not match.\n");
    203     }
    204 
    205     log_verbose("Testing for ISO-2022-cn\n");
    206     {
    207         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    208 
    209         static const uint8_t expected[] = {
    210                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
    211                                     0x36, 0x21,
    212                                     0x0F, 0x31,
    213                                     0x1A,
    214                                     0x32
    215                                     };
    216 
    217 
    218 
    219         static const int32_t offsets[] = {
    220                                     0,    0,    0,    0,    0,    0,    0,
    221                                     1,    1,
    222                                     2,    2,
    223                                     3,
    224                                     5,  };
    225 
    226         // iso-2022-CN
    227         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    228                 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
    229             log_err("u-> not match.\n");
    230         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    231                 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
    232             log_err("u-> not match.\n");
    233     }
    234 
    235         log_verbose("Testing for ISO-2022-kr\n");
    236     {
    237         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    238 
    239         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
    240                                     0x0E, 0x6C, 0x69,
    241                                     0x0f, 0x1A,
    242                                     0x0e, 0x6F, 0x4B,
    243                                     0x0F, 0x31,
    244                                     0x1A,
    245                                     0x32 };
    246 
    247         static const int32_t offsets[] = {-1, -1, -1, -1,
    248                               0, 0, 0,
    249                               1, 1,
    250                               3, 3, 3,
    251                               4, 4,
    252                               5,
    253                               7,
    254                             };
    255 
    256         // iso-2022-kr
    257         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    258                 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
    259             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    260         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    261                 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
    262             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    263     }
    264     */
    265     /* END android-removed */
    266         log_verbose("Testing for HZ\n");
    267     {
    268         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    269 
    270         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
    271                                     0x7E, 0x7D, 0x1A,
    272                                     0x7E, 0x7B, 0x36, 0x21,
    273                                     0x7E, 0x7D, 0x31,
    274                                     0x1A,
    275                                     0x32 };
    276 
    277 
    278         static const int32_t offsets[] = {0,0,0,0,
    279                              1,1,1,
    280                              3,3,3,3,
    281                              4,4,4,
    282                              5,
    283                              7,};
    284 
    285         /*hz*/
    286         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    287                 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
    288             log_err("u-> HZ not match.\n");
    289         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    290                 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
    291             log_err("u-> HZ not match.\n");
    292     }
    293 #endif
    294 
    295     /*UTF-8*/
    296      log_verbose("Testing for UTF8\n");
    297     {
    298         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
    299         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
    300                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
    301                            0x04, 0x06 };
    302         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
    303             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
    304 
    305 
    306         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
    307         /*UTF-8*/
    308         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    309             expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
    310             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    311         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    312             expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    313             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    314         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    315             expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
    316             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    317         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    318             expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    319             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    320 
    321         if(!convertToU(expected, sizeof(expected),
    322             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    323             log_err("UTF8 -> u did not match.\n");
    324         if(!convertToU(expected, sizeof(expected),
    325             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    326             log_err("UTF8 -> u did not match.\n");
    327         if(!convertToU(expected, sizeof(expected),
    328             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
    329             log_err("UTF8 ->u  did not match.\n");
    330         if(!convertToU(expected, sizeof(expected),
    331             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
    332             log_err("UTF8 -> u did not match.\n");
    333 
    334     }
    335 }
    336 
    337 /*test various error behaviours*/
    338 static void TestErrorBehaviour(){
    339     log_verbose("Testing for SBCS and LATIN_1\n");
    340     {
    341         static const UChar    sampleText[] =   { 0x0031, 0xd801};
    342         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
    343         static const uint8_t expected0[] =          { 0x31};
    344         static const uint8_t expected[] =          { 0x31, 0x1a};
    345         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
    346 
    347 #if !UCONFIG_NO_LEGACY_CONVERSION
    348         /*SBCS*/
    349         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    350                 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    351             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    352         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    353                 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
    354             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    355         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    356                 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    357             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
    358 #endif
    359 
    360         /*LATIN_1*/
    361         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    362                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    363             log_err("u-> LATIN_1 is supposed to fail\n");
    364         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    365                 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
    366             log_err("u-> LATIN_1 is supposed to fail\n");
    367 
    368         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    369                 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    370             log_err("u-> LATIN_1 did not match\n");
    371     }
    372 
    373 #if !UCONFIG_NO_LEGACY_CONVERSION
    374     log_verbose("Testing for DBCS and MBCS\n");
    375     {
    376         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
    377         static const uint8_t expected[] = { 0xa2, 0xae};
    378         static const int32_t offsets[]        = { 0x00, 0x00};
    379         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
    380         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
    381 
    382         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
    383         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    384         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
    385 
    386         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
    387         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
    388         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
    389 
    390         /* BEGIN android-changed */
    391         /* Android uses a different EUC-JP table. We change one character,
    392          * choosing a mapping that is common to both tables. */
    393         static const UChar       sampleText4MBCS[] = { 0x0061, 0x9ED1, 0xdc01};
    394         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xf4, 0xf8, 0xf4, 0xfe};
    395         /* static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; */
    396         /* static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; */
    397         /* END android-changed */
    398         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
    399 
    400         /*DBCS*/
    401         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    402                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    403             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    404         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    405                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    406             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    407 
    408         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    409                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
    410             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    411         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    412                 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR))
    413             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    414 
    415 
    416         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    417                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    418             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    419         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    420                 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
    421             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    422 
    423         /*MBCS*/
    424         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    425                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    426             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    427         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    428                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    429             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    430 
    431         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    432                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    433             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    434         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    435                 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    436             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    437         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    438                 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
    439             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    440 
    441         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    442                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
    443             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    444         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    445                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
    446             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    447 
    448         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    449                 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
    450             log_err("u-> euc-jp [UCNV_MBCS] \n");
    451         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    452                 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
    453             log_err("u-> euc-jp [UCNV_MBCS] \n");
    454     }
    455     /* BEGIN android-removed */
    456     /* To save space, Android does not build full ISO2022 CJK tables.
    457        We skip the tests for ISO-2022. */
    458     /*
    459     // iso-2022-jp
    460     log_verbose("Testing for iso-2022-jp\n");
    461     {
    462         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    463         static const uint8_t expected[] = {  0x31};
    464         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
    465         static const int32_t offsets[]        = { 0x00, 1};
    466 
    467         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    468         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
    469         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
    470 
    471         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    472         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
    473         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
    474         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    475                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
    476             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    477         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    478                 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR))
    479             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    480 
    481         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    482                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
    483             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
    484         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    485                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    486             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    487         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    488                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    489             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    490 
    491         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    492                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
    493             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    494         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    495                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
    496             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    497     }
    498 
    499     // iso-2022-cn
    500     log_verbose("Testing for iso-2022-cn\n");
    501     {
    502         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    503         static const uint8_t expected[] = { 0x31};
    504         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
    505         static const int32_t offsets[]        = { 0x00, 1};
    506 
    507         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    508         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
    509         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
    510 
    511         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    512         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
    513         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
    514 
    515         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    516         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
    517         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
    518         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    519                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
    520             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    521         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    522                 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
    523             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    524 
    525         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    526                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
    527             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
    528         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    529                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    530             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    531         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    532                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    533             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    534 
    535         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    536                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
    537             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
    538         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    539                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
    540             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
    541 
    542         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    543                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
    544             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    545         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    546                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
    547             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    548     }
    549 
    550     // iso-2022-kr
    551     log_verbose("Testing for iso-2022-kr\n");
    552     {
    553         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    554         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
    555         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
    556         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
    557 
    558         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    559         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
    560         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
    561 
    562         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    563         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
    564         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
    565 
    566         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    567                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
    568             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
    569         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    570                 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
    571             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    572 
    573         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    574                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
    575             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
    576         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    577                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    578             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    579         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    580                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    581             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    582 
    583         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    584                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
    585             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
    586         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    587                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
    588             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
    589     }
    590     */
    591     /* END android-removed */
    592 
    593     /*HZ*/
    594     log_verbose("Testing for HZ\n");
    595     {
    596         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    597         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
    598         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
    599         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
    600 
    601         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    602         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
    603         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
    604 
    605         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    606         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
    607         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
    608 
    609         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    610         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
    611         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
    612         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    613                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
    614             log_err("u-> HZ [UCNV_MBCS] \n");
    615         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    616                 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
    617             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    618 
    619         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    620                 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
    621             log_err("u->HZ[UCNV_DBCS] did not match\n");
    622         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    623                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    624             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    625         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    626                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    627             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    628 
    629         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    630                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
    631             log_err("u->HZ [UCNV_MBCS] \n");
    632         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    633                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
    634             log_err("u-> HZ[UCNV_MBCS] \n");
    635 
    636         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    637                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
    638             log_err("u-> HZ [UCNV_MBCS] \n");
    639         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    640                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
    641             log_err("u-> HZ [UCNV_MBCS] \n");
    642     }
    643 #endif
    644 }
    645 
    646 #if !UCONFIG_NO_LEGACY_CONVERSION
    647 /*test different convertToUnicode error behaviours*/
    648 static void TestToUnicodeErrorBehaviour()
    649 {
    650     log_verbose("Testing error conditions for DBCS\n");
    651     {
    652         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
    653         const UChar expected[] = { 0x00a1 };
    654 
    655         if(!convertToU(sampleText, sizeof(sampleText),
    656                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR ))
    657             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
    658         if(!convertToU(sampleText, sizeof(sampleText),
    659                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR ))
    660             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
    661     }
    662     log_verbose("Testing error conditions for SBCS\n");
    663     {
    664         uint8_t sampleText[] = { 0xa2, 0xFF};
    665         const UChar expected[] = { 0x00c2 };
    666 
    667       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
    668         const UChar expected2[] = { 0x0073 };*/
    669 
    670         if(!convertToU(sampleText, sizeof(sampleText),
    671                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
    672             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
    673         if(!convertToU(sampleText, sizeof(sampleText),
    674                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
    675             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
    676 
    677     }
    678 }
    679 
    680 static void TestGetNextErrorBehaviour(){
    681    /*Test for unassigned character*/
    682 #define INPUT_SIZE 1
    683     static const char input1[INPUT_SIZE]={ 0x70 };
    684     const char* source=(const char*)input1;
    685     UErrorCode err=U_ZERO_ERROR;
    686     UChar32 c=0;
    687     UConverter *cnv=ucnv_open("ibm-424", &err);
    688     if(U_FAILURE(err)) {
    689         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
    690         return;
    691     }
    692     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
    693     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
    694         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
    695     }
    696     ucnv_close(cnv);
    697 }
    698 #endif
    699 
    700 #define MAX_UTF16_LEN 2
    701 #define MAX_UTF8_LEN 4
    702 
    703 /*Regression test for utf8 converter*/
    704 static void TestRegressionUTF8(){
    705     UChar32 currCh = 0;
    706     int32_t offset8;
    707     int32_t offset16;
    708     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    709     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
    710 
    711     while (currCh <= UNICODE_LIMIT) {
    712         offset16 = 0;
    713         offset8 = 0;
    714         while(currCh <= UNICODE_LIMIT
    715             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    716             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
    717         {
    718             if (currCh == SURROGATE_HIGH_START) {
    719                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    720             }
    721             UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
    722             UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh);
    723             currCh++;
    724         }
    725         if(!convertFromU(standardForm, offset16,
    726             utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    727             log_err("Unicode->UTF8 did not match.\n");
    728         }
    729         if(!convertToU(utf8, offset8,
    730             standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    731             log_err("UTF8->Unicode did not match.\n");
    732         }
    733     }
    734 
    735     free(standardForm);
    736     free(utf8);
    737 
    738     {
    739         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
    740         static const UChar expected[] = { 0x0301, 0x0300 };
    741         UConverter *conv8;
    742         UErrorCode err = U_ZERO_ERROR;
    743         UChar pivotBuffer[100];
    744         const UChar* const pivEnd = pivotBuffer + 100;
    745         const char* srcBeg;
    746         const char* srcEnd;
    747         UChar* pivBeg;
    748 
    749         conv8 = ucnv_open("UTF-8", &err);
    750 
    751         srcBeg = src8;
    752         pivBeg = pivotBuffer;
    753         srcEnd = src8 + 3;
    754         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    755         if (srcBeg != srcEnd) {
    756             log_err("Did not consume whole buffer on first call.\n");
    757         }
    758 
    759         srcEnd = src8 + 4;
    760         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    761         if (srcBeg != srcEnd) {
    762             log_err("Did not consume whole buffer on second call.\n");
    763         }
    764 
    765         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    766             log_err("Did not get expected results for UTF-8.\n");
    767         }
    768         ucnv_close(conv8);
    769     }
    770 }
    771 
    772 #define MAX_UTF32_LEN 1
    773 
    774 static void TestRegressionUTF32(){
    775     UChar32 currCh = 0;
    776     int32_t offset32;
    777     int32_t offset16;
    778     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    779     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
    780 
    781     while (currCh <= UNICODE_LIMIT) {
    782         offset16 = 0;
    783         offset32 = 0;
    784         while(currCh <= UNICODE_LIMIT
    785             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    786             && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
    787         {
    788             if (currCh == SURROGATE_HIGH_START) {
    789                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    790             }
    791             UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
    792             UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh);
    793             currCh++;
    794         }
    795         if(!convertFromU(standardForm, offset16,
    796             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    797             log_err("Unicode->UTF32 did not match.\n");
    798         }
    799         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
    800             standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    801             log_err("UTF32->Unicode did not match.\n");
    802         }
    803     }
    804     free(standardForm);
    805     free(utf32);
    806 
    807     {
    808         /* Check for lone surrogate error handling. */
    809         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
    810         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
    811         static const uint8_t expectedUTF32BE[] = {
    812             0x00, 0x00, 0x00, 0x31,
    813             0x00, 0x00, 0xff, 0xfd,
    814             0x00, 0x00, 0x00, 0x32
    815         };
    816         static const uint8_t expectedUTF32LE[] = {
    817             0x31, 0x00, 0x00, 0x00,
    818             0xfd, 0xff, 0x00, 0x00,
    819             0x32, 0x00, 0x00, 0x00
    820         };
    821         static const int32_t offsetsUTF32[] = {
    822             0x00, 0x00, 0x00, 0x00,
    823             0x01, 0x01, 0x01, 0x01,
    824             0x02, 0x02, 0x02, 0x02
    825         };
    826 
    827         if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
    828                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    829             log_err("u->UTF-32BE\n");
    830         if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
    831                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    832             log_err("u->UTF-32BE\n");
    833 
    834         if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
    835                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    836             log_err("u->UTF-32LE\n");
    837         if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
    838                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    839             log_err("u->UTF-32LE\n");
    840     }
    841 
    842     {
    843         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
    844         static const UChar expected[] = { 0x0031, 0x0030 };
    845         UConverter *convBE;
    846         UErrorCode err = U_ZERO_ERROR;
    847         UChar pivotBuffer[100];
    848         const UChar* const pivEnd = pivotBuffer + 100;
    849         const char* srcBeg;
    850         const char* srcEnd;
    851         UChar* pivBeg;
    852 
    853         convBE = ucnv_open("UTF-32BE", &err);
    854 
    855         srcBeg = srcBE;
    856         pivBeg = pivotBuffer;
    857         srcEnd = srcBE + 5;
    858         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    859         if (srcBeg != srcEnd) {
    860             log_err("Did not consume whole buffer on first call.\n");
    861         }
    862 
    863         srcEnd = srcBE + 8;
    864         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    865         if (srcBeg != srcEnd) {
    866             log_err("Did not consume whole buffer on second call.\n");
    867         }
    868 
    869         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    870             log_err("Did not get expected results for UTF-32BE.\n");
    871         }
    872         ucnv_close(convBE);
    873     }
    874     {
    875         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
    876         static const UChar expected[] = { 0x0031, 0x0030 };
    877         UConverter *convLE;
    878         UErrorCode err = U_ZERO_ERROR;
    879         UChar pivotBuffer[100];
    880         const UChar* const pivEnd = pivotBuffer + 100;
    881         const char* srcBeg;
    882         const char* srcEnd;
    883         UChar* pivBeg;
    884 
    885         convLE = ucnv_open("UTF-32LE", &err);
    886 
    887         srcBeg = srcLE;
    888         pivBeg = pivotBuffer;
    889         srcEnd = srcLE + 5;
    890         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    891         if (srcBeg != srcEnd) {
    892             log_err("Did not consume whole buffer on first call.\n");
    893         }
    894 
    895         srcEnd = srcLE + 8;
    896         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    897         if (srcBeg != srcEnd) {
    898             log_err("Did not consume whole buffer on second call.\n");
    899         }
    900 
    901         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    902             log_err("Did not get expected results for UTF-32LE.\n");
    903         }
    904         ucnv_close(convLE);
    905     }
    906 }
    907 
    908 /*Walk through the available converters*/
    909 static void TestAvailableConverters(){
    910     UErrorCode status=U_ZERO_ERROR;
    911     UConverter *conv=NULL;
    912     int32_t i=0;
    913     for(i=0; i < ucnv_countAvailable(); i++){
    914         status=U_ZERO_ERROR;
    915         conv=ucnv_open(ucnv_getAvailableName(i), &status);
    916         if(U_FAILURE(status)){
    917             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
    918                         ucnv_getAvailableName(i), myErrorName(status));
    919             continue;
    920         }
    921         ucnv_close(conv);
    922     }
    923 
    924 }
    925 
    926 static void TestFlushInternalBuffer(){
    927     TestWithBufferSize(MAX_LENGTH, 1);
    928     TestWithBufferSize(1, 1);
    929     TestWithBufferSize(1, MAX_LENGTH);
    930     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
    931 }
    932 
    933 static void TestWithBufferSize(int32_t insize, int32_t outsize){
    934 
    935     gInBufferSize =insize;
    936     gOutBufferSize = outsize;
    937 
    938      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    939     {
    940         UChar    sampleText[] =
    941             { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
    942         const uint8_t expectedUTF8[] =
    943             { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
    944         int32_t  toUTF8Offs[] =
    945             { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
    946        /* int32_t fmUTF8Offs[] =
    947             { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
    948 
    949         /*UTF-8*/
    950         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    951             expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
    952              log_err("u-> UTF8 did not match.\n");
    953     }
    954 
    955 #if !UCONFIG_NO_LEGACY_CONVERSION
    956      log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
    957     {
    958         UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
    959         const uint8_t toIBM943[]= { 0x61,
    960             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    961             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
    962             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    963             0x61 };
    964         int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
    965 
    966         if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
    967                 toIBM943, sizeof(toIBM943), "ibm-943",
    968                 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
    969             log_err("u-> ibm-943 with subst with value did not match.\n");
    970     }
    971 #endif
    972 
    973      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    974     {
    975         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
    976             0xe0, 0x80,  0x61};
    977         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
    978         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
    979 
    980         if(!testConvertToU(sampleText1, sizeof(sampleText1),
    981                  expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
    982             log_err("utf8->u with substitute did not match.\n");;
    983     }
    984 
    985 #if !UCONFIG_NO_LEGACY_CONVERSION
    986     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
    987     /*to Unicode*/
    988     {
    989         const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
    990             0x81, 0xad, /*unassigned*/
    991             0x89, 0xd3 };
    992         UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
    993             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
    994             0x7B87};
    995         int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
    996 
    997         if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
    998                  IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
    999                 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
   1000             log_err("ibm-943->u with substitute with value did not match.\n");
   1001 
   1002     }
   1003 #endif
   1004 }
   1005 
   1006 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1007                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1008 {
   1009 
   1010     int32_t i=0;
   1011     char *p=0;
   1012     const UChar *src;
   1013     char buffer[MAX_LENGTH];
   1014     int32_t offsetBuffer[MAX_LENGTH];
   1015     int32_t *offs=0;
   1016     char *targ;
   1017     char *targetLimit;
   1018     UChar *sourceLimit=0;
   1019     UErrorCode status = U_ZERO_ERROR;
   1020     UConverter *conv = 0;
   1021     conv = ucnv_open(codepage, &status);
   1022     if(U_FAILURE(status))
   1023     {
   1024         log_data_err("Couldn't open converter %s\n",codepage);
   1025         return TRUE;
   1026     }
   1027     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1028 
   1029     for(i=0; i<MAX_LENGTH; i++){
   1030         buffer[i]=(char)0xF0;
   1031         offsetBuffer[i]=0xFF;
   1032     }
   1033 
   1034     src=source;
   1035     sourceLimit=(UChar*)src+(sourceLen);
   1036     targ=buffer;
   1037     targetLimit=targ+MAX_LENGTH;
   1038     offs=offsetBuffer;
   1039     ucnv_fromUnicode (conv,
   1040                   (char **)&targ,
   1041                   (const char *)targetLimit,
   1042                   &src,
   1043                   sourceLimit,
   1044                   expectOffsets ? offs : NULL,
   1045                   doFlush,
   1046                   &status);
   1047     ucnv_close(conv);
   1048     if(status != expectedStatus){
   1049           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1050           return FALSE;
   1051     }
   1052 
   1053     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1054         sourceLen, targ-buffer);
   1055 
   1056     if(expectLen != targ-buffer)
   1057     {
   1058         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1059         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1060         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
   1061         printSeqErr((const unsigned char*)expect, expectLen);
   1062         return FALSE;
   1063     }
   1064 
   1065     if(memcmp(buffer, expect, expectLen)){
   1066         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
   1067         log_info("\nGot:");
   1068         printSeqErr((const unsigned char *)buffer, expectLen);
   1069         log_info("\nExpected:");
   1070         printSeqErr((const unsigned char *)expect, expectLen);
   1071         return FALSE;
   1072     }
   1073     else {
   1074         log_verbose("Matches!\n");
   1075     }
   1076 
   1077     if (expectOffsets != 0){
   1078         log_verbose("comparing %d offsets..\n", targ-buffer);
   1079         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
   1080             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
   1081             log_info("\nGot  : ");
   1082             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
   1083             for(p=buffer;p<targ;p++)
   1084                 log_info("%d, ", offsetBuffer[p-buffer]);
   1085             log_info("\nExpected: ");
   1086             for(i=0; i< (targ-buffer); i++)
   1087                 log_info("%d,", expectOffsets[i]);
   1088         }
   1089     }
   1090 
   1091     return TRUE;
   1092 }
   1093 
   1094 
   1095 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
   1096                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1097 {
   1098     UErrorCode status = U_ZERO_ERROR;
   1099     UConverter *conv = 0;
   1100     int32_t i=0;
   1101     UChar *p=0;
   1102     const char* src;
   1103     UChar buffer[MAX_LENGTH];
   1104     int32_t offsetBuffer[MAX_LENGTH];
   1105     int32_t *offs=0;
   1106     UChar *targ;
   1107     UChar *targetLimit;
   1108     uint8_t *sourceLimit=0;
   1109 
   1110 
   1111 
   1112     conv = ucnv_open(codepage, &status);
   1113     if(U_FAILURE(status))
   1114     {
   1115         log_data_err("Couldn't open converter %s\n",codepage);
   1116         return TRUE;
   1117     }
   1118     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1119 
   1120 
   1121 
   1122     for(i=0; i<MAX_LENGTH; i++){
   1123         buffer[i]=0xFFFE;
   1124         offsetBuffer[i]=-1;
   1125     }
   1126 
   1127     src=(const char *)source;
   1128     sourceLimit=(uint8_t*)(src+(sourceLen));
   1129     targ=buffer;
   1130     targetLimit=targ+MAX_LENGTH;
   1131     offs=offsetBuffer;
   1132 
   1133 
   1134 
   1135     ucnv_toUnicode (conv,
   1136                 &targ,
   1137                 targetLimit,
   1138                 (const char **)&src,
   1139                 (const char *)sourceLimit,
   1140                 expectOffsets ? offs : NULL,
   1141                 doFlush,
   1142                 &status);
   1143 
   1144     ucnv_close(conv);
   1145     if(status != expectedStatus){
   1146           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1147           return FALSE;
   1148     }
   1149     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1150         sourceLen, targ-buffer);
   1151 
   1152 
   1153 
   1154 
   1155     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
   1156 
   1157     if (expectOffsets != 0) {
   1158         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
   1159 
   1160             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
   1161             log_info("\nGot : ");
   1162             for(p=buffer;p<targ;p++)
   1163                 log_info("%d, ", offsetBuffer[p-buffer]);
   1164             log_info("\nExpected: ");
   1165             for(i=0; i<(targ-buffer); i++)
   1166                 log_info("%d, ", expectOffsets[i]);
   1167             log_info("\nGot result:");
   1168             for(i=0; i<(targ-buffer); i++)
   1169                 log_info("0x%04X,", buffer[i]);
   1170             log_info("\nFrom Input:");
   1171             for(i=0; i<(src-(const char *)source); i++)
   1172                 log_info("0x%02X,", (unsigned char)source[i]);
   1173             log_info("\n");
   1174         }
   1175     }
   1176     if(memcmp(buffer, expect, expectLen*2)){
   1177         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
   1178         log_info("\nGot:");
   1179         printUSeqErr(buffer, expectLen);
   1180         log_info("\nExpected:");
   1181         printUSeqErr(expect, expectLen);
   1182         return FALSE;
   1183     }
   1184     else {
   1185         log_verbose("Matches!\n");
   1186     }
   1187 
   1188     return TRUE;
   1189 }
   1190 
   1191 
   1192 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1193                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
   1194 {
   1195     UErrorCode status = U_ZERO_ERROR;
   1196     UConverter *conv = 0;
   1197     char    junkout[MAX_LENGTH]; /* FIX */
   1198     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1199     char *p;
   1200     const UChar *src;
   1201     char *end;
   1202     char *targ;
   1203     int32_t *offs;
   1204     int i;
   1205     int32_t   realBufferSize;
   1206     char *realBufferEnd;
   1207     const UChar *realSourceEnd;
   1208     const UChar *sourceLimit;
   1209     UBool checkOffsets = TRUE;
   1210     UBool doFlush;
   1211 
   1212     UConverterFromUCallback oldAction = NULL;
   1213     const void* oldContext = NULL;
   1214 
   1215     for(i=0;i<MAX_LENGTH;i++)
   1216         junkout[i] = (char)0xF0;
   1217     for(i=0;i<MAX_LENGTH;i++)
   1218         junokout[i] = 0xFF;
   1219 
   1220     setNuConvTestName(codepage, "FROM");
   1221 
   1222     log_verbose("\n=========  %s\n", gNuConvTestName);
   1223 
   1224     conv = ucnv_open(codepage, &status);
   1225     if(U_FAILURE(status))
   1226     {
   1227         log_data_err("Couldn't open converter %s\n",codepage);
   1228         return TRUE;
   1229     }
   1230 
   1231     log_verbose("Converter opened..\n");
   1232     /*----setting the callback routine----*/
   1233     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1234     if (U_FAILURE(status)) {
   1235         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1236     }
   1237     /*------------------------*/
   1238 
   1239     src = source;
   1240     targ = junkout;
   1241     offs = junokout;
   1242 
   1243     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
   1244     realBufferEnd = junkout + realBufferSize;
   1245     realSourceEnd = source + sourceLen;
   1246 
   1247     if ( gOutBufferSize != realBufferSize )
   1248       checkOffsets = FALSE;
   1249 
   1250     if( gInBufferSize != MAX_LENGTH )
   1251       checkOffsets = FALSE;
   1252 
   1253     do
   1254     {
   1255         end = nct_min(targ + gOutBufferSize, realBufferEnd);
   1256         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
   1257 
   1258         doFlush = (UBool)(sourceLimit == realSourceEnd);
   1259 
   1260         if(targ == realBufferEnd)
   1261           {
   1262         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
   1263         return FALSE;
   1264           }
   1265         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
   1266 
   1267 
   1268         status = U_ZERO_ERROR;
   1269         if(gInBufferSize ==999 && gOutBufferSize==999)
   1270             doFlush = FALSE;
   1271         ucnv_fromUnicode (conv,
   1272                   (char **)&targ,
   1273                   (const char *)end,
   1274                   &src,
   1275                   sourceLimit,
   1276                   offs,
   1277                   doFlush, /* flush if we're at the end of the input data */
   1278                   &status);
   1279         if(testReset)
   1280             ucnv_resetToUnicode(conv);
   1281         if(gInBufferSize ==999 && gOutBufferSize==999)
   1282             ucnv_resetToUnicode(conv);
   1283 
   1284       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
   1285 
   1286     if(U_FAILURE(status)) {
   1287         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1288         return FALSE;
   1289       }
   1290 
   1291     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1292         sourceLen, targ-junkout);
   1293     if(VERBOSITY)
   1294     {
   1295         char junk[999];
   1296         char offset_str[999];
   1297         char *ptr;
   1298 
   1299         junk[0] = 0;
   1300         offset_str[0] = 0;
   1301         for(ptr = junkout;ptr<targ;ptr++)
   1302         {
   1303             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
   1304             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
   1305         }
   1306 
   1307         log_verbose(junk);
   1308         printSeq((const unsigned char *)expect, expectLen);
   1309         if ( checkOffsets )
   1310           {
   1311             log_verbose("\nOffsets:");
   1312             log_verbose(offset_str);
   1313           }
   1314         log_verbose("\n");
   1315     }
   1316     ucnv_close(conv);
   1317 
   1318 
   1319     if(expectLen != targ-junkout)
   1320     {
   1321         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1322         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1323         log_info("\nGot:");
   1324         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1325         log_info("\nExpected:");
   1326         printSeqErr((const unsigned char*)expect, expectLen);
   1327         return FALSE;
   1328     }
   1329 
   1330     if (checkOffsets && (expectOffsets != 0) )
   1331     {
   1332         log_verbose("comparing %d offsets..\n", targ-junkout);
   1333         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
   1334             log_err("did not get the expected offsets. %s", gNuConvTestName);
   1335             log_err("Got  : ");
   1336             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1337             for(p=junkout;p<targ;p++)
   1338                 log_err("%d, ", junokout[p-junkout]);
   1339             log_err("\nExpected: ");
   1340             for(i=0; i<(targ-junkout); i++)
   1341                 log_err("%d,", expectOffsets[i]);
   1342         }
   1343     }
   1344 
   1345     log_verbose("comparing..\n");
   1346     if(!memcmp(junkout, expect, expectLen))
   1347     {
   1348         log_verbose("Matches!\n");
   1349         return TRUE;
   1350     }
   1351     else
   1352     {
   1353         log_err("String does not match. %s\n", gNuConvTestName);
   1354         printUSeqErr(source, sourceLen);
   1355         log_info("\nGot:");
   1356         printSeqErr((const unsigned char *)junkout, expectLen);
   1357         log_info("\nExpected:");
   1358         printSeqErr((const unsigned char *)expect, expectLen);
   1359 
   1360         return FALSE;
   1361     }
   1362 }
   1363 
   1364 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
   1365                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
   1366 {
   1367     UErrorCode status = U_ZERO_ERROR;
   1368     UConverter *conv = 0;
   1369     UChar    junkout[MAX_LENGTH]; /* FIX */
   1370     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1371     const char *src;
   1372     const char *realSourceEnd;
   1373     const char *srcLimit;
   1374     UChar *p;
   1375     UChar *targ;
   1376     UChar *end;
   1377     int32_t *offs;
   1378     int i;
   1379     UBool   checkOffsets = TRUE;
   1380     int32_t   realBufferSize;
   1381     UChar *realBufferEnd;
   1382     UBool doFlush;
   1383 
   1384     UConverterToUCallback oldAction = NULL;
   1385     const void* oldContext = NULL;
   1386 
   1387 
   1388     for(i=0;i<MAX_LENGTH;i++)
   1389         junkout[i] = 0xFFFE;
   1390 
   1391     for(i=0;i<MAX_LENGTH;i++)
   1392         junokout[i] = -1;
   1393 
   1394     setNuConvTestName(codepage, "TO");
   1395 
   1396     log_verbose("\n=========  %s\n", gNuConvTestName);
   1397 
   1398     conv = ucnv_open(codepage, &status);
   1399     if(U_FAILURE(status))
   1400     {
   1401         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
   1402         return TRUE;
   1403     }
   1404 
   1405     log_verbose("Converter opened..\n");
   1406      /*----setting the callback routine----*/
   1407     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1408     if (U_FAILURE(status)) {
   1409         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1410     }
   1411     /*-------------------------------------*/
   1412 
   1413     src = (const char *)source;
   1414     targ = junkout;
   1415     offs = junokout;
   1416 
   1417     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
   1418     realBufferEnd = junkout + realBufferSize;
   1419     realSourceEnd = src + sourcelen;
   1420 
   1421     if ( gOutBufferSize != realBufferSize )
   1422       checkOffsets = FALSE;
   1423 
   1424     if( gInBufferSize != MAX_LENGTH )
   1425       checkOffsets = FALSE;
   1426 
   1427     do
   1428       {
   1429         end = nct_min( targ + gOutBufferSize, realBufferEnd);
   1430         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
   1431 
   1432         if(targ == realBufferEnd)
   1433         {
   1434             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
   1435             return FALSE;
   1436         }
   1437         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
   1438 
   1439         /* oldTarg = targ; */
   1440 
   1441         status = U_ZERO_ERROR;
   1442         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
   1443 
   1444         ucnv_toUnicode (conv,
   1445                 &targ,
   1446                 end,
   1447                 (const char **)&src,
   1448                 (const char *)srcLimit,
   1449                 offs,
   1450                 doFlush, /* flush if we're at the end of hte source data */
   1451                 &status);
   1452         if(testReset)
   1453             ucnv_resetFromUnicode(conv);
   1454         if(gInBufferSize ==999 && gOutBufferSize==999)
   1455             ucnv_resetToUnicode(conv);
   1456         /*        offs += (targ-oldTarg); */
   1457 
   1458       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
   1459 
   1460     if(U_FAILURE(status))
   1461     {
   1462         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1463         return FALSE;
   1464     }
   1465 
   1466     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
   1467         sourcelen, targ-junkout);
   1468     if(VERBOSITY)
   1469     {
   1470         char junk[999];
   1471         char offset_str[999];
   1472 
   1473         UChar *ptr;
   1474 
   1475         junk[0] = 0;
   1476         offset_str[0] = 0;
   1477 
   1478         for(ptr = junkout;ptr<targ;ptr++)
   1479         {
   1480             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
   1481             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
   1482         }
   1483 
   1484         log_verbose(junk);
   1485 
   1486         if ( checkOffsets )
   1487           {
   1488             log_verbose("\nOffsets:");
   1489             log_verbose(offset_str);
   1490           }
   1491         log_verbose("\n");
   1492     }
   1493     ucnv_close(conv);
   1494 
   1495     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
   1496 
   1497     if (checkOffsets && (expectOffsets != 0))
   1498     {
   1499         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
   1500 
   1501             log_err("did not get the expected offsets. %s",gNuConvTestName);
   1502             for(p=junkout;p<targ;p++)
   1503                 log_err("%d, ", junokout[p-junkout]);
   1504             log_err("\nExpected: ");
   1505             for(i=0; i<(targ-junkout); i++)
   1506                 log_err("%d,", expectOffsets[i]);
   1507             log_err("");
   1508             for(i=0; i<(targ-junkout); i++)
   1509                 log_err("%X,", junkout[i]);
   1510             log_err("");
   1511             for(i=0; i<(src-(const char *)source); i++)
   1512                 log_err("%X,", (unsigned char)source[i]);
   1513         }
   1514     }
   1515 
   1516     if(!memcmp(junkout, expect, expectlen*2))
   1517     {
   1518         log_verbose("Matches!\n");
   1519         return TRUE;
   1520     }
   1521     else
   1522     {
   1523         log_err("String does not match. %s\n", gNuConvTestName);
   1524         log_verbose("String does not match. %s\n", gNuConvTestName);
   1525         log_info("\nGot:");
   1526         printUSeq(junkout, expectlen);
   1527         log_info("\nExpected:");
   1528         printUSeq(expect, expectlen);
   1529         return FALSE;
   1530     }
   1531 }
   1532 
   1533 
   1534 static void TestResetBehaviour(void){
   1535 #if !UCONFIG_NO_LEGACY_CONVERSION
   1536     log_verbose("Testing Reset for DBCS and MBCS\n");
   1537     {
   1538         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
   1539         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
   1540         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
   1541 
   1542 
   1543         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
   1544         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
   1545         static const int32_t offsets1[] =  { 0,2,4,6};
   1546 
   1547         /*DBCS*/
   1548         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1549                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1550             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1551         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1552                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1553             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1554 
   1555         if(!testConvertToU(expected1, sizeof(expected1),
   1556                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1557                 offsets1, TRUE))
   1558            log_err("ibm-1363 -> did not match.\n");
   1559         /*MBCS*/
   1560         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1561                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1562             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1563         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1564                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1565             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1566 
   1567         if(!testConvertToU(expected1, sizeof(expected1),
   1568                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1569                 offsets1, TRUE))
   1570            log_err("ibm-1363 -> did not match.\n");
   1571 
   1572     }
   1573 
   1574     /* BEGIN android-removed */
   1575     /* To save space, Android does not build full ISO2022 CJK tables.
   1576        We skip the tests for ISO-2022. */
   1577     /*
   1578     log_verbose("Testing Reset for ISO-2022-jp\n");
   1579     {
   1580         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1581 
   1582         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1583                                     0x31,0x1A, 0x32};
   1584 
   1585 
   1586         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
   1587 
   1588 
   1589         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1590         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1591                                     0x31,0x1A, 0x32};
   1592         static const int32_t offsets1[] =  { 3,5,10,11,12};
   1593 
   1594         // iso-2022-jp
   1595         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1596                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1597             log_err("u-> not match.\n");
   1598         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1599                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1600             log_err("u->  not match.\n");
   1601 
   1602         if(!testConvertToU(expected1, sizeof(expected1),
   1603                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1604                 offsets1, TRUE))
   1605            log_err("iso-2022-jp -> did not match.\n");
   1606 
   1607     }
   1608 
   1609     log_verbose("Testing Reset for ISO-2022-cn\n");
   1610     {
   1611         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1612 
   1613         static const uint8_t expected[] = {
   1614                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1615                                     0x36, 0x21,
   1616                                     0x0f, 0x31,
   1617                                     0x1A,
   1618                                     0x32
   1619                                     };
   1620 
   1621 
   1622         static const int32_t offsets[] = {
   1623                                     0,    0,    0,    0,    0,    0,    0,
   1624                                     1,    1,
   1625                                     2,    2,
   1626                                     3,
   1627                                     5,  };
   1628 
   1629         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1630         static const uint8_t expected1[] = {
   1631                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1632                                     0x36, 0x21,
   1633                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
   1634                                     0x0f, 0x1A,
   1635                                     0x32
   1636                                     };
   1637         static const int32_t offsets1[] =  { 5,7,13,16,17};
   1638 
   1639         // iso-2022-CN
   1640         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1641                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1642             log_err("u-> not match.\n");
   1643         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1644                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1645             log_err("u-> not match.\n");
   1646 
   1647         if(!testConvertToU(expected1, sizeof(expected1),
   1648                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1649                 offsets1, TRUE))
   1650            log_err("iso-2022-cn -> did not match.\n");
   1651     }
   1652 
   1653         log_verbose("Testing Reset for ISO-2022-kr\n");
   1654     {
   1655         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1656 
   1657         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
   1658                                     0x0E, 0x6C, 0x69,
   1659                                     0x0f, 0x1A,
   1660                                     0x0e, 0x6F, 0x4B,
   1661                                     0x0F, 0x31,
   1662                                     0x1A,
   1663                                     0x32 };
   1664 
   1665         static const int32_t offsets[] = {-1, -1, -1, -1,
   1666                               0, 0, 0,
   1667                               1, 1,
   1668                               3, 3, 3,
   1669                               4, 4,
   1670                               5,
   1671                               7,
   1672                             };
   1673         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
   1674 
   1675         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
   1676                                     0x0E, 0x6C, 0x69,
   1677                                     0x0f, 0x41,
   1678                                     0x0e, 0x6F, 0x4B,
   1679                                     0x0F, 0x31,
   1680                                     0x42,
   1681                                     0x32 };
   1682 
   1683         static const int32_t offsets1[] = {
   1684                               5, 8, 10,
   1685                               13, 14, 15
   1686 
   1687                             };
   1688         // iso-2022-kr
   1689         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1690                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1691             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1692         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1693                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1694             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1695         if(!testConvertToU(expected1, sizeof(expected1),
   1696                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1697                 offsets1, TRUE))
   1698            log_err("iso-2022-kr -> did not match.\n");
   1699     }
   1700     */
   1701     /* END android-removed */
   1702 
   1703         log_verbose("Testing Reset for HZ\n");
   1704     {
   1705         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1706 
   1707         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
   1708                                     0x7E, 0x7D, 0x1A,
   1709                                     0x7E, 0x7B, 0x36, 0x21,
   1710                                     0x7E, 0x7D, 0x31,
   1711                                     0x1A,
   1712                                     0x32 };
   1713 
   1714 
   1715         static const int32_t offsets[] = {0,0,0,0,
   1716                              1,1,1,
   1717                              3,3,3,3,
   1718                              4,4,4,
   1719                              5,
   1720                              7,};
   1721         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
   1722 
   1723         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
   1724                                     0x7E, 0x7D, 0x35,
   1725                                     0x7E, 0x7B, 0x36, 0x21,
   1726                                     0x7E, 0x7D, 0x31,
   1727                                     0x41,
   1728                                     0x32 };
   1729 
   1730 
   1731         static const int32_t offsets1[] = {2,6,9,13,14,15
   1732                             };
   1733 
   1734         /*hz*/
   1735         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1736                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1737             log_err("u->  not match.\n");
   1738         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1739                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1740             log_err("u->  not match.\n");
   1741         if(!testConvertToU(expected1, sizeof(expected1),
   1742                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1743                 offsets1, TRUE))
   1744            log_err("hz -> did not match.\n");
   1745     }
   1746 #endif
   1747 
   1748     /*UTF-8*/
   1749      log_verbose("Testing for UTF8\n");
   1750     {
   1751         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
   1752         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
   1753                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
   1754                            0x04, 0x06 };
   1755         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
   1756             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
   1757 
   1758 
   1759         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
   1760         /*UTF-8*/
   1761         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1762             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1763             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1764         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1765             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1766             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1767         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1768             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1769             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1770         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1771             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1772             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1773         if(!testConvertToU(expected, sizeof(expected),
   1774             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1775             log_err("UTF8 -> did not match.\n");
   1776         if(!testConvertToU(expected, sizeof(expected),
   1777             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1778             log_err("UTF8 -> did not match.\n");
   1779         if(!testConvertToU(expected, sizeof(expected),
   1780             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1781             log_err("UTF8 -> did not match.\n");
   1782         if(!testConvertToU(expected, sizeof(expected),
   1783             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1784             log_err("UTF8 -> did not match.\n");
   1785 
   1786     }
   1787 
   1788 }
   1789 
   1790 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
   1791 static void
   1792 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
   1793     UConverter *cnv;
   1794 
   1795     UChar buffer[2];
   1796     UChar *target, *targetLimit;
   1797     const char *source, *sourceLimit;
   1798 
   1799     UErrorCode errorCode;
   1800 
   1801     errorCode=U_ZERO_ERROR;
   1802     cnv=ucnv_open(cnvName, &errorCode);
   1803     if(U_FAILURE(errorCode)) {
   1804         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
   1805         return;
   1806     }
   1807     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
   1808     if(U_FAILURE(errorCode)) {
   1809         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
   1810                     cnvName, u_errorName(errorCode));
   1811         ucnv_close(cnv);
   1812         return;
   1813     }
   1814 
   1815     source=(const char *)bytes;
   1816     sourceLimit=source+length;
   1817     target=buffer;
   1818     targetLimit=buffer+LENGTHOF(buffer);
   1819 
   1820     /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
   1821     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
   1822     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
   1823         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
   1824                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1825     }
   1826 
   1827     errorCode=U_ZERO_ERROR;
   1828     source=sourceLimit;
   1829     target=buffer;
   1830     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1831     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
   1832         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
   1833                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
   1834     }
   1835 
   1836     /* 2. input bytes with flush=TRUE */
   1837     ucnv_resetToUnicode(cnv);
   1838 
   1839     errorCode=U_ZERO_ERROR;
   1840     source=(const char *)bytes;
   1841     target=buffer;
   1842     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1843     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
   1844         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
   1845                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1846     }
   1847 
   1848 
   1849     ucnv_close(cnv);
   1850 }
   1851 
   /*
    * Runs doTestTruncated() over a table of converter names paired with partial
    * input byte sequences that should produce no output. The Shift-JIS and
    * ibm-939 rows are included only when legacy conversion is built in.
    */
   static void
   TestTruncated(void) {
       static const struct {
           const char *cnvName;
           uint8_t bytes[8]; /* partial input bytes resulting in no output */
           int32_t length;
       } testCases[]={
           { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
           { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
           { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
           { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */

           { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
           { "UTF-8",      { 0xd1 }, 1 },

           { "UTF-16BE",   { 0x4e }, 1 },
           { "UTF-16LE",   { 0x4e }, 1 },
           { "UTF-16",     { 0x4e }, 1 },
           { "UTF-16",     { 0xff }, 1 },
           { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },

           { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
           { "UTF-32LE",   { 0x4e }, 1 },
           { "UTF-32",     { 0, 0, 0x4e }, 3 },
           { "UTF-32",     { 0xff }, 1 },
           { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
           { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */

           /* BOCU-1 is listed in both configurations, so it sits outside the
            * conditional; the trailing comma keeps the initializer valid
            * whether or not the rows below are compiled in. */
           { "BOCU-1",     { 0xd5 }, 1 },

   #if !UCONFIG_NO_LEGACY_CONVERSION
           { "Shift-JIS",  { 0xe0 }, 1 },

           { "ibm-939",    { 0x0e, 0x41 }, 2 }, /* SO 0x41 */
   #endif
       };
       int32_t i;

       for(i=0; i<LENGTHOF(testCases); ++i) {
           doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
       }
   }
   1896 
   1897 typedef struct NameRange {
   1898     const char *name;
   1899     UChar32 start, end, start2, end2, notStart, notEnd;
   1900 } NameRange;
   1901 
   1902 static void
   1903 TestUnicodeSet() {
   1904     UErrorCode errorCode;
   1905     UConverter *cnv;
   1906     USet *set;
   1907     const char *name;
   1908     int32_t i, count;
   1909 
   1910     static const char *const completeSetNames[]={
   1911         "UTF-7",
   1912         "UTF-8",
   1913         "UTF-16",
   1914         "UTF-16BE",
   1915         "UTF-16LE",
   1916         "UTF-32",
   1917         "UTF-32BE",
   1918         "UTF-32LE",
   1919         "SCSU",
   1920         "BOCU-1",
   1921         "CESU-8",
   1922 #if !UCONFIG_NO_LEGACY_CONVERSION
   1923         "gb18030",
   1924 #endif
   1925         "IMAP-mailbox-name"
   1926     };
   1927 #if !UCONFIG_NO_LEGACY_CONVERSION
   1928     static const char *const lmbcsNames[]={
   1929         "LMBCS-1",
   1930         "LMBCS-2",
   1931         "LMBCS-3",
   1932         "LMBCS-4",
   1933         "LMBCS-5",
   1934         "LMBCS-6",
   1935         "LMBCS-8",
   1936         "LMBCS-11",
   1937         "LMBCS-16",
   1938         "LMBCS-17",
   1939         "LMBCS-18",
   1940         "LMBCS-19"
   1941     };
   1942 #endif
   1943 
   1944     static const NameRange nameRanges[]={
   1945         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1946 #if !UCONFIG_NO_LEGACY_CONVERSION
   1947         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1948 #endif
   1949         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
   1950 #if !UCONFIG_NO_LEGACY_CONVERSION
   1951         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
   1952         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
   1953         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
   1954         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
   1955 #else
   1956         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
   1957 #endif
   1958     };
   1959 
   1960     /* open an empty set */
   1961     set=uset_open(1, 0);
   1962 
   1963     count=ucnv_countAvailable();
   1964     for(i=0; i<count; ++i) {
   1965         errorCode=U_ZERO_ERROR;
   1966         name=ucnv_getAvailableName(i);
   1967         cnv=ucnv_open(name, &errorCode);
   1968         if(U_FAILURE(errorCode)) {
   1969             log_data_err("error: unable to open converter %s - %s\n",
   1970                     name, u_errorName(errorCode));
   1971             continue;
   1972         }
   1973 
   1974         uset_clear(set);
   1975         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   1976         if(U_FAILURE(errorCode)) {
   1977             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   1978                     name, u_errorName(errorCode));
   1979         } else if(uset_size(set)==0) {
   1980             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
   1981         }
   1982 
   1983         ucnv_close(cnv);
   1984     }
   1985 
   1986     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
   1987     for(i=0; i<LENGTHOF(completeSetNames); ++i) {
   1988         errorCode=U_ZERO_ERROR;
   1989         name=completeSetNames[i];
   1990         cnv=ucnv_open(name, &errorCode);
   1991         if(U_FAILURE(errorCode)) {
   1992             log_data_err("error: unable to open converter %s - %s\n",
   1993                     name, u_errorName(errorCode));
   1994             continue;
   1995         }
   1996 
   1997         uset_clear(set);
   1998         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   1999         if(U_FAILURE(errorCode)) {
   2000             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2001                     name, u_errorName(errorCode));
   2002         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
   2003             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
   2004         }
   2005 
   2006         ucnv_close(cnv);
   2007     }
   2008 
   2009 #if !UCONFIG_NO_LEGACY_CONVERSION
   2010     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
   2011     for(i=0; i<LENGTHOF(lmbcsNames); ++i) {
   2012         errorCode=U_ZERO_ERROR;
   2013         name=lmbcsNames[i];
   2014         cnv=ucnv_open(name, &errorCode);
   2015         if(U_FAILURE(errorCode)) {
   2016             log_data_err("error: unable to open converter %s - %s\n",
   2017                     name, u_errorName(errorCode));
   2018             continue;
   2019         }
   2020 
   2021         uset_clear(set);
   2022         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2023         if(U_FAILURE(errorCode)) {
   2024             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2025                     name, u_errorName(errorCode));
   2026         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
   2027             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
   2028         }
   2029 
   2030         ucnv_close(cnv);
   2031     }
   2032 #endif
   2033 
   2034     /* test specific sets */
   2035     for(i=0; i<LENGTHOF(nameRanges); ++i) {
   2036         errorCode=U_ZERO_ERROR;
   2037         name=nameRanges[i].name;
   2038         cnv=ucnv_open(name, &errorCode);
   2039         if(U_FAILURE(errorCode)) {
   2040             log_data_err("error: unable to open converter %s - %s\n",
   2041                          name, u_errorName(errorCode));
   2042             continue;
   2043         }
   2044 
   2045         uset_clear(set);
   2046         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2047         if(U_FAILURE(errorCode)) {
   2048             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2049                     name, u_errorName(errorCode));
   2050         } else if(
   2051             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
   2052             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
   2053         ) {
   2054             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
   2055         } else if(nameRanges[i].notStart>=0) {
   2056             /* simulate containsAny() with the C API */
   2057             uset_complement(set);
   2058             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
   2059                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
   2060             }
   2061         }
   2062 
   2063         ucnv_close(cnv);
   2064     }
   2065 
   2066     errorCode = U_ZERO_ERROR;
   2067     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2068     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
   2069         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2070     }
   2071     errorCode = U_PARSE_ERROR;
   2072     /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
   2073     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
   2074     if (errorCode != U_PARSE_ERROR) {
   2075         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2076     }
   2077 
   2078     uset_close(set);
   2079 }
   2080