Home | History | Annotate | Download | only in cintltst
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2015, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 /*****************************************************************************
      7 *
      8 * File ncnvtst.c
      9 *
     10 * Modification History:
     11 *        Name                     Description
     12 *   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
     13 ******************************************************************************
     14 */
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/uloc.h"
     19 #include "unicode/ucnv.h"
     20 #include "unicode/utypes.h"
     21 #include "unicode/ustring.h"
     22 #include "unicode/uset.h"
     23 #include "cintltst.h"
     24 #include "cmemory.h"
     25 
     26 #define MAX_LENGTH 999
     27 
     28 #define UNICODE_LIMIT 0x10FFFF
     29 #define SURROGATE_HIGH_START    0xD800
     30 #define SURROGATE_LOW_END       0xDFFF
     31 
     32 static int32_t  gInBufferSize = 0;
     33 static int32_t  gOutBufferSize = 0;
     34 static char     gNuConvTestName[1024];
     35 
     36 #define nct_min(x,y)  ((x<y) ? x : y)
     37 
     38 static void printSeq(const unsigned char* a, int len);
     39 static void printSeqErr(const unsigned char* a, int len);
     40 static void printUSeq(const UChar* a, int len);
     41 static void printUSeqErr(const UChar* a, int len);
     42 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     43                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     44 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
     45                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
     46 
     47 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
     48                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
     49 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
     50                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
     51 
     52 static void setNuConvTestName(const char *codepage, const char *direction)
     53 {
     54     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
     55         codepage,
     56         direction,
     57         (int)gInBufferSize,
     58         (int)gOutBufferSize);
     59 }
     60 
     61 
     62 static void TestSurrogateBehaviour(void);
     63 static void TestErrorBehaviour(void);
     64 
     65 #if !UCONFIG_NO_LEGACY_CONVERSION
     66 static void TestToUnicodeErrorBehaviour(void);
     67 static void TestGetNextErrorBehaviour(void);
     68 #endif
     69 
     70 static void TestRegressionUTF8(void);
     71 static void TestRegressionUTF32(void);
     72 static void TestAvailableConverters(void);
     73 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
     74 static void TestResetBehaviour(void);
     75 static void TestTruncated(void);
     76 static void TestUnicodeSet(void);
     77 
     78 static void TestWithBufferSize(int32_t osize, int32_t isize);
     79 
     80 
     81 static void printSeq(const unsigned char* a, int len)
     82 {
     83     int i=0;
     84     log_verbose("\n{");
     85     while (i<len)
     86         log_verbose("0x%02X ", a[i++]);
     87     log_verbose("}\n");
     88 }
     89 
     90 static void printUSeq(const UChar* a, int len)
     91 {
     92     int i=0;
     93     log_verbose("\n{");
     94     while (i<len)
     95         log_verbose("%0x04X ", a[i++]);
     96     log_verbose("}\n");
     97 }
     98 
     99 static void printSeqErr(const unsigned char* a, int len)
    100 {
    101     int i=0;
    102     fprintf(stderr, "\n{");
    103     while (i<len)  fprintf(stderr, "0x%02X ", a[i++]);
    104     fprintf(stderr, "}\n");
    105 }
    106 
    107 static void printUSeqErr(const UChar* a, int len)
    108 {
    109     int i=0;
    110     fprintf(stderr, "\n{");
    111     while (i<len)
    112         fprintf(stderr, "0x%04X ", a[i++]);
    113     fprintf(stderr,"}\n");
    114 }
    115 
    116 void addExtraTests(TestNode** root);
    117 
    118 void addExtraTests(TestNode** root)
    119 {
    120      addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
    121      addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
    122 
    123 #if !UCONFIG_NO_LEGACY_CONVERSION
    124      addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
    125      addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
    126 #endif
    127 
    128      addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
    129      addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
    130      addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
    131      addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
    132      addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
    133      addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
    134      addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
    135 }
    136 
    137 /*test surrogate behaviour*/
    138 static void TestSurrogateBehaviour(){
    139     log_verbose("Testing for SBCS and LATIN_1\n");
    140     {
    141         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
    142         const uint8_t expected[] = {0x31, 0x1a, 0x32};
    143 
    144 #if !UCONFIG_NO_LEGACY_CONVERSION
    145         /*SBCS*/
    146         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    147                 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR))
    148             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
    149 #endif
    150 
    151         /*LATIN_1*/
    152         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    153                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR ))
    154             log_err("u-> LATIN_1 not match.\n");
    155 
    156     }
    157 
    158 #if !UCONFIG_NO_LEGACY_CONVERSION
    159     log_verbose("Testing for DBCS and MBCS\n");
    160     {
    161         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
    162         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    163         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
    164 
    165         /*DBCS*/
    166         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    167                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    168             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    169         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    170                 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR))
    171             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
    172         /*MBCS*/
    173         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    174                 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR))
    175             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    176         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    177                 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR))
    178             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
    179     }
    180 
    181     log_verbose("Testing for ISO-2022-jp\n");
    182     {
    183         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    184 
    185         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
    186                                     0x31,0x1A, 0x32};
    187 
    188 
    189         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
    190 
    191         // iso-2022-jp  android-change
    192         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    193                 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR))
    194             log_err("u-> not match.\n");
    195         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    196                 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR))
    197             log_err("u->  not match.\n");
    198     }
    199 
    200    /* BEGIN android-removed */
    201    /* To save space, Android does not build full ISO-2022-CN tables.
    202       We skip the tests for ISO-2022-CN. */
    203    /*
    204     log_verbose("Testing for ISO-2022-cn\n");
    205     {
    206         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    207 
    208         static const uint8_t expected[] = {
    209                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
    210                                     0x36, 0x21,
    211                                     0x0F, 0x31,
    212                                     0x1A,
    213                                     0x32
    214                                     };
    215 
    216 
    217 
    218         static const int32_t offsets[] = {
    219                                     0,    0,    0,    0,    0,    0,    0,
    220                                     1,    1,
    221                                     2,    2,
    222                                     3,
    223                                     5,  };
    224 
    225         // iso-2022-CN  android-change
    226         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    227                 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR))
    228             log_err("u-> not match.\n");
    229         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    230                 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR))
    231             log_err("u-> not match.\n");
    232     }
    233     */
    234     /* END android-removed */
    235 
    236         log_verbose("Testing for ISO-2022-kr\n");
    237     {
    238         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    239 
    240         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
    241                                     0x0E, 0x6C, 0x69,
    242                                     0x0f, 0x1A,
    243                                     0x0e, 0x6F, 0x4B,
    244                                     0x0F, 0x31,
    245                                     0x1A,
    246                                     0x32 };
    247 
    248         static const int32_t offsets[] = {-1, -1, -1, -1,
    249                               0, 0, 0,
    250                               1, 1,
    251                               3, 3, 3,
    252                               4, 4,
    253                               5,
    254                               7,
    255                             };
    256 
    257         // iso-2022-kr  android-change
    258         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    259                 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR))
    260             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    261         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    262                 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR))
    263             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
    264     }
    265 
    266         log_verbose("Testing for HZ\n");
    267     {
    268         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
    269 
    270         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
    271                                     0x7E, 0x7D, 0x1A,
    272                                     0x7E, 0x7B, 0x36, 0x21,
    273                                     0x7E, 0x7D, 0x31,
    274                                     0x1A,
    275                                     0x32 };
    276 
    277 
    278         static const int32_t offsets[] = {0,0,0,0,
    279                              1,1,1,
    280                              3,3,3,3,
    281                              4,4,4,
    282                              5,
    283                              7,};
    284 
    285         /*hz*/
    286         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    287                 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR))
    288             log_err("u-> HZ not match.\n");
    289         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    290                 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR))
    291             log_err("u-> HZ not match.\n");
    292     }
    293 #endif
    294 
    295     /*UTF-8*/
    296      log_verbose("Testing for UTF8\n");
    297     {
    298         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
    299         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
    300                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
    301                            0x04, 0x06 };
    302         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
    303             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
    304 
    305 
    306         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
    307         /*UTF-8*/
    308         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    309             expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR ))
    310             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    311         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    312             expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    313             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    314         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    315             expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR ))
    316             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    317         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    318             expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    319             log_err("u-> UTF8 with offsets and flush true did not match.\n");
    320 
    321         if(!convertToU(expected, sizeof(expected),
    322             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR ))
    323             log_err("UTF8 -> u did not match.\n");
    324         if(!convertToU(expected, sizeof(expected),
    325             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR ))
    326             log_err("UTF8 -> u did not match.\n");
    327         if(!convertToU(expected, sizeof(expected),
    328             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR ))
    329             log_err("UTF8 ->u  did not match.\n");
    330         if(!convertToU(expected, sizeof(expected),
    331             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR ))
    332             log_err("UTF8 -> u did not match.\n");
    333 
    334     }
    335 }
    336 
    337 /*test various error behaviours*/
    338 static void TestErrorBehaviour(){
    339     log_verbose("Testing for SBCS and LATIN_1\n");
    340     {
    341         static const UChar    sampleText[] =   { 0x0031, 0xd801};
    342         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
    343         static const uint8_t expected0[] =          { 0x31};
    344         static const uint8_t expected[] =          { 0x31, 0x1a};
    345         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
    346 
    347 #if !UCONFIG_NO_LEGACY_CONVERSION
    348         /*SBCS*/
    349         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    350                 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    351             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    352         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    353                 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR))
    354             log_err("u-> ibm-920 [UCNV_SBCS] \n");
    355         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    356                 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR))
    357             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
    358 #endif
    359 
    360         /*LATIN_1*/
    361         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    362                 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    363             log_err("u-> LATIN_1 is supposed to fail\n");
    364         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    365                 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR))
    366             log_err("u-> LATIN_1 is supposed to fail\n");
    367 
    368         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    369                 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR))
    370             log_err("u-> LATIN_1 did not match\n");
    371     }
    372 
    373 #if !UCONFIG_NO_LEGACY_CONVERSION
    374     log_verbose("Testing for DBCS and MBCS\n");
    375     {
    376         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
    377         static const uint8_t expected[] = { 0xa2, 0xae};
    378         static const int32_t offsets[]        = { 0x00, 0x00};
    379         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
    380         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
    381 
    382         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
    383         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
    384         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
    385 
    386         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
    387         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
    388         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
    389 
    390         static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
    391         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
    392         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
    393 
    394         /*DBCS*/
    395         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    396                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    397             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    398         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    399                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    400             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    401 
    402         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    403                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR))
    404             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    405         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    406                 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    407             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
    408 
    409 
    410         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    411                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    412             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    413         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    414                 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR))
    415             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
    416 
    417         /*MBCS*/
    418         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    419                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    420             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    421         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    422                 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING))
    423             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    424 
    425         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    426                 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR))
    427             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    428         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    429                 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR))
    430             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    431         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    432                 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR))
    433             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
    434 
    435         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    436                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR))
    437             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    438         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    439                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR))
    440             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    441 
    442         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    443                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR))
    444             log_err("u-> euc-jp [UCNV_MBCS] \n");
    445         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    446                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR))
    447             log_err("u-> euc-jp [UCNV_MBCS] \n");
    448     }
    449 
    450     // iso-2022-jp  android-change
    451     log_verbose("Testing for iso-2022-jp\n");
    452     {
    453         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    454         static const uint8_t expected[] = {  0x31};
    455         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
    456         static const int32_t offsets[]        = { 0x00, 1};
    457 
    458         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    459         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
    460         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
    461 
    462         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    463         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
    464         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
    465         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    466                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR))
    467             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    468         // Google Patch: Change expected result code from U_AMBIGUOUS_ALIAS_WARNING to U_ZERO_ERROR.
    469         //               Introduced with ICU 51.1.
    470         //               Markus says this warning can occur when the set of available converters is changed,
    471         //               and that it's not worth looking into in further detail.
    472         //               Note: public ICU was U_ZERO_ERROR prior to ICU 51.
    473         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    474                 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR))
    475             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    476         // End of Google Patch.
    477 
    478         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    479                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR))
    480             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
    481         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    482                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    483             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    484         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    485                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
    486             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
    487 
    488         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    489                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
    490             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    491         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    492                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR))
    493             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
    494     }
    495 
    496     /* BEGIN android-removed */
    497     /* To save space, Android does not build full ISO-2022-CN tables.
    498        We skip the tests for ISO-2022-CN. */
    499     /*
    500     // iso-2022-cn  android-change
    501     log_verbose("Testing for iso-2022-cn\n");
    502     {
    503         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    504         static const uint8_t expected[] = { 0x31};
    505         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
    506         static const int32_t offsets[]        = { 0x00, 1};
    507 
    508         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    509         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
    510         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
    511 
    512         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    513         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
    514         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
    515 
    516         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    517         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
    518         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
    519         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    520                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR))
    521             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    522         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    523                 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR))
    524             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    525 
    526         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    527                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR))
    528             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
    529         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    530                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    531             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    532         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    533                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR))
    534             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
    535 
    536         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    537                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR))
    538             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
    539         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    540                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR))
    541             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
    542 
    543         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    544                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR))
    545             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    546         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    547                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR))
    548             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
    549     }
    550     */
    551     /* END android-removed */
    552 
    553     // iso-2022-kr  android-change
    554     log_verbose("Testing for iso-2022-kr\n");
    555     {
    556         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    557         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
    558         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
    559         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
    560 
    561         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    562         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
    563         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
    564 
    565         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    566         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
    567         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
    568 
    569         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    570                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
    571             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
    572         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    573                 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR))
    574             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    575 
    576         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    577                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR))
    578             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
    579         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    580                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    581             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    582         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    583                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR))
    584             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
    585 
    586         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    587                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR))
    588             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
    589         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    590                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
    591             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
    592     }
    593 
    594     /*HZ*/
    595     log_verbose("Testing for HZ\n");
    596     {
    597         static const UChar    sampleText[]    = { 0x0031, 0xd801};
    598         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
    599         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
    600         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
    601 
    602         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
    603         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
    604         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
    605 
    606         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
    607         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
    608         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
    609 
    610         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
    611         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
    612         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
    613         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    614                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR))
    615             log_err("u-> HZ [UCNV_MBCS] \n");
    616         if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    617                 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR))
    618             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
    619 
    620         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    621                 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR))
    622             log_err("u->HZ[UCNV_DBCS] did not match\n");
    623         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    624                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    625             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    626         if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
    627                 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR))
    628             log_err("u-> HZ [UCNV_DBCS] did not match\n");
    629 
    630         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    631                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR))
    632             log_err("u->HZ [UCNV_MBCS] \n");
    633         if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
    634                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR))
    635             log_err("u-> HZ[UCNV_MBCS] \n");
    636 
    637         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    638                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR))
    639             log_err("u-> HZ [UCNV_MBCS] \n");
    640         if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
    641                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR))
    642             log_err("u-> HZ [UCNV_MBCS] \n");
    643     }
    644 #endif
    645 }
    646 
    647 #if !UCONFIG_NO_LEGACY_CONVERSION
    648 /*test different convertToUnicode error behaviours*/
    649 static void TestToUnicodeErrorBehaviour()
    650 {
    651     log_verbose("Testing error conditions for DBCS\n");
    652     {
    653         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
    654         const UChar expected[] = { 0x00a1 };
    655 
    656         if(!convertToU(sampleText, sizeof(sampleText),
    657                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING ))
    658             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
    659         if(!convertToU(sampleText, sizeof(sampleText),
    660                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING ))
    661             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
    662     }
    663     log_verbose("Testing error conditions for SBCS\n");
    664     {
    665         uint8_t sampleText[] = { 0xa2, 0xFF};
    666         const UChar expected[] = { 0x00c2 };
    667 
    668       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
    669         const UChar expected2[] = { 0x0073 };*/
    670 
    671         if(!convertToU(sampleText, sizeof(sampleText),
    672                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR ))
    673             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
    674         if(!convertToU(sampleText, sizeof(sampleText),
    675                 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR ))
    676             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
    677 
    678     }
    679 }
    680 
    681 static void TestGetNextErrorBehaviour(){
    682    /*Test for unassigned character*/
    683 #define INPUT_SIZE 1
    684     static const char input1[INPUT_SIZE]={ 0x70 };
    685     const char* source=(const char*)input1;
    686     UErrorCode err=U_ZERO_ERROR;
    687     UChar32 c=0;
    688     UConverter *cnv=ucnv_open("ibm-424", &err);
    689     if(U_FAILURE(err)) {
    690         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
    691         return;
    692     }
    693     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
    694     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
    695         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
    696     }
    697     ucnv_close(cnv);
    698 }
    699 #endif
    700 
    701 #define MAX_UTF16_LEN 2
    702 #define MAX_UTF8_LEN 4
    703 
    704 /*Regression test for utf8 converter*/
    705 static void TestRegressionUTF8(){
    706     UChar32 currCh = 0;
    707     int32_t offset8;
    708     int32_t offset16;
    709     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    710     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
    711 
    712     while (currCh <= UNICODE_LIMIT) {
    713         offset16 = 0;
    714         offset8 = 0;
    715         while(currCh <= UNICODE_LIMIT
    716             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    717             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
    718         {
    719             if (currCh == SURROGATE_HIGH_START) {
    720                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    721             }
    722             UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
    723             UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh);
    724             currCh++;
    725         }
    726         if(!convertFromU(standardForm, offset16,
    727             utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    728             log_err("Unicode->UTF8 did not match.\n");
    729         }
    730         if(!convertToU(utf8, offset8,
    731             standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) {
    732             log_err("UTF8->Unicode did not match.\n");
    733         }
    734     }
    735 
    736     free(standardForm);
    737     free(utf8);
    738 
    739     {
    740         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
    741         static const UChar expected[] = { 0x0301, 0x0300 };
    742         UConverter *conv8;
    743         UErrorCode err = U_ZERO_ERROR;
    744         UChar pivotBuffer[100];
    745         const UChar* const pivEnd = pivotBuffer + 100;
    746         const char* srcBeg;
    747         const char* srcEnd;
    748         UChar* pivBeg;
    749 
    750         conv8 = ucnv_open("UTF-8", &err);
    751 
    752         srcBeg = src8;
    753         pivBeg = pivotBuffer;
    754         srcEnd = src8 + 3;
    755         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    756         if (srcBeg != srcEnd) {
    757             log_err("Did not consume whole buffer on first call.\n");
    758         }
    759 
    760         srcEnd = src8 + 4;
    761         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    762         if (srcBeg != srcEnd) {
    763             log_err("Did not consume whole buffer on second call.\n");
    764         }
    765 
    766         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    767             log_err("Did not get expected results for UTF-8.\n");
    768         }
    769         ucnv_close(conv8);
    770     }
    771 }
    772 
    773 #define MAX_UTF32_LEN 1
    774 
    775 static void TestRegressionUTF32(){
    776 #if !UCONFIG_ONLY_HTML_CONVERSION
    777     UChar32 currCh = 0;
    778     int32_t offset32;
    779     int32_t offset16;
    780     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
    781     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
    782 
    783     while (currCh <= UNICODE_LIMIT) {
    784         offset16 = 0;
    785         offset32 = 0;
    786         while(currCh <= UNICODE_LIMIT
    787             && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN)
    788             && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN))
    789         {
    790             if (currCh == SURROGATE_HIGH_START) {
    791                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
    792             }
    793             UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh);
    794             UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh);
    795             currCh++;
    796         }
    797         if(!convertFromU(standardForm, offset16,
    798             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    799             log_err("Unicode->UTF32 did not match.\n");
    800         }
    801         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
    802             standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) {
    803             log_err("UTF32->Unicode did not match.\n");
    804         }
    805     }
    806     free(standardForm);
    807     free(utf32);
    808 
    809     {
    810         /* Check for lone surrogate error handling. */
    811         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
    812         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
    813         static const uint8_t expectedUTF32BE[] = {
    814             0x00, 0x00, 0x00, 0x31,
    815             0x00, 0x00, 0xff, 0xfd,
    816             0x00, 0x00, 0x00, 0x32
    817         };
    818         static const uint8_t expectedUTF32LE[] = {
    819             0x31, 0x00, 0x00, 0x00,
    820             0xfd, 0xff, 0x00, 0x00,
    821             0x32, 0x00, 0x00, 0x00
    822         };
    823         static const int32_t offsetsUTF32[] = {
    824             0x00, 0x00, 0x00, 0x00,
    825             0x01, 0x01, 0x01, 0x01,
    826             0x02, 0x02, 0x02, 0x02
    827         };
    828 
    829         if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
    830                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    831             log_err("u->UTF-32BE\n");
    832         if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
    833                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    834             log_err("u->UTF-32BE\n");
    835 
    836         if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]),
    837                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    838             log_err("u->UTF-32LE\n");
    839         if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]),
    840                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR))
    841             log_err("u->UTF-32LE\n");
    842     }
    843 
    844     {
    845         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
    846         static const UChar expected[] = { 0x0031, 0x0030 };
    847         UConverter *convBE;
    848         UErrorCode err = U_ZERO_ERROR;
    849         UChar pivotBuffer[100];
    850         const UChar* const pivEnd = pivotBuffer + 100;
    851         const char* srcBeg;
    852         const char* srcEnd;
    853         UChar* pivBeg;
    854 
    855         convBE = ucnv_open("UTF-32BE", &err);
    856 
    857         srcBeg = srcBE;
    858         pivBeg = pivotBuffer;
    859         srcEnd = srcBE + 5;
    860         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    861         if (srcBeg != srcEnd) {
    862             log_err("Did not consume whole buffer on first call.\n");
    863         }
    864 
    865         srcEnd = srcBE + 8;
    866         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    867         if (srcBeg != srcEnd) {
    868             log_err("Did not consume whole buffer on second call.\n");
    869         }
    870 
    871         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    872             log_err("Did not get expected results for UTF-32BE.\n");
    873         }
    874         ucnv_close(convBE);
    875     }
    876     {
    877         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
    878         static const UChar expected[] = { 0x0031, 0x0030 };
    879         UConverter *convLE;
    880         UErrorCode err = U_ZERO_ERROR;
    881         UChar pivotBuffer[100];
    882         const UChar* const pivEnd = pivotBuffer + 100;
    883         const char* srcBeg;
    884         const char* srcEnd;
    885         UChar* pivBeg;
    886 
    887         convLE = ucnv_open("UTF-32LE", &err);
    888 
    889         srcBeg = srcLE;
    890         pivBeg = pivotBuffer;
    891         srcEnd = srcLE + 5;
    892         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err);
    893         if (srcBeg != srcEnd) {
    894             log_err("Did not consume whole buffer on first call.\n");
    895         }
    896 
    897         srcEnd = srcLE + 8;
    898         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err);
    899         if (srcBeg != srcEnd) {
    900             log_err("Did not consume whole buffer on second call.\n");
    901         }
    902 
    903         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
    904             log_err("Did not get expected results for UTF-32LE.\n");
    905         }
    906         ucnv_close(convLE);
    907     }
    908 #endif
    909 }
    910 
    911 /*Walk through the available converters*/
    912 static void TestAvailableConverters(){
    913     UErrorCode status=U_ZERO_ERROR;
    914     UConverter *conv=NULL;
    915     int32_t i=0;
    916     for(i=0; i < ucnv_countAvailable(); i++){
    917         status=U_ZERO_ERROR;
    918         conv=ucnv_open(ucnv_getAvailableName(i), &status);
    919         if(U_FAILURE(status)){
    920             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
    921                         ucnv_getAvailableName(i), myErrorName(status));
    922             continue;
    923         }
    924         ucnv_close(conv);
    925     }
    926 
    927 }
    928 
    929 static void TestFlushInternalBuffer(){
    930     TestWithBufferSize(MAX_LENGTH, 1);
    931     TestWithBufferSize(1, 1);
    932     TestWithBufferSize(1, MAX_LENGTH);
    933     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
    934 }
    935 
    936 static void TestWithBufferSize(int32_t insize, int32_t outsize){
    937 
    938     gInBufferSize =insize;
    939     gOutBufferSize = outsize;
    940 
    941      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    942     {
    943         UChar    sampleText[] =
    944             { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
    945         const uint8_t expectedUTF8[] =
    946             { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
    947         int32_t  toUTF8Offs[] =
    948             { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
    949        /* int32_t fmUTF8Offs[] =
    950             { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
    951 
    952         /*UTF-8*/
    953         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
    954             expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE))
    955              log_err("u-> UTF8 did not match.\n");
    956     }
    957 
    958 #if !UCONFIG_NO_LEGACY_CONVERSION
    959      log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
    960     {
    961         UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
    962         const uint8_t toIBM943[]= { 0x61,
    963             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    964             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
    965             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
    966             0x61 };
    967         int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
    968 
    969         if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
    970                 toIBM943, sizeof(toIBM943), "ibm-943",
    971                 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE))
    972             log_err("u-> ibm-943 with subst with value did not match.\n");
    973     }
    974 #endif
    975 
    976      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
    977     {
    978         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
    979             0xe0, 0x80,  0x61};
    980         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
    981         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
    982 
    983         if(!testConvertToU(sampleText1, sizeof(sampleText1),
    984                  expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE))
    985             log_err("utf8->u with substitute did not match.\n");;
    986     }
    987 
    988 #if !UCONFIG_NO_LEGACY_CONVERSION
    989     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
    990     /*to Unicode*/
    991     {
    992         const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
    993             0x81, 0xad, /*unassigned*/
    994             0x89, 0xd3 };
    995         UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
    996             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
    997             0x7B87};
    998         int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
    999 
   1000         if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
   1001                  IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
   1002                 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE))
   1003             log_err("ibm-943->u with substitute with value did not match.\n");
   1004 
   1005     }
   1006 #endif
   1007 }
   1008 
   1009 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1010                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1011 {
   1012 
   1013     int32_t i=0;
   1014     char *p=0;
   1015     const UChar *src;
   1016     char buffer[MAX_LENGTH];
   1017     int32_t offsetBuffer[MAX_LENGTH];
   1018     int32_t *offs=0;
   1019     char *targ;
   1020     char *targetLimit;
   1021     UChar *sourceLimit=0;
   1022     UErrorCode status = U_ZERO_ERROR;
   1023     UConverter *conv = 0;
   1024     conv = ucnv_open(codepage, &status);
   1025     if(U_FAILURE(status))
   1026     {
   1027         log_data_err("Couldn't open converter %s\n",codepage);
   1028         return TRUE;
   1029     }
   1030     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1031 
   1032     for(i=0; i<MAX_LENGTH; i++){
   1033         buffer[i]=(char)0xF0;
   1034         offsetBuffer[i]=0xFF;
   1035     }
   1036 
   1037     src=source;
   1038     sourceLimit=(UChar*)src+(sourceLen);
   1039     targ=buffer;
   1040     targetLimit=targ+MAX_LENGTH;
   1041     offs=offsetBuffer;
   1042     ucnv_fromUnicode (conv,
   1043                   (char **)&targ,
   1044                   (const char *)targetLimit,
   1045                   &src,
   1046                   sourceLimit,
   1047                   expectOffsets ? offs : NULL,
   1048                   doFlush,
   1049                   &status);
   1050     ucnv_close(conv);
   1051     if(status != expectedStatus){
   1052           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1053           return FALSE;
   1054     }
   1055 
   1056     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1057         sourceLen, targ-buffer);
   1058 
   1059     if(expectLen != targ-buffer)
   1060     {
   1061         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1062         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
   1063         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
   1064         printSeqErr((const unsigned char*)expect, expectLen);
   1065         return FALSE;
   1066     }
   1067 
   1068     if(memcmp(buffer, expect, expectLen)){
   1069         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
   1070         log_info("\nGot:");
   1071         printSeqErr((const unsigned char *)buffer, expectLen);
   1072         log_info("\nExpected:");
   1073         printSeqErr((const unsigned char *)expect, expectLen);
   1074         return FALSE;
   1075     }
   1076     else {
   1077         log_verbose("Matches!\n");
   1078     }
   1079 
   1080     if (expectOffsets != 0){
   1081         log_verbose("comparing %d offsets..\n", targ-buffer);
   1082         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
   1083             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
   1084             log_info("\nGot  : ");
   1085             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
   1086             for(p=buffer;p<targ;p++)
   1087                 log_info("%d, ", offsetBuffer[p-buffer]);
   1088             log_info("\nExpected: ");
   1089             for(i=0; i< (targ-buffer); i++)
   1090                 log_info("%d,", expectOffsets[i]);
   1091         }
   1092     }
   1093 
   1094     return TRUE;
   1095 }
   1096 
   1097 
   1098 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
   1099                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
   1100 {
   1101     UErrorCode status = U_ZERO_ERROR;
   1102     UConverter *conv = 0;
   1103     int32_t i=0;
   1104     UChar *p=0;
   1105     const char* src;
   1106     UChar buffer[MAX_LENGTH];
   1107     int32_t offsetBuffer[MAX_LENGTH];
   1108     int32_t *offs=0;
   1109     UChar *targ;
   1110     UChar *targetLimit;
   1111     uint8_t *sourceLimit=0;
   1112 
   1113 
   1114 
   1115     conv = ucnv_open(codepage, &status);
   1116     if(U_FAILURE(status))
   1117     {
   1118         log_data_err("Couldn't open converter %s\n",codepage);
   1119         return TRUE;
   1120     }
   1121     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
   1122 
   1123 
   1124 
   1125     for(i=0; i<MAX_LENGTH; i++){
   1126         buffer[i]=0xFFFE;
   1127         offsetBuffer[i]=-1;
   1128     }
   1129 
   1130     src=(const char *)source;
   1131     sourceLimit=(uint8_t*)(src+(sourceLen));
   1132     targ=buffer;
   1133     targetLimit=targ+MAX_LENGTH;
   1134     offs=offsetBuffer;
   1135 
   1136 
   1137 
   1138     ucnv_toUnicode (conv,
   1139                 &targ,
   1140                 targetLimit,
   1141                 (const char **)&src,
   1142                 (const char *)sourceLimit,
   1143                 expectOffsets ? offs : NULL,
   1144                 doFlush,
   1145                 &status);
   1146 
   1147     ucnv_close(conv);
   1148     if(status != expectedStatus){
   1149           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
   1150           return FALSE;
   1151     }
   1152     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1153         sourceLen, targ-buffer);
   1154 
   1155 
   1156 
   1157 
   1158     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
   1159 
   1160     if (expectOffsets != 0) {
   1161         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
   1162 
   1163             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
   1164             log_info("\nGot : ");
   1165             for(p=buffer;p<targ;p++)
   1166                 log_info("%d, ", offsetBuffer[p-buffer]);
   1167             log_info("\nExpected: ");
   1168             for(i=0; i<(targ-buffer); i++)
   1169                 log_info("%d, ", expectOffsets[i]);
   1170             log_info("\nGot result:");
   1171             for(i=0; i<(targ-buffer); i++)
   1172                 log_info("0x%04X,", buffer[i]);
   1173             log_info("\nFrom Input:");
   1174             for(i=0; i<(src-(const char *)source); i++)
   1175                 log_info("0x%02X,", (unsigned char)source[i]);
   1176             log_info("\n");
   1177         }
   1178     }
   1179     if(memcmp(buffer, expect, expectLen*2)){
   1180         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
   1181         log_info("\nGot:");
   1182         printUSeqErr(buffer, expectLen);
   1183         log_info("\nExpected:");
   1184         printUSeqErr(expect, expectLen);
   1185         return FALSE;
   1186     }
   1187     else {
   1188         log_verbose("Matches!\n");
   1189     }
   1190 
   1191     return TRUE;
   1192 }
   1193 
   1194 
   1195 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
   1196                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
   1197 {
   1198     UErrorCode status = U_ZERO_ERROR;
   1199     UConverter *conv = 0;
   1200     char    junkout[MAX_LENGTH]; /* FIX */
   1201     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1202     char *p;
   1203     const UChar *src;
   1204     char *end;
   1205     char *targ;
   1206     int32_t *offs;
   1207     int i;
   1208     int32_t   realBufferSize;
   1209     char *realBufferEnd;
   1210     const UChar *realSourceEnd;
   1211     const UChar *sourceLimit;
   1212     UBool checkOffsets = TRUE;
   1213     UBool doFlush;
   1214 
   1215     UConverterFromUCallback oldAction = NULL;
   1216     const void* oldContext = NULL;
   1217 
   1218     for(i=0;i<MAX_LENGTH;i++)
   1219         junkout[i] = (char)0xF0;
   1220     for(i=0;i<MAX_LENGTH;i++)
   1221         junokout[i] = 0xFF;
   1222 
   1223     setNuConvTestName(codepage, "FROM");
   1224 
   1225     log_verbose("\n=========  %s\n", gNuConvTestName);
   1226 
   1227     conv = ucnv_open(codepage, &status);
   1228     if(U_FAILURE(status))
   1229     {
   1230         log_data_err("Couldn't open converter %s\n",codepage);
   1231         return TRUE;
   1232     }
   1233 
   1234     log_verbose("Converter opened..\n");
   1235     /*----setting the callback routine----*/
   1236     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1237     if (U_FAILURE(status)) {
   1238         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1239     }
   1240     /*------------------------*/
   1241 
   1242     src = source;
   1243     targ = junkout;
   1244     offs = junokout;
   1245 
   1246     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
   1247     realBufferEnd = junkout + realBufferSize;
   1248     realSourceEnd = source + sourceLen;
   1249 
   1250     if ( gOutBufferSize != realBufferSize )
   1251       checkOffsets = FALSE;
   1252 
   1253     if( gInBufferSize != MAX_LENGTH )
   1254       checkOffsets = FALSE;
   1255 
   1256     do
   1257     {
   1258         end = nct_min(targ + gOutBufferSize, realBufferEnd);
   1259         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
   1260 
   1261         doFlush = (UBool)(sourceLimit == realSourceEnd);
   1262 
   1263         if(targ == realBufferEnd)
   1264           {
   1265         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
   1266         return FALSE;
   1267           }
   1268         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
   1269 
   1270 
   1271         status = U_ZERO_ERROR;
   1272         if(gInBufferSize ==999 && gOutBufferSize==999)
   1273             doFlush = FALSE;
   1274         ucnv_fromUnicode (conv,
   1275                   (char **)&targ,
   1276                   (const char *)end,
   1277                   &src,
   1278                   sourceLimit,
   1279                   offs,
   1280                   doFlush, /* flush if we're at the end of the input data */
   1281                   &status);
   1282         if(testReset)
   1283             ucnv_resetToUnicode(conv);
   1284         if(gInBufferSize ==999 && gOutBufferSize==999)
   1285             ucnv_resetToUnicode(conv);
   1286 
   1287       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
   1288 
   1289     if(U_FAILURE(status)) {
   1290         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1291         return FALSE;
   1292       }
   1293 
   1294     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
   1295         sourceLen, targ-junkout);
   1296     if(getTestOption(VERBOSITY_OPTION))
   1297     {
   1298         char junk[999];
   1299         char offset_str[999];
   1300         char *ptr;
   1301 
   1302         junk[0] = 0;
   1303         offset_str[0] = 0;
   1304         for(ptr = junkout;ptr<targ;ptr++)
   1305         {
   1306             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
   1307             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
   1308         }
   1309 
   1310         log_verbose(junk);
   1311         printSeq((const unsigned char *)expect, expectLen);
   1312         if ( checkOffsets )
   1313           {
   1314             log_verbose("\nOffsets:");
   1315             log_verbose(offset_str);
   1316           }
   1317         log_verbose("\n");
   1318     }
   1319     ucnv_close(conv);
   1320 
   1321 
   1322     if(expectLen != targ-junkout)
   1323     {
   1324         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1325         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
   1326         log_info("\nGot:");
   1327         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1328         log_info("\nExpected:");
   1329         printSeqErr((const unsigned char*)expect, expectLen);
   1330         return FALSE;
   1331     }
   1332 
   1333     if (checkOffsets && (expectOffsets != 0) )
   1334     {
   1335         log_verbose("comparing %d offsets..\n", targ-junkout);
   1336         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
   1337             log_err("did not get the expected offsets. %s", gNuConvTestName);
   1338             log_err("Got  : ");
   1339             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
   1340             for(p=junkout;p<targ;p++)
   1341                 log_err("%d, ", junokout[p-junkout]);
   1342             log_err("\nExpected: ");
   1343             for(i=0; i<(targ-junkout); i++)
   1344                 log_err("%d,", expectOffsets[i]);
   1345         }
   1346     }
   1347 
   1348     log_verbose("comparing..\n");
   1349     if(!memcmp(junkout, expect, expectLen))
   1350     {
   1351         log_verbose("Matches!\n");
   1352         return TRUE;
   1353     }
   1354     else
   1355     {
   1356         log_err("String does not match. %s\n", gNuConvTestName);
   1357         printUSeqErr(source, sourceLen);
   1358         log_info("\nGot:");
   1359         printSeqErr((const unsigned char *)junkout, expectLen);
   1360         log_info("\nExpected:");
   1361         printSeqErr((const unsigned char *)expect, expectLen);
   1362 
   1363         return FALSE;
   1364     }
   1365 }
   1366 
   1367 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
   1368                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
   1369 {
   1370     UErrorCode status = U_ZERO_ERROR;
   1371     UConverter *conv = 0;
   1372     UChar    junkout[MAX_LENGTH]; /* FIX */
   1373     int32_t    junokout[MAX_LENGTH]; /* FIX */
   1374     const char *src;
   1375     const char *realSourceEnd;
   1376     const char *srcLimit;
   1377     UChar *p;
   1378     UChar *targ;
   1379     UChar *end;
   1380     int32_t *offs;
   1381     int i;
   1382     UBool   checkOffsets = TRUE;
   1383     int32_t   realBufferSize;
   1384     UChar *realBufferEnd;
   1385     UBool doFlush;
   1386 
   1387     UConverterToUCallback oldAction = NULL;
   1388     const void* oldContext = NULL;
   1389 
   1390 
   1391     for(i=0;i<MAX_LENGTH;i++)
   1392         junkout[i] = 0xFFFE;
   1393 
   1394     for(i=0;i<MAX_LENGTH;i++)
   1395         junokout[i] = -1;
   1396 
   1397     setNuConvTestName(codepage, "TO");
   1398 
   1399     log_verbose("\n=========  %s\n", gNuConvTestName);
   1400 
   1401     conv = ucnv_open(codepage, &status);
   1402     if(U_FAILURE(status))
   1403     {
   1404         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
   1405         return TRUE;
   1406     }
   1407 
   1408     log_verbose("Converter opened..\n");
   1409      /*----setting the callback routine----*/
   1410     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
   1411     if (U_FAILURE(status)) {
   1412         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
   1413     }
   1414     /*-------------------------------------*/
   1415 
   1416     src = (const char *)source;
   1417     targ = junkout;
   1418     offs = junokout;
   1419 
   1420     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
   1421     realBufferEnd = junkout + realBufferSize;
   1422     realSourceEnd = src + sourcelen;
   1423 
   1424     if ( gOutBufferSize != realBufferSize )
   1425       checkOffsets = FALSE;
   1426 
   1427     if( gInBufferSize != MAX_LENGTH )
   1428       checkOffsets = FALSE;
   1429 
   1430     do
   1431       {
   1432         end = nct_min( targ + gOutBufferSize, realBufferEnd);
   1433         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
   1434 
   1435         if(targ == realBufferEnd)
   1436         {
   1437             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
   1438             return FALSE;
   1439         }
   1440         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
   1441 
   1442         /* oldTarg = targ; */
   1443 
   1444         status = U_ZERO_ERROR;
   1445         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE);
   1446 
   1447         ucnv_toUnicode (conv,
   1448                 &targ,
   1449                 end,
   1450                 (const char **)&src,
   1451                 (const char *)srcLimit,
   1452                 offs,
   1453                 doFlush, /* flush if we're at the end of hte source data */
   1454                 &status);
   1455         if(testReset)
   1456             ucnv_resetFromUnicode(conv);
   1457         if(gInBufferSize ==999 && gOutBufferSize==999)
   1458             ucnv_resetToUnicode(conv);
   1459         /*        offs += (targ-oldTarg); */
   1460 
   1461       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
   1462 
   1463     if(U_FAILURE(status))
   1464     {
   1465         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
   1466         return FALSE;
   1467     }
   1468 
   1469     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
   1470         sourcelen, targ-junkout);
   1471     if(getTestOption(VERBOSITY_OPTION))
   1472     {
   1473         char junk[999];
   1474         char offset_str[999];
   1475 
   1476         UChar *ptr;
   1477 
   1478         junk[0] = 0;
   1479         offset_str[0] = 0;
   1480 
   1481         for(ptr = junkout;ptr<targ;ptr++)
   1482         {
   1483             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
   1484             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
   1485         }
   1486 
   1487         log_verbose(junk);
   1488 
   1489         if ( checkOffsets )
   1490           {
   1491             log_verbose("\nOffsets:");
   1492             log_verbose(offset_str);
   1493           }
   1494         log_verbose("\n");
   1495     }
   1496     ucnv_close(conv);
   1497 
   1498     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
   1499 
   1500     if (checkOffsets && (expectOffsets != 0))
   1501     {
   1502         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
   1503 
   1504             log_err("did not get the expected offsets. %s",gNuConvTestName);
   1505             for(p=junkout;p<targ;p++)
   1506                 log_err("%d, ", junokout[p-junkout]);
   1507             log_err("\nExpected: ");
   1508             for(i=0; i<(targ-junkout); i++)
   1509                 log_err("%d,", expectOffsets[i]);
   1510             log_err("");
   1511             for(i=0; i<(targ-junkout); i++)
   1512                 log_err("%X,", junkout[i]);
   1513             log_err("");
   1514             for(i=0; i<(src-(const char *)source); i++)
   1515                 log_err("%X,", (unsigned char)source[i]);
   1516         }
   1517     }
   1518 
   1519     if(!memcmp(junkout, expect, expectlen*2))
   1520     {
   1521         log_verbose("Matches!\n");
   1522         return TRUE;
   1523     }
   1524     else
   1525     {
   1526         log_err("String does not match. %s\n", gNuConvTestName);
   1527         log_verbose("String does not match. %s\n", gNuConvTestName);
   1528         log_info("\nGot:");
   1529         printUSeq(junkout, expectlen);
   1530         log_info("\nExpected:");
   1531         printUSeq(expect, expectlen);
   1532         return FALSE;
   1533     }
   1534 }
   1535 
   1536 
   1537 static void TestResetBehaviour(void){
   1538 #if !UCONFIG_NO_LEGACY_CONVERSION
   1539     log_verbose("Testing Reset for DBCS and MBCS\n");
   1540     {
   1541         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
   1542         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
   1543         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
   1544 
   1545 
   1546         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
   1547         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
   1548         static const int32_t offsets1[] =  { 0,2,4,6};
   1549 
   1550         /*DBCS*/
   1551         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1552                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1553             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1554         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1555                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1556             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
   1557 
   1558         if(!testConvertToU(expected1, sizeof(expected1),
   1559                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1560                 offsets1, TRUE))
   1561            log_err("ibm-1363 -> did not match.\n");
   1562         /*MBCS*/
   1563         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1564                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1565             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1566         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1567                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1568             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
   1569 
   1570         if(!testConvertToU(expected1, sizeof(expected1),
   1571                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1572                 offsets1, TRUE))
   1573            log_err("ibm-1363 -> did not match.\n");
   1574 
   1575     }
   1576 
   1577     log_verbose("Testing Reset for ISO-2022-jp\n");
   1578     {
   1579         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1580 
   1581         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1582                                     0x31,0x1A, 0x32};
   1583 
   1584 
   1585         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
   1586 
   1587 
   1588         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1589         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
   1590                                     0x31,0x1A, 0x32};
   1591         static const int32_t offsets1[] =  { 3,5,10,11,12};
   1592 
   1593         // iso-2022-jp  android-change
   1594         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1595                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1596             log_err("u-> not match.\n");
   1597         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1598                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1599             log_err("u->  not match.\n");
   1600 
   1601         if(!testConvertToU(expected1, sizeof(expected1),
   1602                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1603                 offsets1, TRUE))
   1604            log_err("iso-2022-jp -> did not match.\n");
   1605 
   1606     }
   1607 
   1608     /* BEGIN android-removed */
   1609     /* To save space, Android does not build full ISO-2022-CN tables.
   1610        We skip the tests for ISO-2022-CN. */
   1611     /*
   1612     log_verbose("Testing Reset for ISO-2022-cn\n");
   1613     {
   1614         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1615 
   1616         static const uint8_t expected[] = {
   1617                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1618                                     0x36, 0x21,
   1619                                     0x0f, 0x31,
   1620                                     0x1A,
   1621                                     0x32
   1622                                     };
   1623 
   1624 
   1625         static const int32_t offsets[] = {
   1626                                     0,    0,    0,    0,    0,    0,    0,
   1627                                     1,    1,
   1628                                     2,    2,
   1629                                     3,
   1630                                     5,  };
   1631 
   1632         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
   1633         static const uint8_t expected1[] = {
   1634                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
   1635                                     0x36, 0x21,
   1636                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
   1637                                     0x0f, 0x1A,
   1638                                     0x32
   1639                                     };
   1640         static const int32_t offsets1[] =  { 5,7,13,16,17};
   1641 
   1642         // iso-2022-CN  android-change
   1643         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1644                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1645             log_err("u-> not match.\n");
   1646         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1647                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1648             log_err("u-> not match.\n");
   1649 
   1650         if(!testConvertToU(expected1, sizeof(expected1),
   1651                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1652                 offsets1, TRUE))
   1653            log_err("iso-2022-cn -> did not match.\n");
   1654     }
   1655     */
   1656     /* END android-removed */
   1657 
   1658         log_verbose("Testing Reset for ISO-2022-kr\n");
   1659     {
   1660         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1661 
   1662         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
   1663                                     0x0E, 0x6C, 0x69,
   1664                                     0x0f, 0x1A,
   1665                                     0x0e, 0x6F, 0x4B,
   1666                                     0x0F, 0x31,
   1667                                     0x1A,
   1668                                     0x32 };
   1669 
   1670         static const int32_t offsets[] = {-1, -1, -1, -1,
   1671                               0, 0, 0,
   1672                               1, 1,
   1673                               3, 3, 3,
   1674                               4, 4,
   1675                               5,
   1676                               7,
   1677                             };
   1678         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
   1679 
   1680         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
   1681                                     0x0E, 0x6C, 0x69,
   1682                                     0x0f, 0x41,
   1683                                     0x0e, 0x6F, 0x4B,
   1684                                     0x0F, 0x31,
   1685                                     0x42,
   1686                                     0x32 };
   1687 
   1688         static const int32_t offsets1[] = {
   1689                               5, 8, 10,
   1690                               13, 14, 15
   1691 
   1692                             };
   1693         // iso-2022-kr  android-change
   1694         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1695                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1696             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1697         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1698                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1699             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
   1700         if(!testConvertToU(expected1, sizeof(expected1),
   1701                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1702                 offsets1, TRUE))
   1703            log_err("iso-2022-kr -> did not match.\n");
   1704     }
   1705 
   1706         log_verbose("Testing Reset for HZ\n");
   1707     {
   1708         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
   1709 
   1710         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
   1711                                     0x7E, 0x7D, 0x1A,
   1712                                     0x7E, 0x7B, 0x36, 0x21,
   1713                                     0x7E, 0x7D, 0x31,
   1714                                     0x1A,
   1715                                     0x32 };
   1716 
   1717 
   1718         static const int32_t offsets[] = {0,0,0,0,
   1719                              1,1,1,
   1720                              3,3,3,3,
   1721                              4,4,4,
   1722                              5,
   1723                              7,};
   1724         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
   1725 
   1726         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
   1727                                     0x7E, 0x7D, 0x35,
   1728                                     0x7E, 0x7B, 0x36, 0x21,
   1729                                     0x7E, 0x7D, 0x31,
   1730                                     0x41,
   1731                                     0x32 };
   1732 
   1733 
   1734         static const int32_t offsets1[] = {2,6,9,13,14,15
   1735                             };
   1736 
   1737         /*hz*/
   1738         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1739                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1740             log_err("u->  not match.\n");
   1741         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1742                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1743             log_err("u->  not match.\n");
   1744         if(!testConvertToU(expected1, sizeof(expected1),
   1745                 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
   1746                 offsets1, TRUE))
   1747            log_err("hz -> did not match.\n");
   1748     }
   1749 #endif
   1750 
   1751     /*UTF-8*/
   1752      log_verbose("Testing for UTF8\n");
   1753     {
   1754         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
   1755         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
   1756                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
   1757                            0x04, 0x06 };
   1758         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
   1759             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
   1760 
   1761 
   1762         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
   1763         /*UTF-8*/
   1764         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1765             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1766             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1767         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1768             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1769             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1770         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1771             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE))
   1772             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1773         if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
   1774             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE))
   1775             log_err("u-> UTF8 with offsets and flush true did not match.\n");
   1776         if(!testConvertToU(expected, sizeof(expected),
   1777             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1778             log_err("UTF8 -> did not match.\n");
   1779         if(!testConvertToU(expected, sizeof(expected),
   1780             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE))
   1781             log_err("UTF8 -> did not match.\n");
   1782         if(!testConvertToU(expected, sizeof(expected),
   1783             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1784             log_err("UTF8 -> did not match.\n");
   1785         if(!testConvertToU(expected, sizeof(expected),
   1786             sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE))
   1787             log_err("UTF8 -> did not match.\n");
   1788 
   1789     }
   1790 
   1791 }
   1792 
   1793 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
   1794 static void
   1795 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
   1796     UConverter *cnv;
   1797 
   1798     UChar buffer[2];
   1799     UChar *target, *targetLimit;
   1800     const char *source, *sourceLimit;
   1801 
   1802     UErrorCode errorCode;
   1803 
   1804     errorCode=U_ZERO_ERROR;
   1805     cnv=ucnv_open(cnvName, &errorCode);
   1806     if(U_FAILURE(errorCode)) {
   1807         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
   1808         return;
   1809     }
   1810     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
   1811     if(U_FAILURE(errorCode)) {
   1812         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
   1813                     cnvName, u_errorName(errorCode));
   1814         ucnv_close(cnv);
   1815         return;
   1816     }
   1817 
   1818     source=(const char *)bytes;
   1819     sourceLimit=source+length;
   1820     target=buffer;
   1821     targetLimit=buffer+UPRV_LENGTHOF(buffer);
   1822 
   1823     /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
   1824     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
   1825     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
   1826         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
   1827                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1828     }
   1829 
   1830     errorCode=U_ZERO_ERROR;
   1831     source=sourceLimit;
   1832     target=buffer;
   1833     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1834     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
   1835         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
   1836                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
   1837     }
   1838 
   1839     /* 2. input bytes with flush=TRUE */
   1840     ucnv_resetToUnicode(cnv);
   1841 
   1842     errorCode=U_ZERO_ERROR;
   1843     source=(const char *)bytes;
   1844     target=buffer;
   1845     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
   1846     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
   1847         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
   1848                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
   1849     }
   1850 
   1851 
   1852     ucnv_close(cnv);
   1853 }
   1854 
   1855 static void
   1856 TestTruncated() {
   1857     static const struct {
   1858         const char *cnvName;
   1859         uint8_t bytes[8]; /* partial input bytes resulting in no output */
   1860         int32_t length;
   1861     } testCases[]={
   1862         { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
   1863         { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
   1864         { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
   1865         { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */
   1866 
   1867         { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
   1868         { "UTF-8",      { 0xd1 }, 1 },
   1869 
   1870         { "UTF-16BE",   { 0x4e }, 1 },
   1871         { "UTF-16LE",   { 0x4e }, 1 },
   1872         { "UTF-16",     { 0x4e }, 1 },
   1873         { "UTF-16",     { 0xff }, 1 },
   1874         { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },
   1875 
   1876         { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
   1877         { "UTF-32LE",   { 0x4e }, 1 },
   1878         { "UTF-32",     { 0, 0, 0x4e }, 3 },
   1879         { "UTF-32",     { 0xff }, 1 },
   1880         { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
   1881         { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */
   1882 
   1883 #if !UCONFIG_NO_LEGACY_CONVERSION
   1884         { "BOCU-1",     { 0xd5 }, 1 },
   1885 
   1886         { "Shift-JIS",  { 0xe0 }, 1 },
   1887 
   1888         { "ibm-939",    { 0x0e, 0x41 }, 2 } /* SO 0x41 */
   1889 #else
   1890         { "BOCU-1",     { 0xd5 }, 1 ,}
   1891 #endif
   1892     };
   1893     int32_t i;
   1894 
   1895     for(i=0; i<UPRV_LENGTHOF(testCases); ++i) {
   1896         doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
   1897     }
   1898 }
   1899 
   1900 typedef struct NameRange {
   1901     const char *name;
   1902     UChar32 start, end, start2, end2, notStart, notEnd;
   1903 } NameRange;
   1904 
   1905 static void
   1906 TestUnicodeSet() {
   1907     UErrorCode errorCode;
   1908     UConverter *cnv;
   1909     USet *set;
   1910     const char *name;
   1911     int32_t i, count;
   1912 
   1913     static const char *const completeSetNames[]={
   1914         "UTF-7",
   1915         "UTF-8",
   1916         "UTF-16",
   1917         "UTF-16BE",
   1918         "UTF-16LE",
   1919         "UTF-32",
   1920         "UTF-32BE",
   1921         "UTF-32LE",
   1922         "SCSU",
   1923         "BOCU-1",
   1924         "CESU-8",
   1925 #if !UCONFIG_NO_LEGACY_CONVERSION
   1926         "gb18030",
   1927 #endif
   1928         "IMAP-mailbox-name"
   1929     };
   1930 #if !UCONFIG_NO_LEGACY_CONVERSION
   1931     static const char *const lmbcsNames[]={
   1932         "LMBCS-1",
   1933         "LMBCS-2",
   1934         "LMBCS-3",
   1935         "LMBCS-4",
   1936         "LMBCS-5",
   1937         "LMBCS-6",
   1938         "LMBCS-8",
   1939         "LMBCS-11",
   1940         "LMBCS-16",
   1941         "LMBCS-17",
   1942         "LMBCS-18",
   1943         "LMBCS-19"
   1944     };
   1945 #endif
   1946 
   1947     static const NameRange nameRanges[]={
   1948         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1949 #if !UCONFIG_NO_LEGACY_CONVERSION
   1950         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
   1951 #endif
   1952         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
   1953 #if !UCONFIG_NO_LEGACY_CONVERSION
   1954         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
   1955         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
   1956         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
   1957         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
   1958 #else
   1959         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
   1960 #endif
   1961     };
   1962 
   1963     /* open an empty set */
   1964     set=uset_open(1, 0);
   1965 
   1966     count=ucnv_countAvailable();
   1967     for(i=0; i<count; ++i) {
   1968         errorCode=U_ZERO_ERROR;
   1969         name=ucnv_getAvailableName(i);
   1970         cnv=ucnv_open(name, &errorCode);
   1971         if(U_FAILURE(errorCode)) {
   1972             log_data_err("error: unable to open converter %s - %s\n",
   1973                     name, u_errorName(errorCode));
   1974             continue;
   1975         }
   1976 
   1977         uset_clear(set);
   1978         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   1979         if(U_FAILURE(errorCode)) {
   1980             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   1981                     name, u_errorName(errorCode));
   1982         } else if(uset_size(set)==0) {
   1983             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
   1984         }
   1985 
   1986         ucnv_close(cnv);
   1987     }
   1988 
   1989     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
   1990     for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) {
   1991         errorCode=U_ZERO_ERROR;
   1992         name=completeSetNames[i];
   1993         cnv=ucnv_open(name, &errorCode);
   1994         if(U_FAILURE(errorCode)) {
   1995             log_data_err("error: unable to open converter %s - %s\n",
   1996                     name, u_errorName(errorCode));
   1997             continue;
   1998         }
   1999 
   2000         uset_clear(set);
   2001         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2002         if(U_FAILURE(errorCode)) {
   2003             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2004                     name, u_errorName(errorCode));
   2005         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
   2006             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
   2007         }
   2008 
   2009         ucnv_close(cnv);
   2010     }
   2011 
   2012 #if !UCONFIG_NO_LEGACY_CONVERSION
   2013     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
   2014     for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) {
   2015         errorCode=U_ZERO_ERROR;
   2016         name=lmbcsNames[i];
   2017         cnv=ucnv_open(name, &errorCode);
   2018         if(U_FAILURE(errorCode)) {
   2019             log_data_err("error: unable to open converter %s - %s\n",
   2020                     name, u_errorName(errorCode));
   2021             continue;
   2022         }
   2023 
   2024         uset_clear(set);
   2025         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2026         if(U_FAILURE(errorCode)) {
   2027             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2028                     name, u_errorName(errorCode));
   2029         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
   2030             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
   2031         }
   2032 
   2033         ucnv_close(cnv);
   2034     }
   2035 #endif
   2036 
   2037     /* test specific sets */
   2038     for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) {
   2039         errorCode=U_ZERO_ERROR;
   2040         name=nameRanges[i].name;
   2041         cnv=ucnv_open(name, &errorCode);
   2042         if(U_FAILURE(errorCode)) {
   2043             log_data_err("error: unable to open converter %s - %s\n",
   2044                          name, u_errorName(errorCode));
   2045             continue;
   2046         }
   2047 
   2048         uset_clear(set);
   2049         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2050         if(U_FAILURE(errorCode)) {
   2051             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
   2052                     name, u_errorName(errorCode));
   2053         } else if(
   2054             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
   2055             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
   2056         ) {
   2057             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
   2058         } else if(nameRanges[i].notStart>=0) {
   2059             /* simulate containsAny() with the C API */
   2060             uset_complement(set);
   2061             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
   2062                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
   2063             }
   2064         }
   2065 
   2066         ucnv_close(cnv);
   2067     }
   2068 
   2069     errorCode = U_ZERO_ERROR;
   2070     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
   2071     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
   2072         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2073     }
   2074     errorCode = U_PARSE_ERROR;
   2075     /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
   2076     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
   2077     if (errorCode != U_PARSE_ERROR) {
   2078         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
   2079     }
   2080 
   2081     uset_close(set);
   2082 }
   2083